6 #ifndef TURI_UNITY_DATAFRAME_HPP 7 #define TURI_UNITY_DATAFRAME_HPP 10 #include <core/data/flexible_type/flexible_type.hpp> 11 #include <core/storage/serialization/serialization_includes.hpp> 15 class flexible_type_registry;
16 typedef uint32_t field_id_type;
/// A map from the column name to the type of the column.
44 std::map<std::string, flex_type_enum>
types;
/// A map from the column name to the values stored in that column.
/// The number of entries in this map is what ncols() reports.
51 std::map<std::string, std::vector<flexible_type> >
values;
/// Declaration only; the definition is not part of this listing.
/// NOTE(review): judging by the name and parameters this presumably fills
/// the dataframe from a CSV file -- confirm against the definition.
///
/// \param path        Path of the file to read.
/// \param delimiter   Character separating fields (presumably within a
///                    line -- TODO confirm).
/// \param use_header  Presumably whether the first line supplies the column
///                    names -- TODO confirm.
64 void read_csv(
const std::string& path,
char delimiter,
bool use_header);
70 return (values.begin() == values.end()) ? 0
71 : values.begin()->second.size();
/// Returns the number of columns in the dataframe, i.e. the number of
/// entries held in the `values` map.
89 inline size_t ncols() const {
  const size_t column_count = values.size();
  return column_count;
}
95 return types.count(key);
103 log_and_throw(std::string(
"Column " + key +
" does not exist."));
105 const std::vector<flexible_type>& col = values.at(key);
107 for (
size_t i = 0; i < col.size(); ++i) {
119 inline std::pair< flex_type_enum, std::vector<flexible_type>&>
121 return {types.at(key), values.at(key)};
128 inline std::pair< flex_type_enum, const std::vector<flexible_type>&>
130 return {types.at(key), values.at(key)};
142 const std::vector<flexible_type>& val,
150 std::vector<flexible_type>&& val,
160 oarc << names << types <<
values;
165 iarc >> names >> types >>
values;
/// A vector storing the names of the columns, indexed by column position
/// (the current column's name is names[current_column]).
208 std::vector<std::string>
names;
/// A vector storing the types of the columns, indexed by column position
/// (the current column's type is types[current_column]).
211 std::vector<flex_type_enum>
types;
/// One pair of const iterators per column, indexed by column position.
/// `.first` is the cursor that gets dereferenced and advanced for the
/// current column.
/// NOTE(review): `.second` is presumably the end of that column's values --
/// confirm against the constructor, which is not visible in this listing.
214 std::vector<std::pair<std::vector<flexible_type>::const_iterator,
215 std::vector<flexible_type>::const_iterator> > iterators;
/// Index of the column the iterator currently points at; used to index
/// `names`, `types` and `iterators`.
221 size_t current_column;
// Iterator trait typedefs: distances are expressed as int, and the iterator
// is advertised as a forward iterator.
231 typedef int difference_type;
234 typedef std::forward_iterator_tag iterator_category;
250 std::swap(iterators[a], iterators[b]);
251 std::swap(names[a], names[b]);
252 std::swap(types[a], types[b]);
257 ++iterators[current_column].first;
259 if (current_column == num_columns) {
275 inline size_t row()
const {
281 return current_column;
296 return names[current_column];
311 return types[current_column];
/// Declaration only; the definition is not part of this listing.
/// NOTE(review): presumably advances the iterator past `num_rows_to_skip`
/// whole rows -- confirm against the definition.
///
/// \param num_rows_to_skip Number of rows to skip.
330 void skip_rows(
size_t num_rows_to_skip);
// Equality requires both iterators to agree on num_el and on idx.
// The idx comparison has to be `==`: with `<` an iterator would never
// compare equal to itself, and operator!= (the negation of this test)
// would report a difference between an iterator and itself.
334 return num_el == other.num_el && idx == other.idx;
339 return !((*this) == other);
344 return *(iterators[current_column].first);
349 return *(iterators[current_column].first);
355 return &(*(iterators[current_column].first));
360 return &(*(iterators[current_column].first));
373 size_t endrow)> partialrowfn);
size_t row() const
Returns the index of the current row.
std::pair< flex_type_enum, const std::vector< flexible_type > & > operator[](std::string key) const
void clear()
Clears the contents of the dataframe.
const flexible_type * operator->() const
Dereferences the iterator, returning a pointer to the underlying flexible_type.
const flexible_type & operator*() const
Dereferences the iterator, returning a reference to the underlying flexible_type. ...
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
const std::string & column_name() const
Returns the name of the current column.
void remove_column(std::string key)
bool contains(std::string key) const
std::pair< flex_type_enum, std::vector< flexible_type > & > operator[](std::string key)
flex_type_enum column_type(size_t idx) const
Returns the type of an arbitrary column.
size_t column() const
Returns the index of the current column.
size_t column_size() const
Returns the number of columns.
const std::string & column_name(size_t idx) const
Returns the name of an arbitrary column.
void read_csv(const std::string &path, char delimiter, bool use_header)
std::vector< std::string > names
A vector storing the names of the columns.
bool operator!=(const dataframe_row_iterator &other)
Returns true if both iterators are not equal.
void swap_column_order(size_t a, size_t b)
bool operator==(const dataframe_row_iterator &other)
Returns true if both iterators are equal.
flex_type_enum column_type() const
Returns the type of the current column.
void parallel_dataframe_iterate(const dataframe_t &df, std::function< void(dataframe_row_iterator &iter, size_t startrow, size_t endrow)> partialrowfn)
std::map< std::string, std::vector< flexible_type > > values
const std::vector< flex_type_enum > & column_types() const
Returns the list of all column types.
void load(iarchive &iarc)
Deserializer.
dataframe_row_iterator & operator++()
Pre-increments the iterator to the next entry of the dataframe, row-wise.
const std::vector< std::string > & column_names() const
Returns the list of all column names.
std::map< std::string, flex_type_enum > types
A map from the column name to the type of the column.
size_t ncols() const
Returns the number of columns in the dataframe.
const flexible_type & operator*()
Dereferences the iterator, returning a reference to the underlying flexible_type. ...
const flexible_type * operator->()
Dereferences the iterator, returning a pointer to the underlying flexible_type.
size_t row_size() const
Returns the number of rows.
void save(oarchive &oarc) const
Serializer.
void set_column(std::string key, const std::vector< flexible_type > &val, flex_type_enum type)
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
bool contains_nan(std::string key) const
dataframe_row_iterator & operator++(int)
Post-increments the iterator to the next entry of the dataframe, row-wise.
void set_type(std::string key, flex_type_enum type)