6 #ifndef TURI_UNITY_LIB_SFRAME_READER_HPP 7 #define TURI_UNITY_LIB_SFRAME_READER_HPP 13 #include <core/data/flexible_type/flexible_type.hpp> 14 #include <core/storage/sframe_data/sarray_reader.hpp> 15 #include <core/storage/sframe_data/sframe_index_file.hpp> 16 #include <core/storage/sframe_data/sframe_constants.hpp> 22 struct csv_line_tokenizer;
/// Iterator trait typedefs.
/// A dereferenced value is one row: a vector holding one flexible_type per
/// underlying sarray_reader (cur_element is sized to _data->size()).
46 typedef std::vector<flexible_type> value_type;
/// NOTE(review): positions are tracked as size_t, but differences are computed
/// by casting both positions to int; distances that do not fit in an int would
/// be truncated -- confirm segment sizes stay within range.
47 typedef int difference_type;
48 typedef value_type* pointer;
49 typedef value_type& reference;
/// Declared as an input iterator.
50 typedef std::input_iterator_tag iterator_category;
67 bool is_begin_iterator) : _data(&data),
68 _segmentid(segmentid) {
71 cur_iter.resize(_data->size());
72 cur_element.resize(_data->size());
73 for(
size_t i = 0; i < _data->size(); ++i) {
74 if(is_begin_iterator) {
75 cur_iter[i] = _data->at(i)->begin(segmentid);
77 cur_iter[i] = _data->at(i)->end(segmentid);
82 segment_limit = _data->at(0)->segment_length(segmentid);
84 if(is_begin_iterator) {
87 cur_segment_pos = segment_limit;
95 for(
auto& i : cur_iter) {
102 if(cur_segment_pos > segment_limit) {
103 cur_segment_pos = segment_limit;
131 return _data == other._data &&
132 _segmentid == other._segmentid &&
133 cur_segment_pos == other.cur_segment_pos;
141 return _data != other._data ||
142 _segmentid != other._segmentid ||
143 cur_segment_pos != other.cur_segment_pos;
151 for (
size_t i = 0; i < _data->size(); ++i) {
152 cur_element[i] = *(cur_iter[i]);
171 return (
int)(cur_segment_pos) - (
int)(other.cur_segment_pos);
/// Non-owning pointer to the vector of readers. It is set from the address of
/// the constructor's `data` argument and dereferenced later, so that vector
/// has to stay alive while this iterator is in use.
174 const std::vector<std::shared_ptr<sarray_reader<flexible_type> > > *_data;
/// One underlying iterator per reader in *_data (resized to _data->size()).
175 std::vector<sarray_iterator<flexible_type> > cur_iter;
/// Segment this iterator belongs to; (size_t)(-1) until a constructor sets it.
177 size_t _segmentid = (size_t)(-1);
/// Current position inside the segment; takes part in the equality and
/// inequality comparisons and is clamped so it never exceeds segment_limit.
178 size_t cur_segment_pos = 0;
/// Length of the segment, read from the first reader's segment_length().
179 size_t segment_limit = 0;
/// Row buffer filled element by element from cur_iter.
/// NOTE(review): mutable presumably so the const dereference operators can
/// refresh it -- confirm against their definitions.
180 mutable value_type cur_element;
/// Initialization overload taking the source sframe and a segment count.
/// `num_segments` defaults to (size_t)(-1).
/// NOTE(review): the default is presumably a sentinel meaning "use the
/// frame's existing segmentation" -- confirm against the definition, which is
/// not visible in this listing.
233 void init(
const sframe& array,
size_t num_segments = (
size_t)(-1));
/// Initialization overload taking the source sframe and an explicit list of
/// segment lengths.
/// NOTE(review): presumably the lengths are expected to add up to the number
/// of rows in `array` -- confirm against the definition.
246 void init(
const sframe& array,
const std::vector<size_t>& segment_lengths);
/// Returns an `iterator` for segment `segmentid`; does not modify the reader
/// object (const member).
/// NOTE(review): presumably positioned at the first row of the segment --
/// the definition is not visible here.
249 iterator begin (
size_t segmentid)
const;
/// Returns an `iterator` for segment `segmentid`; does not modify the reader
/// object (const member).
/// NOTE(review): presumably the past-the-end position of the segment --
/// the definition is not visible here.
252 iterator end (
size_t segmentid)
const;
269 size_t read_rows(
size_t row_start,
271 std::vector<std::vector<flexible_type> >& out_obj);
289 size_t read_rows(
size_t row_start,
/// NOTE(review): definition not visible in this listing; presumably rewinds or
/// invalidates iterators previously handed out by begin()/end() -- confirm
/// before relying on it. Non-const, so it may change reader state.
297 void reset_iterators();
301 return index_info.ncolumns;
306 return index_info.nrows;
311 return index_info.nrows;
319 ASSERT_LT(i, index_info.ncolumns);
321 return index_info.column_names[i];
329 ASSERT_LT(i, index_info.ncolumns);
331 return column_data[i]->get_type();
336 return m_num_segments;
344 ASSERT_LT(segment, num_segments());
345 if (index_info.ncolumns == 0)
return 0;
346 return column_data[0]->segment_length(segment);
354 auto iter = std::find(index_info.column_names.begin(),
355 index_info.column_names.end(),
357 return iter != index_info.column_names.end();
365 auto iter = std::find(index_info.column_names.begin(),
366 index_info.column_names.end(),
368 if (iter != index_info.column_names.end()) {
369 return (iter) - index_info.column_names.begin();
371 throw (std::string(
"Column name " + column_name +
" does not exist."));
/// One reader per column, indexed by column number: column_data[i]->get_type()
/// supplies the type of column i, and column_data[0] supplies segment lengths.
380 std::vector<std::shared_ptr<sarray_reader<flexible_type> > > column_data;
/// Segment count reported by num_segments(); 0 until it is assigned.
382 size_t m_num_segments = 0;
flex_type_enum column_type(size_t i) const
const value_type * operator->() const
size_t size() const
Returns the length of each sarray.
sframe_iterator(const std::vector< std::shared_ptr< sarray_reader< flexible_type > > > &data, size_t segmentid, bool is_begin_iterator)
std::string column_name(size_t i) const
size_t segment_length(size_t segment) const
bool contains_column(const std::string &column_name) const
const value_type & operator*() const
int operator-(const sframe_iterator &other) const
bool operator==(const sframe_iterator &other) const
size_t num_rows() const
Returns the length of each sarray.
sframe_iterator & operator++()
sframe_iterator::value_type value_type
The value type the sframe stores.
size_t column_index(const std::string &column_name) const
sframe_iterator iterator
The iterator type that begin() and end() return.
size_t num_segments() const
Returns the number of segments in the SFrame. Does not throw.
bool operator!=(const sframe_iterator &other) const
size_t num_columns() const
Returns the number of columns in the SFrame. Does not throw.