6 #ifndef TURI_DML_DATA_ITERATOR_H_ 7 #define TURI_DML_DATA_ITERATOR_H_ 9 #include <core/logging/assertions.hpp> 10 #include <ml/ml_data/data_storage/ml_data_row_translation.hpp> 11 #include <ml/ml_data/data_storage/ml_data_block_manager.hpp> 12 #include <ml/ml_data/ml_data.hpp> 13 #include <ml/ml_data/row_reference.hpp> 14 #include <core/util/code_optimization.hpp> 17 #include <core/storage/sframe_data/sarray.hpp> 19 #include <Eigen/SparseCore> 28 typedef Eigen::Matrix<double, Eigen::Dynamic,1> DenseVector;
29 typedef Eigen::SparseVector<double> SparseVector;
/// Declaration of the iterator's setup routine; the definition is not part of
/// the visible source.
///
/// \param _data        The ml_data object, passed by const reference.
/// \param thread_idx   Index of the calling thread.
/// \param num_threads  Total number of threads.
///
// NOTE(review): presumably thread_idx / num_threads select the row range this
// iterator covers (the seek assertion text refers to multithreaded
// iterators) -- confirm against the definition.
44 void setup(
const ml_data& _data,
46 size_t thread_idx,
size_t num_threads);
/// Returns true when current_row_index equals iter_row_index_end, i.e. the
/// cursor has reached this iterator's end index; false otherwise.
61 inline bool done()
const {
return current_row_index == iter_row_index_end; }
/// Returns the current row's offset from global_row_start
/// (current_row_index - global_row_start).
///
/// Both operands are size_t, so the result wraps around if current_row_index
/// is below global_row_start.
65 inline size_t row_index()
const {
return current_row_index - global_row_start; }
107 return *(current_data_iter() + raw_index);
116 size_t absolute_row_index = row_index + global_row_start;
118 ASSERT_MSG(absolute_row_index <= global_row_end,
119 "Requested row index out of bounds.");
121 ASSERT_MSG((iter_row_index_start == global_row_start
122 && iter_row_index_end == global_row_end),
123 "Seek not supported with multithreaded iterators.");
125 current_row_index = absolute_row_index;
128 setup_block_containing_current_row_index();
// Row / block cursor state. Every field defaults to -1, which converts to the
// largest size_t value.
// NOTE(review): presumably an "unset" marker until the iterator is set up --
// confirm.

// Start index of this iterator's range; the seek precondition requires it to
// equal global_row_start.
139 size_t iter_row_index_start = -1;
// End index of this iterator's range; done() is true once current_row_index
// equals this value.
140 size_t iter_row_index_end = -1;
// Absolute index of the row the iterator is positioned on (row_index()
// subtracts global_row_start from it).
141 size_t current_row_index = -1;
// Block number; current_block_index * data.row_block_size is subtracted from
// current_row_index to obtain the row offset inside the block.
142 size_t current_block_index = -1;
// Bounds used to convert between relative and absolute row indices:
// row_index() subtracts global_row_start, while the seek logic adds it to the
// requested index and checks the result against global_row_end.
// These two fields have no default initializer.
151 size_t global_row_start, global_row_end;
160 DASSERT_LT(row.current_in_block_index, row.data_block->translated_rows.entry_data.size());
162 return &(row.data_block->translated_rows.entry_data[row.current_in_block_index]);
169 size_t index = current_row_index - (current_block_index * data.row_block_size);
172 DASSERT_LT(index, data.row_block_size);
182 if(row.has_translated_columns || rm.has_target)
183 row.current_in_block_index += get_row_data_size(rm, current_data_iter());
187 DASSERT_GE(current_row_index, current_block_index * data.row_block_size);
189 row.current_in_block_row_index = current_row_index - current_block_index * data.row_block_size;
191 if(row.current_in_block_row_index == data.row_block_size && !
done())
// Invoked at the end of the seek logic, right after current_row_index has
// been reassigned. The definition is not visible here.
// NOTE(review): presumably loads whichever block holds current_row_index --
// confirm against the definition.
199 void setup_block_containing_current_row_index() GL_HOT_NOINLINE;
// Declaration only; the definition and call sites are not visible here.
// NOTE(review): the name suggests it moves the iterator on to the following
// data block -- confirm against the definition.
203 void load_next_block() GL_HOT_NOINLINE;
void reset()
Resets the iterator to the start of the sframes in ml_data.
const ml_data & ml_data_source() const
void seek(size_t row_index)
const ml_data_iterator & operator++() GL_HOT_INLINE_FLATTEN
Advances the iterator to the next observation.
ml_data_row_reference const * operator->() const GL_HOT_INLINE_FLATTEN
ml_data_internal::entry_value _raw_row_entry(size_t raw_index) const GL_HOT_INLINE_FLATTEN
#define DASSERT_FALSE(cond)
#define GL_HOT_INLINE_FLATTEN
bool done() const
Returns true if the iteration is done, false otherwise.
ml_data_row_reference operator*() const GL_HOT_INLINE_FLATTEN