6 #ifndef TURI_ML2_DATA_ITERATOR_BASE_H_ 7 #define TURI_ML2_DATA_ITERATOR_BASE_H_ 9 #include <core/logging/assertions.hpp> 10 #include <toolkits/ml_data_2/data_storage/ml_data_row_translation.hpp> 11 #include <toolkits/ml_data_2/data_storage/ml_data_block_manager.hpp> 12 #include <toolkits/ml_data_2/ml_data.hpp> 13 #include <toolkits/ml_data_2/side_features.hpp> 14 #include <toolkits/ml_data_2/iterators/composite_row_type.hpp> 15 #include <toolkits/ml_data_2/iterators/row_reference.hpp> 16 #include <core/util/code_optimization.hpp> 19 #include <core/storage/sframe_data/sarray.hpp> 21 #include <Eigen/SparseCore> 26 namespace turi {
namespace v2 {
30 typedef Eigen::Matrix<double, Eigen::Dynamic,1> DenseVector;
31 typedef Eigen::SparseVector<double> SparseVector;
/**
 * Declaration of setup(); its definition is not visible in this part of the
 * file, so the notes below describe the signature only.
 *
 * \param _data        The ml_data object, passed by const reference.
 * \param thread_idx   A thread index. NOTE(review): presumably selects this
 *                     iterator's share of the rows out of num_threads --
 *                     confirm against the definition.
 * \param num_threads  A thread count paired with thread_idx (same caveat).
 * \param options      Map from option name to flexible_type value; the same
 *                     map type is taken by internal_setup().
 */
49 void setup(
const ml_data& _data,
51 size_t thread_idx,
size_t num_threads,
52 const std::map<std::string, flexible_type>& options);
/**
 * Virtual hook whose default body is empty: it does nothing unless a derived
 * class overrides it.
 *
 * \param options  Map from option name to flexible_type value; unused by this
 *                 default implementation.
 *
 * NOTE(review): presumably called from setup() with the same options map --
 * the call site is not visible here; confirm.
 */
56 virtual void internal_setup(
const std::map<std::string, flexible_type>& options) {}
105 template <
typename Entry>
116 if(!has_translated_columns)
119 ml_data_internal::copy_raw_into_ml_data_entry_row(
123 DASSERT_LE(x.size(), data->max_row_size());
135 if(!has_untranslated_columns) {
140 x.resize(data_block->untranslated_columns.size());
144 for(
size_t i = 0; i < data_block->untranslated_columns.size(); ++i) {
145 x[i] = data_block->untranslated_columns[i][
row_index];
189 if(!has_translated_columns)
192 ml_data_internal::copy_raw_into_eigen_array(
196 use_reference_encoding);
236 if(!has_translated_columns)
239 ml_data_internal::copy_raw_into_eigen_array(
243 use_reference_encoding);
267 template <
typename DenseRowXpr>
273 ml_data_internal::copy_raw_into_eigen_array(
277 use_reference_encoding);
326 ref.data_block = data_block;
327 ref.side_features = side_features;
329 ref.use_reference_encoding = use_reference_encoding;
358 std::shared_ptr<ml_data> data;
362 std::shared_ptr<ml_data_side_features> side_features;
367 bool use_reference_encoding =
false;
368 bool has_untranslated_columns =
false;
369 bool has_translated_columns =
false;
371 size_t row_block_size = -1;
/**
 * Shared handle to an ml_data_block. Other code in this file reads the
 * block's translated_rows.entry_data and untranslated_columns through it.
 */
401 std::shared_ptr<ml_data_internal::ml_data_block> data_block;
410 DASSERT_LT(current_in_block_index, data_block->translated_rows.entry_data.size());
419 size_t index = current_row_index - (current_block_index * row_block_size);
422 DASSERT_LT(index, row_block_size);
432 if(has_translated_columns)
437 if(current_row_index == (current_block_index + 1) * row_block_size && !
done())
void load_next_block() GL_HOT_NOINLINE
void advance_row() GL_HOT_INLINE_FLATTEN
const ml_data & ml_data_source() const
ml_data_row_reference get_reference() const
GL_HOT_INLINE void fill_observation(std::vector< Entry > &x) const
ml_data_internal::entry_value_iterator current_data_iter() const GL_HOT_INLINE_FLATTEN
bool add_side_information
GL_HOT_INLINE_FLATTEN void fill_eigen_row(DenseRowXpr &&x) const
void fill_observation(DenseVector &x) const GL_HOT_INLINE_FLATTEN
virtual bool done() const
Returns true if the iteration is done, false otherwise.
virtual void reset()
Resets the iterator to the start of the sframes in ml_data.
void fill_observation(SparseVector &x) const GL_HOT_INLINE_FLATTEN
size_t iter_row_index_start
size_t target_index() const GL_HOT_INLINE_FLATTEN
void fill_observation(composite_row_container &crc) GL_HOT_INLINE_FLATTEN
size_t iter_row_index_end
void fill_untranslated_values(std::vector< flexible_type > &x) const GL_HOT_INLINE_FLATTEN
double target_value() const GL_HOT_INLINE_FLATTEN
ml_data_internal::entry_value _raw_row_entry(size_t raw_index) const GL_HOT_INLINE_FLATTEN
#define DASSERT_FALSE(cond)
size_t current_in_block_index
#define GL_HOT_INLINE_FLATTEN
size_t current_block_index
void setup_block_containing_current_row_index() GL_HOT_NOINLINE
Loads the block containing the current row index (current_row_index).
virtual ~ml_data_iterator_base()
#define DASSERT_TRUE(cond)
size_t current_block_row_index() const GL_HOT_INLINE_FLATTEN
size_t unsliced_row_index() const
Returns the absolute row index.