6 #ifndef TURI_ML2_DATA_COLUMN_METADATA_H_ 7 #define TURI_ML2_DATA_COLUMN_METADATA_H_ 9 #include <toolkits/ml_data_2/indexing/column_indexer.hpp> 10 #include <toolkits/ml_data_2/statistics/column_statistics.hpp> 11 #include <core/storage/sframe_data/sarray.hpp> 21 namespace ml_data_internal {
// Name string for this metadata entry; default-initialized to the empty string.
39 std::string name =
"";
// Shared pointer to a column_indexer; defaults to nullptr.
42 std::shared_ptr<ml_data_internal::column_indexer> indexer =
nullptr;
// Shared pointer to a column_statistics; defaults to nullptr.
43 std::shared_ptr<ml_data_internal::column_statistics> statistics =
nullptr;
// Declaration only; the definition is not part of this view.
// Parameters visible here: a bool flagging a target column, the name string,
// a string-keyed map of ml_column_mode overrides, and a string-keyed map of
// flexible_type options.
// NOTE(review): the embedded listing numbers jump from 53 to 55, so one line
// of the original parameter list appears to be missing from this extract --
// confirm against the original header before relying on this signature.
52 void setup(
bool is_target_column,
53 const std::string& name,
55 const std::map<std::string, ml_column_mode>& mode_overrides,
56 const std::map<std::string, flexible_type>& options);
// Declaration only; the definition is not part of this view.
// Takes a single size_t, previous_total. Presumably this is what populates
// global_index_offset_at_train_time -- TODO confirm in the definition.
61 void set_training_index_offset(
size_t previous_total);
71 bool has_fixed_size = (column_data_size_if_fixed != size_t(-1));
75 return has_fixed_size;
81 return (mode == ml_column_mode::UNTRANSLATED);
88 return index_size_at_train_time;
97 DASSERT_TRUE(global_index_offset_at_train_time !=
size_t(-1));
98 return global_index_offset_at_train_time;
// Lets turi::metadata_load reach the non-public members of this class.
103 friend struct turi::metadata_load;
// The three members below all start at size_t(-1). Elsewhere in this file
// that value is treated as "not set": has_fixed_size is computed as
// (column_data_size_if_fixed != size_t(-1)), and the accessor for
// global_index_offset_at_train_time debug-asserts it is not size_t(-1)
// before returning it.
108 size_t index_size_at_train_time = size_t(-1);
109 size_t column_data_size_if_fixed = size_t(-1);
111 size_t global_index_offset_at_train_time = size_t(-1);
// Accessor that returns the stored column_data_size_if_fixed value.
// NOTE(review): the top-level `const` on a by-value size_t return type has no
// effect for callers; consider declaring the return type as plain `size_t`.
// NOTE(review): listing line 115 and the closing brace are absent from this
// extract, so the body may contain more than the return shown here.
114 const size_t fixed_column_size()
const {
116 return column_data_size_if_fixed;
129 return indexer->indexed_column_size();
132 return column_data_size_if_fixed;
// Shorthand alias for a std::shared_ptr to column_metadata.
151 typedef std::shared_ptr<column_metadata> column_metadata_ptr;
// Declaration only; the definition is not part of this view.
// Takes a vector of shared column_metadata pointers (by const reference) and
// a bool, _has_target. Presumably _has_target is what sets the has_target
// member declared nearby -- TODO confirm in the definition.
165 void setup(
const std::vector<std::shared_ptr<column_metadata> >& _metadata_vect,
bool _has_target);
// Declaration only; the definition is not part of this view.
// Takes a shared pointer to ml_metadata by const reference. What is read from
// it cannot be determined from this extract -- see the definition.
169 void set_index_sizes(
const std::shared_ptr<ml_metadata>& m);
// Summary flags and counters. Every flag defaults to false and every count
// defaults to 0. The code that assigns them is not part of this view, so the
// precise meaning of each field should be confirmed against the definitions
// of setup() and set_index_sizes().
171 bool has_target =
false;
172 bool target_is_indexed =
false;
// NOTE(review): presumably constant_data_size is only meaningful when
// data_size_is_constant is true -- confirm against the code that sets them.
176 bool data_size_is_constant =
false;
181 size_t constant_data_size = 0;
185 size_t num_x_columns = 0;
189 size_t total_num_columns = 0;
202 if(mode == ml_column_mode::NUMERIC_VECTOR) {
211 if(UNLIKELY(nv != column_data_size_if_fixed)) {
213 auto throw_error = [&]() GL_GCC_ONLY(GL_COLD_NOINLINE) {
215 std::string(
"Dataset mismatch between training and prediction. Numeric feature '")
217 +
"' must contain lists of consistent size. (Found lists/arrays of sizes " 218 + std::to_string(nv) +
" and " 219 + std::to_string(column_data_size_if_fixed) +
").");
240 } END_OUT_OF_PLACE_SAVE()
244 arc >> is_not_nullptr;
249 m = std::shared_ptr<turi::v2::ml_data_internal::column_metadata>(
nullptr);
251 } END_OUT_OF_PLACE_LOAD()
static GL_HOT_INLINE_FLATTEN bool mode_has_fixed_size(ml_column_mode mode)
#define BEGIN_OUT_OF_PLACE_LOAD(arc, tname, tval)
Macro to make it easy to define out-of-place loads.
std::vector< double > flex_vec
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
flex_type_enum get_type() const
#define GL_HOT_INLINE_FLATTEN
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
static GL_HOT_INLINE_FLATTEN bool mode_is_indexed(ml_column_mode mode)
#define DASSERT_TRUE(cond)
#define BEGIN_OUT_OF_PLACE_SAVE(arc, tname, tval)
Macro to make it easy to define out-of-place saves.