6 #ifndef TURI_ML2_METADATA_H_ 7 #define TURI_ML2_METADATA_H_ 9 #include <toolkits/ml_data_2/ml_data_column_modes.hpp> 10 #include <toolkits/ml_data_2/data_storage/internal_metadata.hpp> 11 #include <core/export.hpp> 19 class ml_data_side_features;
/// Metadata container for ml_data; get_version() below reports version 2.
///
/// Visible state (see the data members at the end of the class): a vector of
/// per-column metadata (`columns`), metadata for a target column (`target`),
/// a pointer to side features, the original column names, an options map,
/// and a few cached values filled in by setup_cached_values().
///
/// NOTE(review): most accessors are only *declared* here as `inline`; their
/// bodies are not visible in this listing (presumably they live in
/// metadata_impl.hpp, which is included after the class -- confirm). The
/// per-member notes below are derived from the declared names and signatures
/// only; verify semantics against the implementation before relying on them.
/// NOTE(review): no access specifiers (`public:` / `private:`) and no closing
/// `};` for the class are visible in this listing.
21 class EXPORT ml_metadata {
/// Copy construction is disabled (deleted copy constructor).
25 ml_metadata(
const ml_metadata&) =
delete;
/// Reports whether a target is present -- presumably tests `target`; confirm.
30 inline bool has_target()
const;
/// Whether the column at position `column_index` is indexed
/// (by-position overload; exact meaning of "indexed" is defined elsewhere).
42 inline bool is_indexed(
size_t column_index)
const;
/// By-name overload of is_indexed().
53 inline bool is_indexed(
const std::string& column_name)
const;
/// Returns a const reference to the shared_ptr holding the column_indexer
/// for the column at position `column_index`.
60 inline const std::shared_ptr<ml_data_internal::column_indexer>&
61 indexer(
size_t column_index)
const;
/// By-name overload of indexer().
70 inline const std::shared_ptr<ml_data_internal::column_indexer>&
71 indexer(
const std::string& column_name)
const;
/// Target-column counterpart of is_indexed().
78 inline bool target_is_indexed()
const;
/// Target-column counterpart of indexer(); returns a const reference to the
/// shared_ptr holding the target's column_indexer.
83 inline const std::shared_ptr<ml_data_internal::column_indexer>& target_indexer()
const;
/// Returns a const reference to the shared_ptr holding the column_statistics
/// for the column at position `column_index`.
93 inline const std::shared_ptr<ml_data_internal::column_statistics>&
94 statistics(
size_t column_index)
const;
/// By-name overload of statistics().
103 inline const std::shared_ptr<ml_data_internal::column_statistics>&
104 statistics(
const std::string& column_name)
const;
/// Target-column counterpart of statistics().
109 inline const std::shared_ptr<ml_data_internal::column_statistics>& target_statistics()
const;
/// Returns a column count. Both flags default to true; judging by their
/// names they control whether side columns and untranslated columns are
/// included in the count -- confirm against the implementation.
118 inline size_t num_columns(
bool include_side_columns_if_present =
true,
119 bool include_untranslated_columns =
true)
const;
/// Returns the number of untranslated columns -- presumably the cached
/// `_num_untranslated_columns`; confirm.
123 inline size_t num_untranslated_columns()
const;
/// Whether any translated columns exist (semantics defined elsewhere).
128 inline bool has_translated_columns()
const;
/// Whether any untranslated columns exist (semantics defined elsewhere).
133 inline bool has_untranslated_columns()
const;
/// Returns, by const reference, the name of the column at `column_index`.
139 inline const std::string& column_name(
size_t column_index)
const;
/// Returns the column names by value. Not declared inline, so the definition
/// is expected in a separate translation unit. The flag defaults to true and
/// presumably controls whether side-column names are included -- confirm.
143 std::vector<std::string> column_names(
bool include_side_columns_if_present =
true)
const;
/// Maps a column name to its position -- presumably via
/// `_column_name_to_index_map`; behavior for unknown names is not visible
/// here (TODO confirm).
150 inline size_t column_index(
const std::string& column_name)
const;
/// Whether a column named `column_name` is known to this metadata.
157 inline bool contains_column(
const std::string& column_name)
const;
/// Returns the target column's name by const reference.
161 inline const std::string& target_column_name()
const;
/// Returns the size of the column at `column_index` (what "size" measures is
/// defined by the implementation; compare with index_size()).
167 inline size_t column_size(
size_t column_index)
const;
/// Target-column counterpart of column_size().
171 inline size_t target_column_size()
const;
/// Returns the index size of the column at `column_index`.
/// NOTE(review): how this differs from column_size() is not visible here;
/// set_training_index_sizes_to_current_column_sizes() suggests the two can
/// diverge -- confirm.
182 inline size_t index_size(
size_t column_index)
const;
/// By-name overload of index_size().
192 inline size_t index_size(
const std::string& column_name)
const;
/// Returns the global index offset of the column at `column_index`
/// (presumably its starting position in a flattened feature index; confirm).
202 inline size_t global_index_offset(
size_t column_index)
const;
/// By-name overload of global_index_offset().
213 inline size_t global_index_offset(
const std::string& column_name)
const;
/// Target-column counterpart of index_size().
219 inline size_t target_index_size()
const;
/// Returns the number of dimensions -- presumably the cached
/// `_num_dimensions`; confirm.
224 inline size_t num_dimensions()
const;
/// Whether the column at `column_index` is categorical.
234 inline bool is_categorical(
size_t column_index)
const;
/// By-name overload of is_categorical().
243 inline bool is_categorical(
const std::string& column_name)
const;
/// Target-column counterpart of is_categorical().
252 inline bool target_is_categorical()
const;
/// Whether the column at `column_index` is a side column (presumably one
/// supplied through `side_features`; confirm).
259 inline bool is_side_column(
size_t column_index)
const;
/// By-name overload of is_side_column().
268 inline bool is_side_column(
const std::string& column_name)
const;
/// Whether the column at `column_index` is an untranslated column.
275 inline bool is_untranslated_column(
size_t column_index)
const;
/// By-name overload of is_untranslated_column().
284 inline bool is_untranslated_column(
const std::string& column_name)
const;
/// Returns the ml_column_mode of the named column.
300 inline ml_column_mode column_mode(
const std::string& column_name)
const;
/// Returns the flex_type_enum of the named column.
321 inline flex_type_enum column_type(
const std::string& column_name)
const;
/// Returns an options map by const reference -- presumably the `options`
/// member; confirm.
333 inline const std::map<std::string, flexible_type>& get_current_options()
const;
/// Returns a feature name for position `index` within column `column_idx`.
/// Not declared inline; the naming format is defined by the out-of-line
/// implementation.
347 std::string feature_name(
size_t column_idx,
size_t index)
const;
/// Returns feature names by value. `unpack_categorical_columns` defaults to
/// true; its exact effect is defined by the out-of-line implementation.
361 std::vector<std::string> feature_names(
bool unpack_categorical_columns =
true)
const;
/// Always returns 2 (hard-coded version number of this metadata class).
366 size_t get_version()
const {
return 2; }
/// Whether side features are present -- presumably tests `side_features`;
/// confirm.
378 inline bool has_side_features()
const;
/// Stores `missing_value_action` in the `options` map under the key
/// "missing_value_action_on_predict", overwriting any existing entry.
/// NOTE(review): the closing brace of this body does not appear in this
/// listing.
383 inline void set_missing_value_on_predict(
const std::string& missing_value_action) {
384 options[
"missing_value_action_on_predict"] = missing_value_action;
/// Returns a shared_ptr to the side-features object (by value) -- presumably
/// a copy of the `side_features` member; confirm.
389 inline std::shared_ptr<ml_data_side_features> get_side_features()
const;
/// Non-const mutator; judging by its name it makes the recorded index sizes
/// match the current column sizes -- confirm against the implementation.
397 void set_training_index_sizes_to_current_column_sizes();
/// Returns a new shared ml_metadata built from a selection of columns.
/// `include_target` defaults to true and `columns_with_cleared_metadata`
/// defaults to empty; how each affects the result is defined by the
/// out-of-line implementation.
426 std::shared_ptr<ml_metadata> select_columns(
427 const std::vector<std::string>& columns,
bool include_target =
true,
428 const std::vector<std::string>& columns_with_cleared_metadata = {})
const;
/// Returns the column_metadata_ptr for the column at `column_index`.
440 inline ml_data_internal::column_metadata_ptr get_column_metadata(
size_t column_index)
const;
/// ml_data and the turi::metadata_load helper get direct access to the
/// members below.
445 friend class ml_data;
446 friend struct turi::metadata_load;
/// Per-column metadata, one entry per column.
449 std::vector<ml_data_internal::column_metadata_ptr> columns;
/// Metadata for the target column.
450 ml_data_internal::column_metadata_ptr target;
/// Side-features object shared with other owners.
453 std::shared_ptr<ml_data_side_features> side_features;
/// Original column names (how this differs from the names held in `columns`
/// is not visible here).
459 std::vector<std::string> original_column_names;
/// Option name -> value; written by set_missing_value_on_predict().
462 std::map<std::string, flexible_type> options;
/// Presumably (re)computes the cached values declared below; confirm.
468 void setup_cached_values();
/// Cached values. The two counters start at size_t(-1), i.e. the largest
/// size_t value -- presumably a "not yet computed" sentinel; confirm.
470 size_t _num_dimensions = size_t(-1);
471 size_t _num_untranslated_columns = size_t(-1);
/// Column name -> column position lookup.
472 std::map<std::string, size_t> _column_name_to_index_map;
478 #include <toolkits/ml_data_2/metadata_impl.hpp> 491 } END_OUT_OF_PLACE_SAVE()
495 arc >> is_not_nullptr;
497 m.reset(
new turi::v2::ml_metadata);
500 m = std::shared_ptr<turi::v2::ml_metadata>(
nullptr);
502 } END_OUT_OF_PLACE_LOAD()
#define BEGIN_OUT_OF_PLACE_LOAD(arc, tname, tval)
Macro to make it easy to define out-of-place loads.
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
#define BEGIN_OUT_OF_PLACE_SAVE(arc, tname, tval)
Macro to make it easy to define out-of-place saves.