Turi Create
4.0
|
#include <ml/ml_data/metadata.hpp>
Public Member Functions | |
bool | has_target () const |
bool | is_indexed (size_t column_index) const |
bool | is_indexed (const std::string &column_name) const |
const std::shared_ptr< ml_data_internal::column_indexer > & | indexer (size_t column_index) const |
const std::shared_ptr< ml_data_internal::column_indexer > & | indexer (const std::string &column_name) const |
bool | target_is_indexed () const |
const std::shared_ptr< ml_data_internal::column_statistics > & | statistics (size_t column_index) const |
const std::shared_ptr< ml_data_internal::column_statistics > & | statistics (const std::string &column_name) const |
size_t | num_columns (bool include_untranslated_columns=true) const |
size_t | num_untranslated_columns () const |
bool | has_translated_columns () const |
bool | has_untranslated_columns () const |
const std::string & | column_name (size_t column_index) const |
std::vector< std::string > | column_names () const |
size_t | column_index (const std::string &column_name, bool max_on_error=false) const |
bool | contains_column (const std::string &column_name) const |
const std::string & | target_column_name () const |
size_t | column_size (size_t column_index) const |
const flex_nd_vec::index_range_type & | nd_column_shape (size_t column_index) const |
const flex_nd_vec::index_range_type & | nd_column_shape (const std::string &column_name) const |
size_t | target_column_size () const |
size_t | index_size (size_t column_index) const |
size_t | index_size (const std::string &column_name) const |
size_t | global_index_offset (size_t column_index) const |
size_t | global_index_offset (const std::string &column_name) const |
size_t | target_index_size () const |
size_t | num_dimensions () const |
bool | is_categorical (size_t column_index) const |
bool | is_categorical (const std::string &column_name) const |
bool | target_is_categorical () const |
bool | is_untranslated_column (size_t column_index) const |
bool | is_untranslated_column (const std::string &column_name) const |
ml_column_mode | column_mode (size_t column_index) const |
ml_column_mode | column_mode (const std::string &column_name) const |
ml_column_mode | target_column_mode () const |
flex_type_enum | column_type (size_t column_index) const |
flex_type_enum | column_type (const std::string &column_name) const |
flex_type_enum | target_column_type () const |
size_t | get_version () const |
std::string | feature_name (size_t column_idx, size_t index, bool quote_string_values=false) const |
std::vector< std::string > | feature_names (bool unpack_categorical_columns=true) const |
void | save (turi::oarchive &oarc) const |
void | load (turi::iarchive &iarc) |
void | set_training_index_sizes_to_current_column_sizes () |
ml_data_internal::column_metadata_ptr | get_column_metadata (size_t column_index) const |
ml_metadata provides all the column-wise statistics and column translation information for ml_data.
Definition at line 23 of file metadata.hpp.
|
inline |
Returns the index of the column matching column_name. If the column does not exist, an error is thrown unless max_on_error is true, in which case size_t(-1) is returned.
column_name | The name of the column. |
max_on_error | If true, then size_t(-1) is returned if the column is not present. |
Returns the index of the column matching column_name, or throws an error if it does not exist.
Definition at line 27 of file metadata_impl.hpp.
|
inline |
Returns the mode of the column. See ml_data_column_modes.hpp for details on the column modes.
column_index | The index of the column. |
Definition at line 232 of file metadata_impl.hpp.
|
inline |
Returns the mode of the column. See ml_data_column_modes.hpp for details on the column modes.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 243 of file metadata_impl.hpp.
|
inline |
Returns the name of the column at column_index.
column_index | The index of the column. |
Returns the name of the column at column_index.
Definition at line 125 of file metadata_impl.hpp.
std::vector<std::string> turi::ml_metadata::column_names | ( | ) | const |
Returns all column names as a vector.
|
inline |
Returns the current index size of the columns in the metadata.
column_index | The index of the column. |
Returns the current index size of the columns in the metadata.
Definition at line 152 of file metadata_impl.hpp.
|
inline |
Returns the type of the column, as a flex_type_enum value.
column_index | The index of the column. |
Definition at line 260 of file metadata_impl.hpp.
|
inline |
Returns the type of the column, as a flex_type_enum value.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 271 of file metadata_impl.hpp.
|
inline |
Returns true if the metadata contains the given column.
column_name | The name of the column. |
Definition at line 48 of file metadata_impl.hpp.
std::string turi::ml_metadata::feature_name | ( | size_t | column_idx, |
size_t | index, | ||
bool | quote_string_values = false |
||
) | const |
Returns the feature name of a specific feature present in the metadata.
Numeric columns are represented by the column name.
Categorical / Categorical List / Dictionary columns are represented by "name[category]".
Vectors are represented by "vector[index]", where index is numerical.
ND vectors are represented by "nd_vector[idx1,idx2]" etc.
std::vector<std::string> turi::ml_metadata::feature_names | ( | bool | unpack_categorical_columns = true | ) | const |
Returns a list of all the feature names present in the metadata.
Numeric columns are represented by the column name.
Categorical / Categorical List / Dictionary columns are represented by "name[category]".
Vectors are represented by "vector[index]", where index is numerical.
ND vectors are represented by "nd_vector[idx1,idx2]" etc.
|
inline |
Returns a pointer to the internal column metadata of column column_index. Useful for dealing with the column_index
Definition at line 18 of file metadata_impl.hpp.
|
inline |
Serialization version.
Definition at line 323 of file metadata.hpp.
|
inline |
Returns the global index offset of the columns in the metadata that were present at train time. This is fixed at setup time; global indices for the column c_idx are in the interval [global_index_offset(c_idx), global_index_offset(c_idx) + index_size(c_idx) - 1]
column_index | The index of the column. |
Returns the global index offset of the columns in the metadata that were present at train time. This is fixed at setup time; global indices for the column c_idx are in the interval [global_index_offset(c_idx), global_index_offset(c_idx) + index_size(c_idx) - 1]
Definition at line 210 of file metadata_impl.hpp.
|
inline |
Returns the global index offset of the columns in the metadata that were present at train time. This is fixed at setup time; global indices for the column c_idx are in the interval [global_index_offset(c_idx), global_index_offset(c_idx) + index_size(c_idx) - 1]
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 223 of file metadata_impl.hpp.
|
inline |
Returns true if there is a target column present and false otherwise.
Definition at line 53 of file metadata_impl.hpp.
|
inline |
Returns true if there are translated columns present, and false otherwise.
Definition at line 102 of file metadata_impl.hpp.
|
inline |
Returns true if there are untranslated columns present, and false otherwise.
Definition at line 109 of file metadata_impl.hpp.
|
inline |
Returns the index size of the column in the metadata that was present at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.
column_index | The index of the column. |
Returns the index size of the columns in the metadata that were present at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.
Definition at line 180 of file metadata_impl.hpp.
|
inline |
Returns the index size of the column in the metadata that was present at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 192 of file metadata_impl.hpp.
|
inline |
Returns the indexer for a particular column.
column_index | The index of the column. |
Definition at line 61 of file metadata_impl.hpp.
|
inline |
Returns the indexer for a particular column.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 66 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying type is treated as a categorical variable, and false otherwise.
column_index | The index of the column. |
Definition at line 288 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying type is treated as a categorical variable, and false otherwise.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 300 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying column type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.
column_index | The index of the column. |
Definition at line 311 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying column type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 325 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying column type is untranslated. This means it will only be available as flexible_type later on.
column_index | The index of the column. |
Definition at line 334 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying column type is untranslated. This means it will only be available as flexible_type later on.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 345 of file metadata_impl.hpp.
void turi::ml_metadata::load | ( | turi::iarchive & | iarc | ) |
Serialization – load.
|
inline |
If the type of the column is an ND vector, returns the shape of the nd_vector held by that column.
column_index | The index of the column. |
Returns the current nd column shape of the columns
Definition at line 158 of file metadata_impl.hpp.
|
inline |
If the type of the column is an ND vector, returns the shape of the nd_vector held by that column.
column_name | The name of the column. |
Returns the current nd column shape of the columns
Definition at line 164 of file metadata_impl.hpp.
|
inline |
Returns the number of columns present.
Definition at line 85 of file metadata_impl.hpp.
|
inline |
Returns the number of distinct dimensions, including all categorical features, etc.
Returns the number of distinct dimensions, including all categorical features.
Definition at line 116 of file metadata_impl.hpp.
|
inline |
Returns the number of untranslated columns present.
Definition at line 94 of file metadata_impl.hpp.
void turi::ml_metadata::save | ( | turi::oarchive & | oarc | ) | const |
Serialization – save.
void turi::ml_metadata::set_training_index_sizes_to_current_column_sizes | ( | ) |
Makes all future calls to index_size() return the column_size values currently present in the indexers. This is done automatically at the end of fill(), but it can be useful to call explicitly if the indexers are modified further after that point and those changes are still considered part of training.
|
inline |
Returns the statistics for a particular column.
column_index | The index of the column. |
Definition at line 75 of file metadata_impl.hpp.
|
inline |
Returns the statistics for a particular column.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Definition at line 80 of file metadata_impl.hpp.
|
inline |
Returns the mode of the target column. See ml_data_column_modes.hpp for details on the column modes.
Definition at line 250 of file metadata_impl.hpp.
|
inline |
Returns the name of the target column.
Returns the name of the target column.
Definition at line 133 of file metadata_impl.hpp.
|
inline |
Returns the current index size of the columns in the metadata.
Definition at line 171 of file metadata_impl.hpp.
|
inline |
Returns the type of the target column, as a flex_type_enum value.
Returns the type of the target column.
Definition at line 278 of file metadata_impl.hpp.
|
inline |
Returns the index size of the columns in the metadata that were present at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.
Definition at line 200 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying target type is treated as a categorical variable, and false otherwise.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
column_name | The name of the column. |
Returns true if the underlying type is treated as a categorical variable, and false otherwise.
Definition at line 353 of file metadata_impl.hpp.
|
inline |
Returns true if the underlying target type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.
Returns true if the underlying type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.
Definition at line 363 of file metadata_impl.hpp.