Turi Create  4.0
turi::ml_metadata Class Reference

#include <ml/ml_data/metadata.hpp>

Public Member Functions

bool has_target () const
 
bool is_indexed (size_t column_index) const
 
bool is_indexed (const std::string &column_name) const
 
const std::shared_ptr< ml_data_internal::column_indexer > & indexer (size_t column_index) const
 
const std::shared_ptr< ml_data_internal::column_indexer > & indexer (const std::string &column_name) const
 
bool target_is_indexed () const
 
const std::shared_ptr< ml_data_internal::column_statistics > & statistics (size_t column_index) const
 
const std::shared_ptr< ml_data_internal::column_statistics > & statistics (const std::string &column_name) const
 
size_t num_columns (bool include_untranslated_columns=true) const
 
size_t num_untranslated_columns () const
 
bool has_translated_columns () const
 
bool has_untranslated_columns () const
 
const std::string & column_name (size_t column_index) const
 
std::vector< std::string > column_names () const
 
size_t column_index (const std::string &column_name, bool max_on_error=false) const
 
bool contains_column (const std::string &column_name) const
 
const std::string & target_column_name () const
 
size_t column_size (size_t column_index) const
 
const flex_nd_vec::index_range_type & nd_column_shape (size_t column_index) const
 
const flex_nd_vec::index_range_type & nd_column_shape (const std::string &column_name) const
 
size_t target_column_size () const
 
size_t index_size (size_t column_index) const
 
size_t index_size (const std::string &column_name) const
 
size_t global_index_offset (size_t column_index) const
 
size_t global_index_offset (const std::string &column_name) const
 
size_t target_index_size () const
 
size_t num_dimensions () const
 
bool is_categorical (size_t column_index) const
 
bool is_categorical (const std::string &column_name) const
 
bool target_is_categorical () const
 
bool is_untranslated_column (size_t column_index) const
 
bool is_untranslated_column (const std::string &column_name) const
 
ml_column_mode column_mode (size_t column_index) const
 
ml_column_mode column_mode (const std::string &column_name) const
 
ml_column_mode target_column_mode () const
 
flex_type_enum column_type (size_t column_index) const
 
flex_type_enum column_type (const std::string &column_name) const
 
flex_type_enum target_column_type () const
 
size_t get_version () const
 
std::string feature_name (size_t column_idx, size_t index, bool quote_string_values=false) const
 
std::vector< std::string > feature_names (bool unpack_categorical_columns=true) const
 
void save (turi::oarchive &oarc) const
 
void load (turi::iarchive &iarc)
 
void set_training_index_sizes_to_current_column_sizes ()
 
ml_data_internal::column_metadata_ptr get_column_metadata (size_t column_index) const
 

Detailed Description

ml_metadata provides all the column-wise statistics and column translation information for ml_data.

Definition at line 23 of file metadata.hpp.

Member Function Documentation

◆ column_index()

size_t turi::ml_metadata::column_index ( const std::string &  _column_name,
bool  max_on_error = false 
) const
inline

Returns the index of the column matching column_name. If the column does not exist, an error is thrown unless max_on_error is true, in which case size_t(-1) is returned.

Parameters
column_name: The name of the column.
max_on_error: If true, then size_t(-1) is returned if the column is not present.

Returns the index of the column matching column_name, or throws an error if it does not exist.

Definition at line 27 of file metadata_impl.hpp.

◆ column_mode() [1/2]

ml_column_mode turi::ml_metadata::column_mode ( size_t  column_index) const
inline

Returns the mode of the column. See ml_data_column_modes.hpp for details on the column modes.

Parameters
column_index: The index of the column.

Definition at line 232 of file metadata_impl.hpp.

◆ column_mode() [2/2]

ml_column_mode turi::ml_metadata::column_mode ( const std::string &  column_name) const
inline

Returns the mode of the column. See ml_data_column_modes.hpp for details on the column modes.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 243 of file metadata_impl.hpp.

◆ column_name()

const std::string & turi::ml_metadata::column_name ( size_t  column_index) const
inline

Returns the name of the column at column_index.

Parameters
column_index: The index of the column.

Returns a const reference to the name of the column at column_index.

Definition at line 125 of file metadata_impl.hpp.

◆ column_names()

std::vector<std::string> turi::ml_metadata::column_names ( ) const

Returns all column names as a vector.

◆ column_size()

size_t turi::ml_metadata::column_size ( size_t  column_index) const
inline

Returns the current index size of the columns in the metadata.

Parameters
column_index: The index of the column.

Returns the current index size of the columns in the metadata.

Definition at line 152 of file metadata_impl.hpp.

◆ column_type() [1/2]

flex_type_enum turi::ml_metadata::column_type ( size_t  column_index) const
inline

Returns the type of the column.

Parameters
column_index: The index of the column.

Definition at line 260 of file metadata_impl.hpp.

◆ column_type() [2/2]

flex_type_enum turi::ml_metadata::column_type ( const std::string &  column_name) const
inline

Returns the type of the column.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 271 of file metadata_impl.hpp.

◆ contains_column()

bool turi::ml_metadata::contains_column ( const std::string &  column_name) const
inline

Returns true if the metadata contains the given column.

Parameters
column_name: The name of the column.

Definition at line 48 of file metadata_impl.hpp.

◆ feature_name()

std::string turi::ml_metadata::feature_name ( size_t  column_idx,
size_t  index,
bool  quote_string_values = false 
) const

Returns the feature name of a specific feature present in the metadata.

Numeric columns are represented by the column name.

Categorical / Categorical List / Dictionary columns are represented by "name[category]".

Vectors are represented by "vector[index]", where index is numerical.

ND vectors are represented by "nd_vector[idx1,idx2]" etc.

Returns
Names of features

◆ feature_names()

std::vector<std::string> turi::ml_metadata::feature_names ( bool  unpack_categorical_columns = true) const

Returns a list of all the feature names present in the metadata.

Numeric columns are represented by the column name.

Categorical / Categorical List / Dictionary columns are represented by "name[category]".

Vectors are represented by "vector[index]", where index is numerical.

ND vectors are represented by "nd_vector[idx1,idx2]" etc.

Returns
Names of features

◆ get_column_metadata()

ml_data_internal::column_metadata_ptr turi::ml_metadata::get_column_metadata ( size_t  column_index) const
inline

Returns a pointer to the internal column metadata of column column_index. Useful for dealing with the column_index

Definition at line 18 of file metadata_impl.hpp.

◆ get_version()

size_t turi::ml_metadata::get_version ( ) const
inline

Serialization version.

Definition at line 323 of file metadata.hpp.

◆ global_index_offset() [1/2]

size_t turi::ml_metadata::global_index_offset ( size_t  column_index) const
inline

Returns the global index offset of the columns in the metadata that were present at train time. This is fixed at setup time; global indices for the column c_idx are in the interval [global_index_offset(c_idx), global_index_offset(c_idx) + index_size(c_idx) - 1]

Parameters
column_index: The index of the column.

Returns the global index offset of the columns in the metadata that were present at train time. This is fixed at setup time; global indices for the column c_idx are in the interval [global_index_offset(c_idx), global_index_offset(c_idx) + index_size(c_idx) - 1]

Definition at line 210 of file metadata_impl.hpp.

◆ global_index_offset() [2/2]

size_t turi::ml_metadata::global_index_offset ( const std::string &  column_name) const
inline

Returns the global index offset of the columns in the metadata that were present at train time. This is fixed at setup time; global indices for the column c_idx are in the interval [global_index_offset(c_idx), global_index_offset(c_idx) + index_size(c_idx) - 1]

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 223 of file metadata_impl.hpp.

◆ has_target()

bool turi::ml_metadata::has_target ( ) const
inline

Returns true if there is a target column present and false otherwise.

Definition at line 53 of file metadata_impl.hpp.

◆ has_translated_columns()

bool turi::ml_metadata::has_translated_columns ( ) const
inline

Returns true if there are translated columns present, and false otherwise.

Definition at line 102 of file metadata_impl.hpp.

◆ has_untranslated_columns()

bool turi::ml_metadata::has_untranslated_columns ( ) const
inline

Returns true if there are untranslated columns present, and false otherwise.

Definition at line 109 of file metadata_impl.hpp.

◆ index_size() [1/2]

size_t turi::ml_metadata::index_size ( size_t  column_index) const
inline

Returns the index size that the column had at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.

Parameters
column_index: The index of the column.

Returns the index size of the columns in the metadata that were present at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.

Definition at line 180 of file metadata_impl.hpp.

◆ index_size() [2/2]

size_t turi::ml_metadata::index_size ( const std::string &  column_name) const
inline

Returns the index size that the column had at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 192 of file metadata_impl.hpp.

◆ indexer() [1/2]

const std::shared_ptr< ml_data_internal::column_indexer > & turi::ml_metadata::indexer ( size_t  column_index) const
inline

Returns the indexer for a particular column.

Parameters
column_index: The index of the column.

Definition at line 61 of file metadata_impl.hpp.

◆ indexer() [2/2]

const std::shared_ptr< ml_data_internal::column_indexer > & turi::ml_metadata::indexer ( const std::string &  column_name) const
inline

Returns the indexer for a particular column.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 66 of file metadata_impl.hpp.

◆ is_categorical() [1/2]

bool turi::ml_metadata::is_categorical ( size_t  column_index) const
inline

Returns true if the underlying type is treated as a categorical variable, and false otherwise.

Parameters
column_index: The index of the column.

Definition at line 288 of file metadata_impl.hpp.

◆ is_categorical() [2/2]

bool turi::ml_metadata::is_categorical ( const std::string &  column_name) const
inline

Returns true if the underlying type is treated as a categorical variable, and false otherwise.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 300 of file metadata_impl.hpp.

◆ is_indexed() [1/2]

bool turi::ml_metadata::is_indexed ( size_t  column_index) const
inline

Returns true if the underlying column type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.

Parameters
column_index: The index of the column.

Definition at line 311 of file metadata_impl.hpp.

◆ is_indexed() [2/2]

bool turi::ml_metadata::is_indexed ( const std::string &  column_name) const
inline

Returns true if the underlying column type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 325 of file metadata_impl.hpp.

◆ is_untranslated_column() [1/2]

bool turi::ml_metadata::is_untranslated_column ( size_t  column_index) const
inline

Returns true if the underlying column type is untranslated. This means it will only be available as flexible_type later on.

Parameters
column_index: The index of the column.

Definition at line 334 of file metadata_impl.hpp.

◆ is_untranslated_column() [2/2]

bool turi::ml_metadata::is_untranslated_column ( const std::string &  column_name) const
inline

Returns true if the underlying column type is untranslated. This means it will only be available as flexible_type later on.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 345 of file metadata_impl.hpp.

◆ load()

void turi::ml_metadata::load ( turi::iarchive &  iarc)

Serialization – load.

◆ nd_column_shape() [1/2]

const flex_nd_vec::index_range_type & turi::ml_metadata::nd_column_shape ( size_t  column_index) const
inline

If the type of the column is an ND vector, returns the shape of the nd_vector held by that column.

Parameters
column_index: The index of the column.

Returns the current nd column shape of the columns

Definition at line 158 of file metadata_impl.hpp.

◆ nd_column_shape() [2/2]

const flex_nd_vec::index_range_type & turi::ml_metadata::nd_column_shape ( const std::string &  column_name) const
inline

If the type of the column is an ND vector, returns the shape of the nd_vector held by that column.

Parameters
column_name: The name of the column.

Returns the current nd column shape of the columns

Definition at line 164 of file metadata_impl.hpp.

◆ num_columns()

size_t turi::ml_metadata::num_columns ( bool  include_untranslated_columns = true) const
inline

Returns the number of columns present.

Definition at line 85 of file metadata_impl.hpp.

◆ num_dimensions()

size_t turi::ml_metadata::num_dimensions ( ) const
inline

Returns the number of distinct dimensions, including all categorical features, etc.

Returns the number of distinct dimensions, including all categorical features.

Definition at line 116 of file metadata_impl.hpp.

◆ num_untranslated_columns()

size_t turi::ml_metadata::num_untranslated_columns ( ) const
inline

Returns the number of untranslated columns present.

Definition at line 94 of file metadata_impl.hpp.

◆ save()

void turi::ml_metadata::save ( turi::oarchive &  oarc) const

Serialization – save.

◆ set_training_index_sizes_to_current_column_sizes()

void turi::ml_metadata::set_training_index_sizes_to_current_column_sizes ( )

Sets the values of all future calls to index_size() to return the column_size values currently present in the indexers. This is done automatically at the end of fill(), but it can be useful if further changes that are still considered part of training are made to the indexers after that point.

◆ statistics() [1/2]

const std::shared_ptr< ml_data_internal::column_statistics > & turi::ml_metadata::statistics ( size_t  column_index) const
inline

Returns the statistics for a particular column.

Parameters
column_index: The index of the column.

Definition at line 75 of file metadata_impl.hpp.

◆ statistics() [2/2]

const std::shared_ptr< ml_data_internal::column_statistics > & turi::ml_metadata::statistics ( const std::string &  column_name) const
inline

Returns the statistics for a particular column.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_name: The name of the column.

Definition at line 80 of file metadata_impl.hpp.

◆ target_column_mode()

ml_column_mode turi::ml_metadata::target_column_mode ( ) const
inline

Returns the mode of the target column. See ml_data_column_modes.hpp for details on the column modes.

Definition at line 250 of file metadata_impl.hpp.

◆ target_column_name()

const std::string & turi::ml_metadata::target_column_name ( ) const
inline

Returns the name of the target column.

Returns a const reference to the name of the target column.

Definition at line 133 of file metadata_impl.hpp.

◆ target_column_size()

size_t turi::ml_metadata::target_column_size ( ) const
inline

Returns the current index size of the target column.

Definition at line 171 of file metadata_impl.hpp.

◆ target_column_type()

flex_type_enum turi::ml_metadata::target_column_type ( ) const
inline

Returns the type of the target column.

Returns the type of the target column.

Definition at line 278 of file metadata_impl.hpp.

◆ target_index_size()

size_t turi::ml_metadata::target_index_size ( ) const
inline

Returns the index size that the target column had at train time. Index size differs from column size in that column_size may grow on test, but index_size is constant.

Definition at line 200 of file metadata_impl.hpp.

◆ target_is_categorical()

bool turi::ml_metadata::target_is_categorical ( ) const
inline

Returns true if the underlying target type is treated as a categorical variable, and false otherwise.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Parameters
column_nameThe name of the column.

Returns true if the underlying type is treated as a categorical variable, and false otherwise.

Definition at line 353 of file metadata_impl.hpp.

◆ target_is_indexed()

bool turi::ml_metadata::target_is_indexed ( ) const
inline

Returns true if the underlying target type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.

Returns true if the underlying type is indexed, and false otherwise. This differs from is_categorical in that dictionaries are not treated as pure categorical variables, as they have values associated with them, but they are indexed.

Definition at line 363 of file metadata_impl.hpp.


The documentation for this class was generated from the following files: