Turi Create  4.0
turi::ml_data_internal::column_statistics Class Reference

#include <ml/ml_data/column_statistics.hpp>

Public Member Functions

 column_statistics (std::string column_name, ml_column_mode mode, flex_type_enum original_column_type)
 
size_t num_observations () const
 
void initialize ()
 Initialize the statistics – counting, mean, and stdev.
 
void update_categorical_statistics (size_t thread_idx, const std::vector< size_t > &cat_index_vect) GL_HOT
 Update categorical statistics for a batch of categorical indices.
 
void update_numeric_statistics (size_t thread_idx, const std::vector< double > &value_vect) GL_HOT
 Update numeric statistics for a batch of real values.
 
void update_dict_statistics (size_t thread_idx, const std::vector< std::pair< size_t, double > > &dict) GL_HOT
 Update statistics after observing a dictionary.
 
void finalize ()
 
void reindex (const std::vector< size_t > &new_index_map, size_t new_column_size)
 
void merge_in (const column_statistics &other)
 
size_t get_version () const
 
void save_impl (turi::oarchive &oarc) const
 
void load_version (turi::iarchive &iarc, size_t version)
 
bool is_equal (const column_statistics *other_ptr) const
 
bool operator== (const column_statistics &other) const
 
bool operator!= (const column_statistics &other) const
 

Detailed Description

column_statistics contains the statistics (counts, mean, and standard deviation) of a single column of an SFrame. A collection of column_statistics objects, one per column, holds the per-column statistics used by the ml_data container.

Definition at line 35 of file column_statistics.hpp.

Constructor & Destructor Documentation

◆ column_statistics()

turi::ml_data_internal::column_statistics::column_statistics ( std::string  column_name,
ml_column_mode  mode,
flex_type_enum  original_column_type 
)

Constructor. Creates the statistics object for the column with the given name, column mode, and original column type.

Member Function Documentation

◆ finalize()

void turi::ml_data_internal::column_statistics::finalize ( )

Perform final computations on the different statistics. Must be called after all the data is filled.

◆ get_version()

size_t turi::ml_data_internal::column_statistics::get_version ( ) const
inline

Returns the current serialization version of this model.

Definition at line 351 of file column_statistics.hpp.

◆ is_equal()

bool turi::ml_data_internal::column_statistics::is_equal ( const column_statistics * other_ptr) const

For debugging purposes.

◆ load_version()

void turi::ml_data_internal::column_statistics::load_version ( turi::iarchive & iarc,
size_t  version 
)

Load the object.

◆ merge_in()

void turi::ml_data_internal::column_statistics::merge_in ( const column_statistics & other)

Merges in statistics from another column_statistics object.

◆ num_observations()

size_t turi::ml_data_internal::column_statistics::num_observations ( ) const
inline

Returns the number of observations seen by the methods collecting the statistics.

Definition at line 54 of file column_statistics.hpp.

◆ operator!=()

bool turi::ml_data_internal::column_statistics::operator!= ( const column_statistics & other) const

Inequality testing – slow! Use for debugging/testing

◆ operator==()

bool turi::ml_data_internal::column_statistics::operator== ( const column_statistics & other) const

Equality testing – slow! Use for debugging/testing

◆ reindex()

void turi::ml_data_internal::column_statistics::reindex ( const std::vector< size_t > &  new_index_map,
size_t  new_column_size 
)

Reindex, typically according to a global map of things.

◆ save_impl()

void turi::ml_data_internal::column_statistics::save_impl ( turi::oarchive & oarc) const

Serialize the object (save).


The documentation for this class was generated from the following file: