Turi Create  4.0
turi::statistics_tracker Class Reference

#include <toolkits/feature_engineering/statistics_tracker.hpp>

Public Member Functions

 statistics_tracker (const std::string _column_name="")
 
 statistics_tracker (const statistics_tracker &)=delete
 
void initialize ()
 
void insert_or_update (const flexible_type &key, flexible_type value, size_t thread_idx=0) GL_HOT
 
size_t lookup (const flexible_type &value) const
 
size_t lookup_counts (const flexible_type &value) const
 
flex_float lookup_means (const flexible_type &value) const
 
void finalize (size_t num_examples)
 
flexible_type inverse_lookup (size_t idx) const
 
size_t size () const
 
std::vector< flexible_type > get_keys () const
 
size_t get_version () const
 
void save_impl (turi::oarchive &oarc) const
 
void load_version (turi::iarchive &iarc, size_t version)
 

Detailed Description

Parallel statistics (mean) tracker.

Note: This implementation is intended to be general and will be moved to some place more general later.

Construction

// Construct the tracker with the column name (used in error messages).
statistics_tracker tracker("column_name_for_error_messages");
tracker.initialize();

// Insert flexible types into the tracker (here with a value of 1 for each key).
for (const flexible_type& v: sa.range_iterator()) {
  tracker.insert_or_update(v, 1);
}

// Finalize mapping
tracker.finalize(num_examples);

Lookups

size_t index = tracker.lookup(v); // Returns (size_t) -1 if not present.

size_t counts = tracker.lookup_counts(v); // Returns 0 if not present.

flexible_type v = tracker.inverse_lookup(1); // Fails if index doesn't exist.

Parallel construction

// Initialize
tracker.initialize();

// Perform the indexing.
in_parallel([&](size_t thread_idx, size_t num_threads) {

  size_t start_idx = src_size * thread_idx / num_threads;
  size_t end_idx = src_size * (thread_idx + 1) / num_threads;

  // Inserts value of 1 for each key k
  for (const flexible_type& k: sa.range_iterator(start_idx, end_idx)) {
    tracker.insert_or_update(k, 1, thread_idx);
  }
});

// Finalize
tracker.finalize(num_examples);

Definition at line 72 of file statistics_tracker.hpp.

Constructor & Destructor Documentation

◆ statistics_tracker() [1/2]

turi::statistics_tracker::statistics_tracker ( const std::string  _column_name = "")
inline

Default constructor

Parameters
[in] _column_name  Column name for display.

Definition at line 82 of file statistics_tracker.hpp.

◆ statistics_tracker() [2/2]

turi::statistics_tracker::statistics_tracker ( const statistics_tracker & )
delete

Copy constructor: Don't want to risk making copies of this.

Member Function Documentation

◆ finalize()

void turi::statistics_tracker::finalize ( size_t  num_examples)

Finalize by dropping indices that don't meet:

  • Count requirement, i.e. count >= threshold.
  • Topk requirement.

◆ get_keys()

std::vector<flexible_type> turi::statistics_tracker::get_keys ( ) const
inline

Returns the keys held by the tracker.

Returns
A vector containing the keys (as flexible_type).

Definition at line 162 of file statistics_tracker.hpp.

◆ get_version()

size_t turi::statistics_tracker::get_version ( ) const

Returns the current version used for the serialization.

◆ initialize()

void turi::statistics_tracker::initialize ( )

Initialize the index mapping and setup. Should be called before starting the map.

◆ insert_or_update()

void turi::statistics_tracker::insert_or_update ( const flexible_type & key,
flexible_type  value,
size_t  thread_idx = 0 
)

Insert a key with its value, or update the existing entry if the key is already present.

Parameters
[in] key         Flexible type.
[in] value       Flexible type.
[in] thread_idx  Thread id (for parallel insertion).

◆ inverse_lookup()

flexible_type turi::statistics_tracker::inverse_lookup ( size_t  idx) const

Returns the "value" associated with the index.

Parameters
[in] idx  Index to look up.

◆ load_version()

void turi::statistics_tracker::load_version ( turi::iarchive & iarc,
size_t  version 
)

Load the object.

◆ lookup()

size_t turi::statistics_tracker::lookup ( const flexible_type & value) const

Returns the index associated with the value.

Parameters
[in] value  Search for the value.
Returns
The index. (Returns size_t(-1) if not present).

◆ lookup_counts()

size_t turi::statistics_tracker::lookup_counts ( const flexible_type & value) const

Returns the counts associated with the value.

Parameters
[in] value  Search for the value.
Returns
Counts (Returns 0 if not present).

◆ lookup_means()

flex_float turi::statistics_tracker::lookup_means ( const flexible_type & value) const

Returns the mean associated with the value.

Parameters
[in] value  Search for the value.
Returns
Mean (Returns 0 if not present).

◆ save_impl()

void turi::statistics_tracker::save_impl ( turi::oarchive & oarc) const

Serialize the object (save).

◆ size()

size_t turi::statistics_tracker::size ( ) const
inline

Returns the number of categorical variables.

Returns
Column size.

Definition at line 154 of file statistics_tracker.hpp.


The documentation for this class was generated from the following file: