Turi Create
4.0
|
#include <toolkits/feature_engineering/statistics_tracker.hpp>
Public Member Functions | |
statistics_tracker (const std::string _column_name="") | |
statistics_tracker (const statistics_tracker &)=delete | |
void | initialize () |
void | insert_or_update (const flexible_type &key, flexible_type value, size_t thread_idx=0) GL_HOT |
size_t | lookup (const flexible_type &value) const |
size_t | lookup_counts (const flexible_type &value) const |
flex_float | lookup_means (const flexible_type &value) const |
void | finalize (size_t num_examples) |
flexible_type | inverse_lookup (size_t idx) const |
size_t | size () const |
std::vector< flexible_type > | get_keys () const |
size_t | get_version () const |
void | save_impl (turi::oarchive &oarc) const |
void | load_version (turi::iarchive &iarc, size_t version) |
Parallel statistics (mean) tracker.
Note: This implementation is intended to be general and will be moved to a more general location later.
// Construct the tracker with the column name (used in error messages). statistics_tracker tracker("column_name_for_error_messages"); tracker.initialize();
// Insert flexible types into the tracker for (const flexible_type& v: sa.range_iterator()) { tracker.insert_or_update(v); }
// Finalize mapping (finalize requires the number of examples) tracker.finalize(num_examples);
size_t index = tracker.lookup(v); // Returns (size_t) -1 if not present.
size_t counts = tracker.lookup_counts(v); // Returns 0 if not present.
flexible_type v = tracker.inverse_lookup(1); // Fails if index doesn't exist.
// Initialize tracker.initialize();
// Perform the indexing. in_parallel([&](size_t thread_idx, size_t num_threads) {
size_t start_idx = src_size * thread_idx / num_threads; size_t end_idx = src_size * (thread_idx + 1) / num_threads;
// Inserts value of 1 for each key k for (const flexible_type& k: sa.range_iterator(start_idx, end_idx)) { tracker.insert_or_update(k, 1, thread_idx); } });
// Finalize (finalize requires the number of examples; src_size is assumed here) tracker.finalize(src_size);
Definition at line 72 of file statistics_tracker.hpp.
|
inline |
Default constructor
[in] | column_name | Column name for display. |
Definition at line 82 of file statistics_tracker.hpp.
|
delete |
Copy constructor: Don't want to risk making copies of this.
void turi::statistics_tracker::finalize | ( | size_t | num_examples | ) |
Finalize by dropping indices that don't meet
|
inline |
Returns the keys currently held by the tracker.
Definition at line 162 of file statistics_tracker.hpp.
size_t turi::statistics_tracker::get_version | ( | ) | const |
Returns the current version used for the serialization.
void turi::statistics_tracker::initialize | ( | ) |
Initialize the index mapping and setup. Should be called before starting the map.
void turi::statistics_tracker::insert_or_update | ( | const flexible_type & | key, |
flexible_type | value, | ||
size_t | thread_idx = 0 |
||
) |
Insert a key with the given value, or update the tracked statistics for the key if it is already present.
[in] | key | Flexible type. |
[in] | value | Flexible type. |
[in] | thread_idx | Thread id (For parallel insertion). |
flexible_type turi::statistics_tracker::inverse_lookup | ( | size_t | idx | ) | const |
Returns the "value" associated with the index.
void turi::statistics_tracker::load_version | ( | turi::iarchive & | iarc, |
size_t | version | ||
) |
Load the object.
size_t turi::statistics_tracker::lookup | ( | const flexible_type & | value | ) | const |
Returns the index associated with the value.
[in] | value | Search for the value. |
size_t turi::statistics_tracker::lookup_counts | ( | const flexible_type & | value | ) | const |
Returns the counts associated with the value.
[in] | value | Search for the value. |
flex_float turi::statistics_tracker::lookup_means | ( | const flexible_type & | value | ) | const |
Returns the mean associated with the value.
[in] | value | Search for the value. |
void turi::statistics_tracker::save_impl | ( | turi::oarchive & | oarc | ) | const |
Serialize the object (save).
|
inline |
Returns the number of categorical variables.
Definition at line 154 of file statistics_tracker.hpp.