6 #ifndef TURI_ML2_DATA_UNIQUE_COLUMN_INDEXER_H_ 7 #define TURI_ML2_DATA_UNIQUE_COLUMN_INDEXER_H_ 9 #include <core/data/flexible_type/flexible_type.hpp> 10 #include <core/util/hash_value.hpp> 11 #include <core/logging/assertions.hpp> 12 #include <core/util/bitops.hpp> 13 #include <core/storage/serialization/serialization_includes.hpp> 14 #include <core/generics/hopscotch_map.hpp> 15 #include <core/parallel/pthread_tools.hpp> 16 #include <toolkits/ml_data_2/indexing/column_indexer.hpp> 18 namespace turi {
namespace v2 {
namespace ml_data_internal {
/** Compile-time integer constant with value 8; the name describes it as the
 *  bit width of a "first level lookup".
 *
 *  NOTE(review): presumably the number of hash bits used to select an entry
 *  of a first-level table (2^8 = 256 entries) -- the code that consumes this
 *  constant is not visible here, so confirm against the implementation.
 */
27 static constexpr
int _column_unique_indexer_first_level_lookup_size_n_bits = 8;
/** Returns a vector of flexible_type values by value. Non-const, so it may
 *  modify the object it is called on.
 *
 *  NOTE(review): judging by the name, this hands back the stored values and
 *  clears the internal state -- the definition is not visible here; confirm.
 */
161 std::vector<flexible_type> reset_and_return_values();
/** Returns a callable taking a const flexible_type& and producing a
 *  flexible_type. Declared const, so obtaining the callable does not modify
 *  this object through this call.
 *
 *  NOTE(review): presumably the returned function maps a raw value to its
 *  index -- what it captures (and therefore its lifetime requirements) cannot
 *  be determined from this declaration; confirm against the definition.
 */
178 std::function<flexible_type(const flexible_type&)>
indexing_lambda()
const;
/** A vector whose entries each pair a simple_spinlock with a hopscotch_map
 *  from hash_value to size_t.
 *
 *  NOTE(review): presumably each spinlock guards the map it is paired with,
 *  and the map takes a value's hash to its assigned index -- how entries are
 *  selected and locked is not visible here; confirm.
 */
182 std::vector<std::pair<simple_spinlock, hopscotch_map<hash_value, size_t> > >
183 index_by_values_lookup;
/** A vector of vectors of (size_t, flexible_type) pairs.
 *
 *  NOTE(review): the name suggests one inner vector per thread, collecting
 *  (index, value) pairs that are merged into the by-index table later --
 *  neither the sizing nor the merge step is visible here; confirm.
 */
185 std::vector<std::vector<std::pair<size_t, flexible_type> > >
186 values_by_index_threadlocal_accumulator;
/** A flat vector of flexible_type values.
 *
 *  NOTE(review): presumably position i holds the value assigned index i --
 *  confirm against the code that fills and reads it.
 */
188 std::vector<flexible_type> values_by_index_lookup;
/** Atomic size_t counter, initialised to 0.
 *
 *  NOTE(review): presumably the number of distinct values indexed so far --
 *  confirm where it is incremented.
 */
191 atomic<size_t> _column_size = 0;
/** Mutex member.
 *
 *  NOTE(review): the name suggests it serialises modifications to the index;
 *  which operations acquire it is not visible here -- confirm.
 */
193 mutex index_modification_lock;
std::function< flexible_type(const flexible_type &)> deindexing_lambda() const
size_t get_version() const
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
std::function< flexible_type(const flexible_type &)> indexing_lambda() const
void register_real_value(const flexible_type &feature)
void load_version(turi::iarchive &iarc, size_t version)
void save_impl(turi::oarchive &oarc) const
void insert_values_into_index(const std::vector< flexible_type > &features)
std::shared_ptr< column_indexer > create_cleared_copy() const
size_t immutable_map_value_to_index(const flexible_type &feature) const
size_t indexed_column_size() const
std::set< flex_type_enum > extract_key_types() const
flexible_type map_index_to_value(size_t idx) const
void set_values(std::vector< flexible_type > &&values)
std::set< T > values(const std::map< Key, T > &map)
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
size_t map_value_to_index(size_t thread_idx, const flexible_type &feature) GL_HOT