6 #ifndef TURI_TOPK_COLUMN_INDEXER_H_ 7 #define TURI_TOPK_COLUMN_INDEXER_H_ 9 #include <core/data/flexible_type/flexible_type.hpp> 10 #include <core/util/hash_value.hpp> 11 #include <core/logging/assertions.hpp> 12 #include <core/util/bitops.hpp> 13 #include <core/storage/serialization/serialization_includes.hpp> 14 #include <core/generics/hopscotch_map.hpp> 15 #include <core/parallel/pthread_tools.hpp> 16 #include <core/export.hpp> 84 const size_t& _threshold = 1,
85 const size_t& _max_threshold = (
size_t) -1,
86 const std::string _column_name =
"") : topk(_topk),
87 threshold(_threshold), max_threshold(_max_threshold),column_name(_column_name) {
// Declaration only; the definition is not part of this listing.
// Parameters:
//   value      - the flexible_type value being recorded.
//   thread_idx - defaults to 0.
//   count      - defaults to 1.
// The function is tagged GL_HOT.
// NOTE(review): presumably adds `count` occurrences of `value` to the
// accumulator slot selected by `thread_idx` — confirm against the definition.
111 void insert_or_update(
const flexible_type& value,
size_t thread_idx = 0,
112 size_t count = 1) GL_HOT;
159 return index_lookup.size();
// Const accessor returning a version number as size_t (declaration only).
// The out-of-place save code in this header reads it via m->get_version(),
// and the load side passes a version to m->load_version(arc, version).
165 size_t get_version()
const;
// Configuration members. Each is also set from the matching constructor
// argument (_topk, _threshold, _max_threshold, _column_name) in the
// constructor's member initializer list.

// Defaults to (size_t)(-1), i.e. the largest size_t value.
// NOTE(review): presumably the maximum number of values to retain, with the
// default meaning "no limit" — confirm.
180 size_t topk = (size_t) (-1);
// In-class default is 0, whereas the constructor's default argument for
// _threshold is 1; the constructor value is the one that takes effect when
// that constructor is used.
// NOTE(review): presumably the minimum count a value needs to be kept — confirm.
181 size_t threshold = 0;
// Defaults to (size_t)(-1), i.e. the largest size_t value.
// NOTE(review): presumably the maximum count a value may have to be kept — confirm.
182 size_t max_threshold = (size_t) (-1);
// Defaults to the empty string.
// NOTE(review): presumably the name of the column being indexed — confirm.
183 std::string column_name =
"";
// A vector of hopscotch_map instances, each keyed by hash_value and mapping
// to a (flexible_type, size_t) pair.
// NOTE(review): presumably one map per thread (indexed by thread_idx), with
// the pair holding a value and its running count — confirm against the
// insert_or_update definition.
186 std::vector<hopscotch_map<hash_value, std::pair<flexible_type, size_t>>>
187 threadlocal_accumulator;
// Stored flexible_type values.
// NOTE(review): presumably parallel to `counts` (same length, same order) — confirm.
190 std::vector<flexible_type>
values;
// Stored size_t counts.
// NOTE(review): presumably counts[i] is the number of occurrences of values[i] — confirm.
191 std::vector<size_t> counts;
// Helper declarations; none of the definitions are part of this listing, so
// the notes below are assumptions drawn from the names and must be verified.

// NOTE(review): presumably keeps only the `topk` most frequent values — confirm.
202 void retain_only_top_k_values();
// NOTE(review): presumably keeps only values whose count meets `threshold` — confirm.
207 void retain_min_count_values();
// NOTE(review): presumably removes values based on a minimum-count rule;
// how it differs from retain_min_count_values is not visible here — confirm.
212 void delete_min_count_values();
// Takes the index of an entry.
// NOTE(review): presumably flags that entry for later removal by delete_all_marked — confirm.
216 void mark_for_deletion(
size_t index);
// NOTE(review): presumably removes every entry previously flagged via mark_for_deletion — confirm.
220 void delete_all_marked();
238 size_t version = m->get_version();
243 } END_OUT_OF_PLACE_SAVE()
249 arc >> is_not_nullptr;
258 m->load_version(arc, version);
261 m = std::shared_ptr<topk_indexer>(
nullptr);
263 } END_OUT_OF_PLACE_LOAD()
#define BEGIN_OUT_OF_PLACE_LOAD(arc, tname, tval)
Macro to make it easy to define out-of-place loads.
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
topk_indexer(const size_t &_topk=(size_t) -1, const size_t &_threshold=1, const size_t &_max_threshold=(size_t) -1, const std::string _column_name="")
std::set< T > values(const std::map< Key, T > &map)
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
std::vector< flexible_type > get_values() const
#define BEGIN_OUT_OF_PLACE_SAVE(arc, tname, tval)
Macro to make it easy to define out-of-place saves.