Turi Create  4.0
value_container_mapper.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_VALUE_CONTAINER_MAPPER_H_
7 #define TURI_VALUE_CONTAINER_MAPPER_H_
8 
9 #include <vector>
10 #include <map>
11 #include <cmath>
12 #include <set>
13 
14 #include <core/util/code_optimization.hpp>
15 #include <sparsehash/dense_hash_set>
16 #include <core/generics/value_container_mapper_internal.hpp>
17 
18 namespace turi {
19 
20 /**
21  * \ingroup util
22  * A fast, specialized container to track lookups from a Value to a
23  * container holding that value (plus other things). Essentially,
24  * this is very optimized version of std::map<Value,
25  * ValueContainer*>, which adds the following assumptions on the API
26  * and the ValueContainer type in order to be really fast.
27  *
28  * 1. The hash, implemented using a custom hashkey class (see below),
29  * is tracked explicitly with the value, and it is up to the user
30  * to track this. This permits caching this value and more
31  * efficient storage.
32  *
33  * 2. The ValueContainer class must hold a hashkey_and_value
34  * container (see below); this is essentially a (hashkey, value)
35  * pair but with certain other optimizations. This must be
36  * accessible via a get_hashkey_and_value() method in the value
37  * container, which is then used by
38  *
39  * 3. Pointers to the ValueContainer are what is stored, and it is
40  * assumed that a Value -> ValueContainer* mapping is valid if and
41  * only if the value container holds the same value. Otherwise
42  * the find() method returns a nullptr. (The invalidate function
43  * below sticks to this assumption; it just tracks things for lazy
44  * cleanup).
45  *
46  * For a usage example, see ml/sketches/space_saving.hpp.
47  *
48  * The value_container_mapper::hashkey class is initializable by
49  * value:
50  *
51  * struct hashkey {
52  * hashkey();
53  * hashkey(const Value& v);
54  *
55  * // A bunch of internal methods...
56  * };
57  *
58  * The value_container_mapper::hashkey_and_value is initializable
59  * either by value (in which case the hashkey is created from a hash
60  * of the value), or by hashkey and value pair. It also implements
61  * key() and value() methods to return the hashkey() and value()
62  * respectively:
63  *
64  * struct hashkey_and_value {
65  * hashkey_and_value();
66  * hashkey_and_value(const Value& v);
67  * hashkey_and_value(const hashkey& hk, const Value& v);
68  *
69  * hashkey key() const;
70  * const Value& value() const;
71  *
72  * // A bunch of internal methods...
73  * };
74  */
75 template <typename Value, typename ValueContainer>
77  public:
78 
79  // Define the equality comparison and hash function comparison
80  // stuff. This allows us to use an intersecting hash still and have
81  // it work.
82 
85 
86  /** Reserves internal storage for n elements.
87  */
88  void reserve(size_t n) { _reserve(n); }
89 
90  /** Returns the current size of the hash table.
91  */
92  inline size_t size() const {
93  return _size();
94  }
95 
96  /** Clears the hash table.
97  */
98  inline void clear() {
99  _clear();
100  }
101 
102  /** Inserts a lookup index into the hash mapping.
103  */
104  void insert(const hashkey_and_value& hv, ValueContainer* v_ptr) GL_HOT_INLINE_FLATTEN {
105  DASSERT_TRUE(hv.value() == v_ptr->get_hashkey_and_value().value());
106  DASSERT_TRUE(hv.key() == v_ptr->get_hashkey_and_value().key());
107  _insert(hv.key(), v_ptr);
108  }
109 
110  /** Inserts a lookup index into the hash mapping.
111  * Overload that pulls the value from v_ptr.
112  * \overload
113  */
114  void insert(const hashkey& hk, ValueContainer* v_ptr) GL_HOT_INLINE_FLATTEN {
115  _insert(hk, v_ptr);
116  }
117 
118  /** Returns the container associated with this key and value. If
119  * it's not present or has been invalidated, returns nullptr.
120  */
121  ValueContainer* find(const hashkey_and_value& hv) GL_HOT_INLINE_FLATTEN {
122  return _find_reference<ValueContainer*, value_container_mapper>(this, hv.key(), hv.value());
123  }
124 
125  /** Same as above, but avoids a potential copy operation of the
126  * value if it is not stored in a hashkey_and_value instance
127  * already.
128  * \overload
129  */
130  ValueContainer* find(const hashkey& key, const Value& t) GL_HOT_INLINE_FLATTEN {
131  return _find_reference<ValueContainer*, value_container_mapper>(this, key, t);
132  }
133 
134  /** Returns the container associated with this (key, value). If
135  * it's not present, returns nullptr. Const overload.
136  * \overload
137  */
138  const ValueContainer* find(const hashkey_and_value& hv) const GL_HOT_INLINE_FLATTEN {
139  return _find_reference<const ValueContainer*, const value_container_mapper>(this, hv.key(), hv.value());
140  }
141 
142  /** Same as above, but avoids a potential copy operation of the
143  * value if it is not stored in a hashkey_and_value instance
144  * already. Const overload.
145  * \overload
146  */
147  const ValueContainer* find(const hashkey& key, const Value& t) const GL_HOT_INLINE_FLATTEN {
148  return _find_reference<const ValueContainer*, const value_container_mapper>(this, key, t);
149  }
150 
151  /** Marks a particular value_container as invalid. It is assumed,
152  * however, that as long as ValueContainer* holds the value, it is
153  * a valid key; otherwise it is not. This function does some lazy
154  * cleanup, but may not erase the key.
155  */
156  void invalidate(const hashkey_and_value& hv, ValueContainer* v_ptr) GL_HOT_INLINE_FLATTEN {
157  DASSERT_TRUE(hv.value() == v_ptr->get_hashkey_and_value().value());
158  DASSERT_TRUE(hv.key() == v_ptr->get_hashkey_and_value().key());
159  _invalidate(hv.key(), v_ptr);
160  }
161 
162  /** Marks a particular value_container as invalid. It is assumed,
163  * however, that as long as ValueContainer* holds the value, it is
164  * a valid key; otherwise it is not. This function does some lazy
165  * cleanup, but may not erase the key.
166  *
167  * Overload that pulls the value from v_ptr.
168  * \overload
169  */
170  void invalidate(const hashkey& hk, ValueContainer* v_ptr) GL_HOT_INLINE_FLATTEN {
171  _invalidate(hk, v_ptr);
172  }
173 
174  private:
175  struct internal_value_type {
176  hashkey first;
177  mutable ValueContainer* second;
178  };
179 
180  struct gdhs_hash {
181  size_t operator()(const internal_value_type& kv_pair) const GL_HOT_INLINE_FLATTEN {
182  return kv_pair.first.hash();
183  }
184  };
185 
186  struct gdhs_equality {
187  bool operator()(const internal_value_type& kv_pair_1, const internal_value_type& kv_pair_2) const GL_HOT_INLINE_FLATTEN {
188 
189  if(hashkey::key_is_exact()) {
190  return kv_pair_1.first == kv_pair_2.first;
191 
192  } else {
193 
194  if(kv_pair_1.first != kv_pair_2.first)
195  return false;
196 
197  if(hashkey::use_explicit_delete()) {
198 
199  if(kv_pair_1.second == nullptr && kv_pair_2.second == nullptr)
200  return true;
201 
202  return (kv_pair_1.second != nullptr
203  && kv_pair_2.second != nullptr
204  && (kv_pair_1.second->get_hashkey_and_value().value()
205  == kv_pair_2.second->get_hashkey_and_value().value()));
206 
207  } else {
208 
209  if(kv_pair_1.second == nullptr && kv_pair_2.second == nullptr)
210  return true;
211 
212  return (kv_pair_1.second != nullptr
213  && kv_pair_2.second != nullptr
214  && kv_pair_1.second->get_hashkey_and_value().key() == kv_pair_1.first
215  && kv_pair_2.second->get_hashkey_and_value().key() == kv_pair_2.first
216  && (kv_pair_1.second->get_hashkey_and_value().value()
217  == kv_pair_2.second->get_hashkey_and_value().value()));
218  }
219  }
220  }
221  };
222 
223  typedef google::dense_hash_set<internal_value_type, gdhs_hash, gdhs_equality> hash_map_type;
224  friend hash_map_type;
225 
226  public:
227 
229  : _kv_empty_value(hashkey_and_value(hashkey::as_empty(), Value()))
230  , _kv_empty({hashkey::as_empty(), nullptr})
231  , _kv_deleted_value(hashkey_and_value(hashkey::as_deleted(), Value()))
232  , _kv_deleted({hashkey::as_deleted(), nullptr})
233  {
234  table.set_empty_key(_kv_empty);
235 
236  if(hashkey::use_explicit_delete())
237  table.set_deleted_key(_kv_deleted);
238 
239  // A few internal consistency tests
240  DASSERT_TRUE(gdhs_equality()(_kv_deleted, _kv_deleted));
241  DASSERT_TRUE(gdhs_equality()(_kv_empty, _kv_empty));
242  DASSERT_FALSE(gdhs_equality()(_kv_empty, _kv_deleted));
243  DASSERT_FALSE(gdhs_equality()(_kv_deleted, _kv_empty));
244  }
245 
246 
247  private:
248 
249  /** Reserve the proper amount for the table.
250  */
251  void _reserve(size_t n) GL_HOT_INLINE_FLATTEN {
252  if(hashkey::use_explicit_delete()) {
253  reserved_size = n;
254  table.resize( n );
255  } else {
256  reserved_size = 3*n;
257  table.resize( 3*n );
258  }
259 
260  erase_counter = 0;
261  }
262 
263  /** Marks a key, value_container as invalid. It is assumed, however, that as long as
264  * ValueContainer* holds the value, it is a valid key; otherwise
265  * it is not. This function does some lazy cleanup, but may not
266  * erase the key.
267  */
268  void _invalidate(const hashkey& key, ValueContainer* v_ptr) GL_HOT_INLINE_FLATTEN {
269 
270  internal_value_type kv_pair{key, v_ptr};
271 
272  if(UNLIKELY(hashkey::is_empty(kv_pair.first) )) {
273  _erase_in_stack(kv_pair, _kv_empty, _kv_empty_overflow_stack);
274  return;
275  }
276 
277  if(UNLIKELY(hashkey::is_deleted(kv_pair.first) )) {
278  _erase_in_stack(kv_pair, _kv_deleted, _kv_deleted_overflow_stack);
279  return;
280  }
281 
282  if(hashkey::use_explicit_delete()) {
283 
284  table.erase(kv_pair);
285 
286  } else {
287 
288  ++erase_counter;
289 
290  if(UNLIKELY(erase_counter >= reserved_size)) {
291  refresh_hash_table();
292  erase_counter = 0;
293  }
294  }
295  }
296 
297  /** Insert a key, valuecontainer pair into the hash table.
298  */
299  void _insert(const hashkey& key, ValueContainer* v_ptr) GL_HOT_INLINE_FLATTEN {
300  internal_value_type kv_pair{key, v_ptr};
301 
302  if(UNLIKELY(hashkey::is_empty(kv_pair.first) )) {
303  _insert_in_stack(kv_pair, _kv_empty, _kv_empty_overflow_stack);
304  return;
305  }
306 
307  if(UNLIKELY(hashkey::is_deleted(kv_pair.first) )) {
308  _insert_in_stack(kv_pair, _kv_deleted, _kv_deleted_overflow_stack);
309  return;
310  }
311 
312  auto ret = table.insert(kv_pair);
313 
314  if(!hashkey::use_explicit_delete()) {
315  ret.first->second = kv_pair.second;
316  }
317  }
318 
319 
320  /** Returns the current size of the hash table.
321  */
322  inline size_t _size() const {
323  return ( (_kv_empty.second == nullptr ? 0 : 1)
324  + _kv_empty_overflow_stack.size()
325  + (_kv_deleted.second == nullptr ? 0 : 1)
326  + _kv_deleted_overflow_stack.size()
327  + table.size());
328  }
329 
330  /** Clears the hash table.
331  */
332  inline void _clear() {
333  _kv_empty.second = nullptr;
334  _kv_empty_overflow_stack.clear();
335  _kv_deleted.second = nullptr;
336  _kv_deleted_overflow_stack.clear();
337  table.clear();
338  }
339 
340  private:
341 
342  size_t reserved_size;
343  size_t erase_counter = 0;
344 
345  ValueContainer _kv_empty_value;
346  internal_value_type _kv_empty;
347  std::vector<internal_value_type> _kv_empty_overflow_stack;
348 
349  ValueContainer _kv_deleted_value;
350  internal_value_type _kv_deleted;
351  std::vector<internal_value_type> _kv_deleted_overflow_stack;
352 
353  hash_map_type table;
354 
355  void _erase_in_stack(const internal_value_type& kv_pair,
356  internal_value_type& kv_base,
357  std::vector<internal_value_type>& stack) GL_HOT_NOINLINE_FLATTEN {
358 
359  if(hashkey::key_is_exact()) {
360  DASSERT_TRUE(stack.empty());
361  kv_base.second = nullptr;
362  } else {
363 
364  // Test for value explicitly.
365  DASSERT_TRUE(kv_base.second != nullptr);
366  if(kv_base.second->get_hashkey_and_value().value()
367  == kv_pair.second->get_hashkey_and_value().value()) {
368 
369  // This is the one; pull in anything from the stack
370  kv_base.second = nullptr;
371 
372  if(!stack.empty()) {
373  kv_base = stack.back();
374  stack.pop_back();
375  }
376 
377  } else {
378 
379  // Drop it from the stack.
380  DASSERT_FALSE(stack.empty());
381 
382  size_t found_idx = size_t(-1);
383  for(size_t idx = 0; idx < stack.size(); ++idx) {
384  if(stack[idx].second->get_hashkey_and_value().value()
385  == kv_pair.second->get_hashkey_and_value().value()) {
386  found_idx = idx;
387  break;
388  }
389  }
390  DASSERT_FALSE(found_idx == size_t(-1));
391 
392  std::swap(stack[found_idx], stack.back());
393  stack.pop_back();
394  }
395  }
396  }
397 
398  mutable ValueContainer v_temp;
399 
400  /** Return a reference to the key-value pair associated with a
401  * particular key. Returns one with a nullptr for the
402  * value-pointer if it is not present.
403  */
404  template <typename _ReferenceType, typename _ThisType>
406  static _ReferenceType _find_reference(_ThisType* c, const hashkey& key, const Value& t) {
407 
408  if(hashkey::key_is_exact()) {
409 
410  if(UNLIKELY(hashkey::is_empty(key) )) {
411  return c->_kv_empty.second;
412  }
413 
414  if(UNLIKELY(hashkey::is_deleted(key) )) {
415  return c->_kv_deleted.second;
416  }
417 
418  c->v_temp = ValueContainer(hashkey_and_value(key, t));
419 
420  auto it = c->table.find(internal_value_type{key, &c->v_temp});
421 
422  return ( (it == c->table.end()
423  || (!hashkey::use_explicit_delete() && it->first != it->second->get_hashkey_and_value().key()))
424  ? _ReferenceType(nullptr)
425  : (it->second) );
426 
427  } else {
428 
429  typedef typename std::conditional<std::is_const<_ThisType>::value,
430  const internal_value_type&, internal_value_type&>::type iv_ref;
431 
432  typedef typename std::conditional<std::is_const<_ThisType>::value,
433  const std::vector<internal_value_type>&,
434  std::vector<internal_value_type>& >::type iv_vect_ref;
435 
436  // Set in one of the local stacks.
437  auto find_in_stack = [&](
438  iv_ref kv_base,
439  iv_vect_ref stack) GL_GCC_ONLY(GL_HOT_NOINLINE_FLATTEN) {
440 
441  if(LIKELY(kv_base.second == nullptr)) {
442  return _ReferenceType(nullptr);
443  }
444 
445  if(kv_base.second->get_hashkey_and_value().value() == t) {
446  return _ReferenceType(kv_base.second);
447  }
448 
449  for(size_t idx = 0; idx < stack.size(); ++idx) {
450  if(stack[idx].second->get_hashkey_and_value().value() == t)
451  return _ReferenceType(stack[idx].second);
452  }
453 
454  return _ReferenceType(nullptr);
455  };
456 
457  if(UNLIKELY(hashkey::is_empty(key) )) {
458  return find_in_stack(c->_kv_empty, c->_kv_empty_overflow_stack);
459  }
460 
461  if(UNLIKELY(hashkey::is_deleted(key))) {
462  return find_in_stack(c->_kv_deleted, c->_kv_deleted_overflow_stack);
463  }
464 
465  c->v_temp = ValueContainer(hashkey_and_value(key, t));
466 
467  auto it = c->table.find(internal_value_type{key, &c->v_temp});
468 
469  return ( (it == c->table.end()
470  || (!hashkey::use_explicit_delete() && it->first != it->second->get_hashkey_and_value().key()))
471  ? _ReferenceType(nullptr)
472  : (it->second) );
473  }
474  }
475 
476  // Set in one of the local stacks.
477  void _insert_in_stack(
478  const internal_value_type& v,
479  internal_value_type& kv_base,
480  std::vector<internal_value_type>& stack) GL_HOT_NOINLINE_FLATTEN {
481 
482  if(hashkey::key_is_exact()) {
483  DASSERT_TRUE(kv_base.second == nullptr);
484  kv_base.second = v.second;
485  } else {
486  if(LIKELY(kv_base.second == nullptr)) {
487  DASSERT_TRUE(stack.empty());
488  kv_base.second = v.second;
489  } else {
490  stack.push_back(v);
491  }
492  }
493  }
494 
495  /** Refreshes the hash table, clearing out the erased elements.
496  */
497  void refresh_hash_table() GL_HOT_NOINLINE_FLATTEN {
498 
499  hash_map_type alt_table;
500  alt_table.set_empty_key(_kv_empty);
501  alt_table.resize(reserved_size);
502 
503  for(auto it = table.begin(); it != table.end(); ++it) {
504  if(it->second->get_hashkey_and_value().key() == it->first) {
505  alt_table.insert(*it);
506  }
507  }
508 
509  table.swap(alt_table);
510  }
511 };
512 
513 }
514 
515 #endif /* _VALUE_CONTAINER_MAPPER_H_ */
void invalidate(const hashkey &hk, ValueContainer *v_ptr) GL_HOT_INLINE_FLATTEN
ValueContainer * find(const hashkey_and_value &hv) GL_HOT_INLINE_FLATTEN
ValueContainer * find(const hashkey &key, const Value &t) GL_HOT_INLINE_FLATTEN
#define DASSERT_FALSE(cond)
Definition: assertions.hpp:365
#define GL_HOT_INLINE_FLATTEN
void insert(const hashkey &hk, ValueContainer *v_ptr) GL_HOT_INLINE_FLATTEN
const ValueContainer * find(const hashkey &key, const Value &t) const GL_HOT_INLINE_FLATTEN
const ValueContainer * find(const hashkey_and_value &hv) const GL_HOT_INLINE_FLATTEN
void insert(const hashkey_and_value &hv, ValueContainer *v_ptr) GL_HOT_INLINE_FLATTEN
#define DASSERT_TRUE(cond)
Definition: assertions.hpp:364
void invalidate(const hashkey_and_value &hv, ValueContainer *v_ptr) GL_HOT_INLINE_FLATTEN