Turi Create  4.0
hopscotch_map.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_UTIL_HOPSCOTCH_HASH_HPP
7 #define TURI_UTIL_HOPSCOTCH_HASH_HPP
8 
9 #include <core/generics/hopscotch_table.hpp>
10 
11 #include <core/storage/serialization/serialization_includes.hpp>
12 
13 
14 #define _HOPSCOTCH_MAP_DEFAULT_HASH std::hash<Key>
15 
16 
17 
18 namespace turi {
19 
20 
21 
22  /**
23  * A hopscotch hash map. More or less similar
24  * interface as boost::unordered_map, not necessarily
25  * entirely STL compliant.
26  * Really should only be used to store small keys and trivial values.
27  *
28  * \tparam Key The key of the map
29  * \tparam Value The value to store for each key
30  * \tparam Hash The hash functor type. Defaults to std::hash<Key> if C++11 is
31  * available. Otherwise defaults to boost::hash<Key>
32  * \tparam KeyEqual The functor used to identify object equality. Defaults to
33  * std::equal_to<Key>
34  */
35  template <typename Key,
36  typename Value,
37  typename Hash = _HOPSCOTCH_MAP_DEFAULT_HASH,
38  typename KeyEqual = std::equal_to<Key> >
39  class hopscotch_map {
40 
41  public:
42  // public typedefs
43  typedef Key key_type;
44  typedef std::pair<Key, Value> value_type;
45  typedef Value mapped_type;
46  typedef size_t size_type;
47  typedef Hash hasher;
48  typedef KeyEqual equality_function;
49  typedef value_type* pointer;
50  typedef value_type& reference;
51  typedef const value_type* const_pointer;
52  typedef const value_type& const_reference;
53 
54 
55  typedef std::pair<Key, Value> storage_type;
56 
57  struct hash_redirect{
58  Hash hashfun;
59  hash_redirect(Hash h): hashfun(h) { }
60  size_t operator()(const storage_type& v) const {
61  return hashfun(v.first);
62  }
63  };
64  struct key_equal_redirect{
65  KeyEqual keyeq;
66  key_equal_redirect(KeyEqual k): keyeq(k) { }
67  bool operator()(const storage_type& v, const storage_type& v2) const {
68  return keyeq(v.first, v2.first);
69  }
70  };
71 
72  typedef hopscotch_table<storage_type,
73  hash_redirect,
74  key_equal_redirect> container_type;
75 
76  typedef boost::unordered_map<key_type, mapped_type, Hash> spill_type;
77 
78  struct const_iterator{
79  typedef std::forward_iterator_tag iterator_category;
80  typedef const typename hopscotch_map::value_type value_type;
81  typedef size_t difference_type;
82  typedef value_type* pointer;
83  typedef value_type& reference;
84 
85  friend class hopscotch_map;
86 
87  const hopscotch_map* ptr;
89  typename hopscotch_map::spill_type::const_iterator iter2;
90  bool in_spill;
91 
92  const_iterator():ptr(NULL) {}
93 
94 
95  const_iterator operator++() {
96  if (!in_spill) {
97  ++iter;
98  if (iter == ptr->container->end()) {
99  in_spill = true;
100  }
101  } else {
102  ++iter2;
103  }
104  return *this;
105  }
106 
107  const_iterator operator++(int) {
108  iterator cur = *this;
109  ++(*this);
110  return cur;
111  }
112 
113 
114  reference operator*() {
115  if (!in_spill) return (*iter);
116  else return *reinterpret_cast<pointer>(&(*iter2));
117  }
118 
119  pointer operator->() {
120  if (!in_spill) return &(*iter);
121  else return reinterpret_cast<pointer>(&(*iter2));
122  // this is annoying. but it unfortunately has to be this way
123  }
124 
125  bool operator==(const const_iterator it) const {
126  return ptr == it.ptr &&
127  ((!in_spill && iter == it.iter) ||
128  (in_spill && iter2 == it.iter2));
129  }
130 
131  bool operator!=(const const_iterator iter) const {
132  return !((*this) == iter);
133  }
134  private:
135  const_iterator(const hopscotch_map* map,
136  typename container_type::const_iterator iter,
137  typename spill_type::const_iterator iter2):
138  ptr(map), iter(iter), iter2(iter2), in_spill(iter == ptr->container->end()) { }
139  };
140 
141  struct iterator {
142  typedef std::forward_iterator_tag iterator_category;
143  typedef typename hopscotch_map::value_type value_type;
144  typedef size_t difference_type;
145  typedef value_type* pointer;
146  typedef value_type& reference;
147 
148  friend class hopscotch_map;
149 
150  hopscotch_map* ptr;
152  typename hopscotch_map::spill_type::iterator iter2;
153  bool in_spill;
154 
155  iterator():ptr(NULL) {}
156 
157 
158  operator const_iterator() const {
159  const_iterator it(ptr, iter, iter2);
160  return it;
161  }
162 
163  iterator operator++() {
164  if (!in_spill) {
165  ++iter;
166  // if I went past the end of the main array,
167  // go to the spill array.
168  if (iter == ptr->container->end()) {
169  in_spill = true;
170  }
171  } else {
172  ++iter2;
173  }
174  return *this;
175  }
176 
177  iterator operator++(int) {
178  iterator cur = *this;
179  ++(*this);
180  return cur;
181  }
182 
183 
184  reference operator*() {
185  if (!in_spill) return (*iter);
186  else return *reinterpret_cast<pointer>(&(*iter2));
187  }
188 
189  pointer operator->() {
190  if (!in_spill) return &(*iter);
191  else return reinterpret_cast<pointer>(&(*iter2));
192  }
193 
194  bool operator==(const iterator it) const {
195  return ptr == it.ptr &&
196  ((!in_spill && iter == it.iter) ||
197  (in_spill && iter2 == it.iter2));
198  }
199 
200  bool operator!=(const iterator iter) const {
201  return !((*this) == iter);
202  }
203  private:
204  iterator(hopscotch_map* map,
205  typename container_type::iterator iter,
206  typename spill_type::iterator iter2):
207  ptr(map), iter(iter), iter2(iter2), in_spill(iter == ptr->container->end()) { }
208  };
209 
210 
211 
212  private:
213 
214 
215  // The primary storage. Used by all sequential accessors.
216  container_type* container;
217 
218  // excess elements which refuse to be inserted go here.
219  spill_type spill;
220 
221  // the hash function to use. hashes a pair<key, value> to hash(key)
222  hash_redirect hashfun;
223 
224  // the equality function to use. Tests equality on only the first
225  // element of the pair
226  key_equal_redirect equalfun;
227 
228  container_type* create_new_container(size_t size) {
229  return new container_type(size, hashfun, equalfun);
230  }
231 
232  void destroy_all() {
233  delete container;
234  spill.clear();
235  container = NULL;
236  }
237 
238  // rehashes the hash table to one which is double the size
239  void rehash_to_new_container(size_t newsize = (size_t)(-1)) {
240  /*
241  std::cerr << "Rehash at " << container->size() << "/"
242  << container->capacity() << ": "
243  << container->load_factor() << std::endl;
244  */
245  // rehash
246  if (newsize == (size_t)(-1)) newsize = size() * 2;
247  container_type* newcontainer = create_new_container(newsize);
248  const_iterator citer = begin();
249  spill_type newspill;
250  while (citer != end()) {
251  if(newcontainer->insert(*citer) == newcontainer->end()) {
252  newspill.insert(*citer);
253  }
254  ++citer;
255  }
256  std::swap(container, newcontainer);
257  std::swap(spill, newspill);
258  delete newcontainer;
259  }
260 
261  // Inserts a value into the hash table. This does not check
262  // if the key already exists, and may produce duplicate values.
263  iterator do_insert(const value_type &v) {
264  typename container_type::iterator iter = container->insert(v);
265 
266  if (iter != container->end()) {
267  return iterator(this, iter, spill.begin());
268  }
269  else {
270  if (load_factor() > 0.8) {
271  rehash_to_new_container();
272  iter = container->insert(v);
273  if(iter != container->end()) {
274  return iterator(this, iter, spill.begin());
275  }
276  else {
277  return iterator(this, container->end(), spill.insert(v).first);
278  }
279  } else {
280  // we have a *really* terrible hash function.
281  // use the spill
282  return iterator(this, container->end(), spill.insert(v).first);
283  }
284  }
285  }
286 
287  public:
288 
289  hopscotch_map(Hash hashfun = Hash(),
290  KeyEqual equalfun = KeyEqual()):
291  container(NULL),
292  hashfun(hashfun), equalfun(equalfun) {
293  container = create_new_container(32);
294  }
295 
296  hopscotch_map(const hopscotch_map& h):
297  hashfun(h.hashfun), equalfun(h.equalfun) {
298  container = create_new_container(h.capacity());
299  (*container) = *(h.container);
300  spill = h.spill;
301  }
302 
303  // only increases
304  void rehash(size_t s) {
305  if (s > capacity()) {
306  rehash_to_new_container(s);
307  }
308  }
309 
310  ~hopscotch_map() {
311  destroy_all();
312  }
313 
314  hasher hash_function() const {
315  return hashfun.hashfun;
316  }
317 
318  KeyEqual key_eq() const {
319  return equalfun.equalfun;
320  }
321 
322  hopscotch_map& operator=(const hopscotch_map& other) {
323  (*container) = *(other.container);
324  hashfun = other.hashfun;
325  equalfun = other.equalfun;
326  return *this;
327  }
328 
329  size_type size() const {
330  return container->size() + spill.size();
331  }
332 
333  iterator begin() {
334  return iterator(this, container->begin(), spill.begin());
335  }
336 
337  iterator end() {
338  return iterator(this, container->end(), spill.end());
339  }
340 
341 
342  const_iterator begin() const {
343  return const_iterator(this, container->begin(), spill.begin());
344  }
345 
346  const_iterator end() const {
347  return const_iterator(this, container->end(), spill.end());
348  }
349 
350 
351  std::pair<iterator, bool> insert(const value_type& v) {
352  iterator i = find(v.first);
353  if (i != end()) return std::make_pair(i, false);
354  else return std::make_pair(do_insert(v), true);
355  }
356 
357 
358  iterator insert(const_iterator hint, const value_type& v) {
359  return insert(v).first;
360  }
361 
362  iterator find(key_type const& k) {
363  value_type v(k, mapped_type());
364  typename container_type::iterator iter = container->find(v);
365  if (iter != container->end()) {
366  return iterator(this, iter, spill.begin());
367  } else {
368  return iterator(this, iter, spill.find(k));
369  }
370  }
371 
372  const_iterator find(key_type const& k) const {
373  value_type v(k, mapped_type());
374  typename container_type::iterator iter = container->find(v);
375  if (iter != container->end()) {
376  return const_iterator(this, iter, spill.begin());
377  } else {
378  return const_iterator(this, iter, spill.find(k));
379  }
380  }
381 
382  size_t count(key_type const& k) const {
383  value_type v(k, mapped_type());
384  return container->count(v) || spill.count(k);
385  }
386 
387 
388  bool erase(iterator iter) {
389  return container->erase(iter.iter) || spill.erase(iter.iter2);
390  }
391 
392  bool erase(key_type const& k) {
393  value_type v(k, mapped_type());
394  return container->erase(v) || spill.erase(k);
395  }
396 
397  void swap(hopscotch_map& other) {
398  std::swap(container, other.container);
399  std::swap(spill, other.spill);
400  std::swap(hashfun, other.hashfun);
401  std::swap(equalfun, other.equalfun);
402  }
403 
404  mapped_type& operator[](const key_type& i) {
405  iterator iter = find(i);
406  value_type tmp(i, mapped_type());
407  if (iter == end()) iter = do_insert(tmp);
408  return iter->second;
409  }
410 
411  void clear() {
412  destroy_all();
413  container = create_new_container(128);
414  }
415 
416 
417  size_t capacity() const {
418  return container->capacity() + spill.size();
419  }
420 
421 
422  float load_factor() const {
423  return float(size()) / capacity();
424  }
425 
426  void save(oarchive &oarc) const {
427  oarc << size() << capacity();
428  const_iterator iter = begin();
429  while (iter != end()) {
430  oarc << (*iter);
431  ++iter;
432  }
433  }
434 
435 
436  void load(iarchive &iarc) {
437  size_t s, c;
438  iarc >> s >> c;
439  if (capacity() != c) {
440  destroy_all();
441  container = create_new_container(c);
442  }
443  else {
444  container->clear();
445  }
446  for (size_t i = 0;i < s; ++i) {
447  value_type v;
448  iarc >> v;
449  insert(v);
450  }
451  }
452 
453  void put(const value_type &v) {
454  // try to insert into the container
455  (*this)[v.first] = v.second;
456  }
457 
458  void put(const Key& k, const Value& v) {
459  (*this)[k] = v;
460  }
461 
462  std::pair<bool, Value> get(const Key& k) const {
463  const_iterator iter = find(k);
464  return std::make_pair(iter == end(), iter->second);
465  }
466  };
467 
468 }; // end of turi namespace
469 
470 #endif
size_t capacity() const
Returns the capacity of the table.
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
iterator end()
Returns an iterator to the end of the table.
size_t count(const value_type &v) const
Returns 1 if the table contains a given element. 0 otherwise.
const_iterator find(const value_type &key) const
iterator begin()
Returns an iterator to the start of the table.
bool erase(iterator iter)
iterator insert(const value_type &newdata)
size_t size() const
Returns the number of elements in the table.
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80