Turi Create  4.0
hopscotch_set.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_UTIL_HOPSCOTCH_SET_HPP
7 #define TURI_UTIL_HOPSCOTCH_SET_HPP
8 
9 #include <core/generics/hopscotch_table.hpp>
10 
11 #include <core/storage/serialization/serialization_includes.hpp>
12 
13 
14 #define _HOPSCOTCH_SET_DEFAULT_HASH std::hash<Key>
15 
16 
17 
18 namespace turi {
19 
20 
21 
22  /**
23  * A hopscotch hash set. More or less similar
24  * interface as boost::unordered_set, not necessarily
25  * entirely STL compliant.
26  * Really should only be used to store small keys and trivial values.
27  *
28  * \tparam Key The key of the set
29  * \tparam Hash The hash functor type. Defaults to std::hash<Key> if C++11 is
30  * available. Otherwise defaults to boost::hash<Key>
31  * \tparam KeyEqual The functor used to identify object equality. Defaults to
32  * std::equal_to<Key>
33  */
34  template <typename Key,
35  typename Hash = _HOPSCOTCH_SET_DEFAULT_HASH,
36  typename KeyEqual = std::equal_to<Key> >
37  class hopscotch_set {
38 
39  public:
40  // public typedefs
41  typedef Key value_type;
42  typedef size_t size_type;
43  typedef Hash hasher;
44  typedef KeyEqual equality_function;
45  typedef value_type* pointer;
46  typedef value_type& reference;
47  typedef const value_type* const_pointer;
48  typedef const value_type& const_reference;
49 
50 
51  typedef Key storage_type;
52 
53  typedef hopscotch_table<storage_type,
54  Hash,
55  KeyEqual> container_type;
56 
57  typedef typename container_type::iterator iterator;
59 
60  private:
61 
62 
63  // The primary storage. Used by all sequential accessors.
64  container_type* container;
65 
66  // the hash function to use. hashes a pair<key, value> to hash(key)
67  hasher hashfun;
68 
69  // the equality function to use. Tests equality on only the first
70  // element of the pair
71  equality_function equalfun;
72 
73  container_type* create_new_container(size_t size) {
74  return new container_type(size, hashfun, equalfun);
75  }
76 
77  void destroy_all() {
78  delete container;
79  container = NULL;
80  }
81 
82  // rehashes the hash table to one which is double the size
83  container_type* rehash_to_new_container(size_t newsize = (size_t)(-1)) {
84  /*
85  std::cerr << "Rehash at " << container->size() << "/"
86  << container->capacity() << ": "
87  << container->load_factor() << std::endl;
88  */
89  // rehash
90  if (newsize == (size_t)(-1)) newsize = container->size() * 2;
91  container_type* newcontainer = create_new_container(newsize);
92  const_iterator citer = begin();
93  while (citer != end()) {
94  DASSERT_TRUE(newcontainer->insert(*citer) != newcontainer->end());
95  ++citer;
96  }
97  return newcontainer;
98  }
99 
100  // Inserts a value into the hash table. This does not check
101  // if the key already exists, and may produce duplicate values.
102  iterator do_insert(const value_type &v) {
103  iterator iter = container->insert(v);
104 
105  if (iter != container->end()) {
106  return iter;
107  }
108  else {
109  container_type* newcontainer = rehash_to_new_container();
110  iter = newcontainer->insert(v);
111  DASSERT_TRUE(iter != newcontainer->end());
112  std::swap(container, newcontainer);
113  delete newcontainer;
114  return iter;
115  }
116  }
117 
118  public:
119 
120  hopscotch_set(size_t initialsize = 32,
121  Hash hashfun = Hash(),
122  KeyEqual equalfun = KeyEqual()):
123  container(NULL),
124  hashfun(hashfun), equalfun(equalfun) {
125  container = create_new_container(initialsize);
126  }
127 
128  hopscotch_set(const hopscotch_set& h):
129  hashfun(h.hashfun), equalfun(h.equalfun) {
130  container = create_new_container(h.capacity());
131  (*container) = *(h.container);
132  }
133 
134 
135  // only increases
136  void rehash(size_t s) {
137  if (s > capacity()) {
138  container_type* newcontainer = rehash_to_new_container(s);
139  std::swap(container, newcontainer);
140  delete newcontainer;
141  }
142  }
143 
144  ~hopscotch_set() {
145  destroy_all();
146  }
147 
148  hasher hash_function() const {
149  return hashfun;
150  }
151 
152  KeyEqual key_eq() const {
153  return equalfun;
154  }
155 
156  hopscotch_set& operator=(const hopscotch_set& other) {
157  (*container) = *(other.container);
158  hashfun = other.hashfun;
159  equalfun = other.equalfun;
160  return *this;
161  }
162 
163  size_type size() const {
164  return container->size();
165  }
166 
167  iterator begin() {
168  return container->begin();
169  }
170 
171  iterator end() {
172  return container->end();
173  }
174 
175 
176  const_iterator begin() const {
177  return container->begin();
178  }
179 
180  const_iterator end() const {
181  return container->end();
182  }
183 
184 
185  std::pair<iterator, bool> insert(const value_type& v) {
186  iterator i = find(v);
187  if (i != end()) return std::make_pair(i, false);
188  else return std::make_pair(do_insert(v), true);
189  }
190 
191 
192  iterator insert(const_iterator hint, const value_type& v) {
193  return insert(v).first;
194  }
195 
196  iterator find(value_type const& v) {
197  return container->find(v);
198  }
199 
200  const_iterator find(value_type const& v) const {
201  return container->find(v);
202  }
203 
204  size_t count(value_type const& v) const {
205  return container->count(v);
206  }
207 
208 
209  bool erase(iterator iter) {
210  return container->erase(iter);
211  }
212 
213  bool erase(value_type const& v) {
214  return container->erase(v);
215  }
216 
217  void swap(hopscotch_set& other) {
218  std::swap(container, other.container);
219  std::swap(hashfun, other.hashfun);
220  std::swap(equalfun, other.equalfun);
221  }
222 
223  void clear() {
224  destroy_all();
225  container = create_new_container(128);
226  }
227 
228 
229  size_t capacity() const {
230  return container->capacity();
231  }
232 
233 
234  float load_factor() const {
235  return container->load_factor();
236  }
237 
238  void save(oarchive &oarc) const {
239  oarc << size() << capacity();
240  const_iterator iter = begin();
241  while (iter != end()) {
242  oarc << (*iter);
243  ++iter;
244  }
245  }
246 
247 
248  void load(iarchive &iarc) {
249  size_t s, c;
250  iarc >> s >> c;
251  if (capacity() != c) {
252  destroy_all();
253  container = create_new_container(c);
254  }
255  else {
256  container->clear();
257  }
258  for (size_t i = 0;i < s; ++i) {
259  value_type v;
260  iarc >> v;
261  insert(v);
262  }
263  }
264  };
265 
266 }; // end of turi namespace
267 
268 #endif
size_t capacity() const
Returns the capacity of the table.
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
iterator end()
Returns an iterator to the end of the table.
size_t count(const value_type &v) const
Returns 1 if the table contains a given element. 0 otherwise.
const_iterator find(const value_type &key) const
iterator begin()
Returns an iterator to the start of the table.
bool erase(iterator iter)
iterator insert(const value_type &newdata)
size_t size() const
Returns the number of elements in the table.
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80
#define DASSERT_TRUE(cond)
Definition: assertions.hpp:364