Turi Create  4.0
cityhash_tc.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_CITYHASH_GL_H_
7 #define TURI_CITYHASH_GL_H_
8 
9 #include <vector>
10 #include <core/util/code_optimization.hpp>
11 
12 // Copyright (c) 2011 Google, Inc.
13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to deal
16 // in the Software without restriction, including without limitation the rights
17 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 // copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included in
22 // all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 // THE SOFTWARE.
31 //
32 // CityHash, by Geoff Pike and Jyrki Alakuijala
33 //
34 // This file provides CityHash64() and related functions.
35 
36 // This file is a merged header of the CityHash functions with a set
37 // of convenience functions.
38 
39 // From CityHash comments:
40 //
41 // It's probably possible to create even faster hash functions by
42 // writing a program that systematically explores some of the space of
43 // possible hash functions, by using SIMD instructions, or by
44 // compromising on hash quality.
45 //
46 // http://code.google.com/p/cityhash/
47 //
48 // This file provides a few functions for hashing strings. All of them are
49 // high-quality functions in the sense that they pass standard tests such
50 // as Austin Appleby's SMHasher. They are also fast.
51 //
52 // For 64-bit x86 code, on short strings, we don't know of anything faster than
53 // CityHash64 that is of comparable quality. We believe our nearest competitor
54 // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
55 // tables and most other hashing (excluding cryptography).
56 //
57 // For 64-bit x86 code, on long strings, the picture is more complicated.
58 // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
59 // CityHashCrc128 appears to be faster than all competitors of comparable
60 // quality. CityHash128 is also good but not quite as fast. We believe our
61 // nearest competitor is Bob Jenkins' Spooky. We don't have great data for
62 // other 64-bit CPUs, but for long strings we know that Spooky is slightly
63 // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
64 // Note that CityHashCrc128 is declared in citycrc.h.
65 //
66 // For 32-bit x86 code, we don't know of anything faster than CityHash32 that
67 // is of comparable quality. We believe our nearest competitor is Murmur3A.
68 // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
69 //
70 // Functions in the CityHash family are not suitable for cryptography.
71 //
72 // Please see CityHash's README file for more details on our performance
73 // measurements and so on.
74 //
75 // WARNING: This code has been only lightly tested on big-endian platforms!
76 // It is known to work well on little-endian platforms that have a small penalty
77 // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
78 // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
79 // bug reports are welcome.
80 //
81 // By the way, for some hash functions, given strings a and b, the hash
82 // of a+b is easily derived from the hashes of a and b. This property
83 // doesn't hold for any hash functions in this file.
84 
85 #include <algorithm>
86 #include <string.h> // for memcpy and memset
87 #include <cstdint>
88 
89 #include "int128_types.hpp"
90 
91 #ifdef __SSE4_2__
92 #include <nmmintrin.h>
93 #endif
94 
95 // Local macros
96 #if defined( _MSC_VER) || defined(_WIN32)
97 
98 #include <stdlib.h>
99 #define bswap_32(x) _byteswap_ulong(x)
100 #define bswap_64(x) _byteswap_uint64(x)
101 
102 #elif defined(__APPLE__)
103 
104 // Mac OS X / Darwin features
105 #include <libkern/OSByteOrder.h>
106 #define bswap_32(x) OSSwapInt32(x)
107 #define bswap_64(x) OSSwapInt64(x)
108 
109 #elif defined(__NetBSD__)
110 
111 #include <sys/types.h>
112 #include <machine/bswap.h>
113 #if defined(__BSWAP_RENAME) && !defined(__bswap_32)
114 #define bswap_32(x) bswap32(x)
115 #define bswap_64(x) bswap64(x)
116 #endif
117 
118 #else
119 
120 #include <byteswap.h>
121 
122 #endif
123 
124 #ifdef WORDS_BIGENDIAN
125 #define uint32_t_in_expected_order(x) (bswap_32(x))
126 #define uint64_t_in_expected_order(x) (bswap_64(x))
127 #else
128 #define uint32_t_in_expected_order(x) (x)
129 #define uint64_t_in_expected_order(x) (x)
130 #endif
131 
132 #define _CH_PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
133 
134 namespace turi {
135 
136 namespace cityhash_local {
137 
// 128-bit value represented as a (low, high) pair of 64-bit words.
typedef std::pair<uint64_t, uint64_t> local_uint128;

// Accessor for the low 64 bits of a local_uint128.
static inline uint64_t Uint128Low64(const local_uint128& x) {
  return std::get<0>(x);
}

// Accessor for the high 64 bits of a local_uint128.
static inline uint64_t Uint128High64(const local_uint128& x) {
  return std::get<1>(x);
}
147 
// Read 8 bytes from p -- which need not be aligned -- as a host-order
// uint64_t. memcpy is the portable way to express an unaligned load;
// compilers lower it to a single load where the target allows it.
static inline uint64_t UNALIGNED_LOAD64(const char *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
153 
// Read 4 bytes from p -- which need not be aligned -- as a host-order
// uint32_t. See UNALIGNED_LOAD64 for why memcpy is used.
static inline uint32_t UNALIGNED_LOAD32(const char *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
159 
// Collapse a 128-bit value to 64 bits so that both halves influence every
// output bit. kMul is an arbitrary odd multiplier.
static inline uint64_t Hash128to64(const local_uint128& x) {
  // Murmur-inspired hashing: multiply, then fold the high bits back down.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  a ^= (a >> 47);
  uint64_t b = (Uint128High64(x) ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}
170 
// Load 8 bytes from p in "expected" (little-endian) order; byte-swaps on
// big-endian builds via the macro defined above.
static inline uint64_t Fetch64(const char *p) {
  return uint64_t_in_expected_order(UNALIGNED_LOAD64(p));
}
174 
// Load 4 bytes from p in "expected" (little-endian) order; byte-swaps on
// big-endian builds via the macro defined above.
static inline uint32_t Fetch32(const char *p) {
  return uint32_t_in_expected_order(UNALIGNED_LOAD32(p));
}
178 
// Some primes between 2^63 and 2^64 for various uses.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;

// Magic numbers for 32-bit hashing. Copied from Murmur3.
// (Header-scope statics give each translation unit its own copy; these are
// small constants, so the duplication is benign.)
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
187 
// Final avalanche step from Murmur3: a bijective 32->32 bit mixer that
// spreads every input bit across the whole output word.
static inline uint32_t fmix(uint32_t h)
{
  uint32_t x = h;
  x = (x ^ (x >> 16)) * 0x85ebca6b;
  x = (x ^ (x >> 13)) * 0xc2b2ae35;
  return x ^ (x >> 16);
}
198 
// Rotate a 32-bit value right by `shift` bits. A zero shift is handled as
// a special case because shifting a uint32_t left by 32 is undefined.
static inline uint32_t Rotate32(uint32_t val, int shift) {
  if (shift == 0) {
    return val;
  }
  return (val >> shift) | (val << (32 - shift));
}
203 
static inline uint32_t Mur(uint32_t a, uint32_t h) {
  // Helper from Murmur3 for combining two 32-bit values.
  a *= c1;
  a = Rotate32(a, 17);
  a *= c2;
  h ^= a;
  h = Rotate32(h, 19);
  // 5 and 0xe6546b64 are the Murmur3 round constants.
  return h * 5 + 0xe6546b64;
}
213 
// 32-bit hash for 13-24 byte inputs: six (possibly overlapping) 4-byte
// reads cover the whole input, chained through Mur and finished with fmix.
static inline uint32_t Hash32Len13to24(const char *s, size_t len) {
  uint32_t a = Fetch32(s - 4 + (len >> 1));
  uint32_t b = Fetch32(s + 4);
  uint32_t c = Fetch32(s + len - 8);
  uint32_t d = Fetch32(s + (len >> 1));
  uint32_t e = Fetch32(s);
  uint32_t f = Fetch32(s + len - 4);
  uint32_t h = static_cast<uint32_t>(len);

  return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
}
225 
// 32-bit hash for 0-4 byte inputs: byte-at-a-time polynomial accumulation.
static inline uint32_t Hash32Len0to4(const char *s, size_t len) {
  uint32_t b = 0;
  uint32_t c = 9;
  for (size_t i = 0; i < len; i++) {
    // Bytes are read as signed char, matching upstream CityHash.
    signed char v = s[i];
    b = b * c1 + v;
    c ^= b;
  }
  return fmix(Mur(b, Mur(static_cast<uint32_t>(len), c)));
}
236 
// 32-bit hash for 5-12 byte inputs.
static inline uint32_t Hash32Len5to12(const char *s, size_t len) {
  uint32_t a = static_cast<uint32_t>(len), b = static_cast<uint32_t>(len * 5), c = 9, d = b;
  a += Fetch32(s);
  b += Fetch32(s + len - 4);
  // Middle read: offset is 0 or 4 depending on whether len/2 >= 4.
  c += Fetch32(s + ((len >> 1) & 4));
  return fmix(Mur(c, Mur(b, Mur(a, d))));
}
244 
// 32-bit CityHash. Dispatches short inputs to the specialized helpers;
// strings longer than 24 bytes use a 20-byte-per-iteration unrolled loop
// over three mixing lanes (f, g, h).
static inline uint32_t CityHash32(const char *s, size_t len) {
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
        Hash32Len13to24(s, len);
  }

  // len > 24
  uint32_t h = static_cast<uint32_t>(len), g = static_cast<uint32_t>(c1 * len), f = g;
  // Hash the final 20 bytes up front so the main loop can consume whole
  // 20-byte chunks from the start of the string.
  uint32_t a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
  uint32_t a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
  uint32_t a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
  uint32_t a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
  uint32_t a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
  h ^= a0;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  h ^= a2;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  g ^= a1;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  g ^= a3;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  f += a4;
  f = Rotate32(f, 19);
  f = f * 5 + 0xe6546b64;
  size_t iters = (len - 1) / 20;
  do {
    uint32_t a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
    uint32_t a1 = Fetch32(s + 4);
    uint32_t a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
    uint32_t a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
    uint32_t a4 = Fetch32(s + 16);
    h ^= a0;
    h = Rotate32(h, 18);
    h = h * 5 + 0xe6546b64;
    f += a1;
    f = Rotate32(f, 19);
    f = f * c1;
    g += a2;
    g = Rotate32(g, 18);
    g = g * 5 + 0xe6546b64;
    h ^= a3 + a1;
    h = Rotate32(h, 19);
    h = h * 5 + 0xe6546b64;
    g ^= a4;
    g = bswap_32(g) * 5;
    h += a4 * 5;
    h = bswap_32(h);
    f += a0;
    _CH_PERMUTE3(f, h, g);
    s += 20;
  } while (--iters != 0);
  // Final avalanche of the three lanes into h.
  g = Rotate32(g, 11) * c1;
  g = Rotate32(g, 17) * c1;
  f = Rotate32(f, 11) * c1;
  f = Rotate32(f, 17) * c1;
  h = Rotate32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  h = Rotate32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  return h;
}
313 
// Bitwise right rotate of a 64-bit value. Normally compiles to a single
// instruction, especially when the shift is a manifest constant. A zero
// shift is special-cased because shifting a uint64_t by 64 is undefined.
static inline uint64_t Rotate(uint64_t val, int shift) {
  if (shift == 0) {
    return val;
  }
  return (val >> shift) | (val << (64 - shift));
}
320 
// Fold the top 17 bits of val down into the low bits (x ^= x >> 47).
static inline uint64_t ShiftMix(uint64_t val) {
  const uint64_t high_bits = val >> 47;
  return val ^ high_bits;
}
324 
// Collapse two 64-bit values into one by treating them as the low/high
// halves of a 128-bit value and applying Hash128to64.
static inline uint64_t HashLen16(uint64_t u, uint64_t v) {
  return Hash128to64(local_uint128(u, v));
}
328 
// Murmur-inspired 2x64 -> 64 bit mix with a caller-supplied multiplier.
static inline uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
  uint64_t a = (u ^ v) * mul;
  a ^= (a >> 47);
  uint64_t b = (v ^ a) * mul;
  b ^= (b >> 47);
  return b * mul;
}
338 
// 64-bit hash for 0-16 byte inputs. Each length band uses (possibly
// overlapping) reads so every input byte contributes to the result.
static inline uint64_t HashLen0to16(const char *s, size_t len) {
  if (len >= 8) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = Fetch64(s) + k2;
    uint64_t b = Fetch64(s + len - 8);
    uint64_t c = Rotate(b, 37) * mul + a;
    uint64_t d = (Rotate(a, 25) + b) * mul;
    return HashLen16(c, d, mul);
  }
  if (len >= 4) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = Fetch32(s);
    return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
  }
  if (len > 0) {
    // First, middle, and last byte (which may coincide for len 1-2).
    uint8_t a = s[0];
    uint8_t b = s[len >> 1];
    uint8_t c = s[len - 1];
    uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
    uint32_t z = static_cast<uint32_t>(len) + (c << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }
  // Empty input hashes to a fixed prime.
  return k2;
}
363 
// 64-bit hash for 17-32 byte inputs. This probably works well for 16-byte
// strings as well, but it may be overkill in that case.
static inline uint64_t HashLen17to32(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  // Two overlapping 16-byte reads cover the entire 17-32 byte range.
  uint64_t a = Fetch64(s) * k1;
  uint64_t b = Fetch64(s + 8);
  uint64_t c = Fetch64(s + len - 8) * mul;
  uint64_t d = Fetch64(s + len - 16) * k2;
  return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
                   a + Rotate(b + k2, 18) + c, mul);
}
375 
// Return a 16-byte hash for 48 bytes. Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
static inline std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  a += w;
  b = Rotate(b + a + z, 21);
  uint64_t c = a;  // snapshot of a before x and y are folded in
  a += x;
  a += y;
  b += Rotate(a, 44);
  return std::make_pair(a + z, b + c);
}
388 
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
static inline std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    const char* s, uint64_t a, uint64_t b) {
  // Forward the four 8-byte words of the 32-byte block to the 6-argument
  // overload above.
  return WeakHashLen32WithSeeds(Fetch64(s),
                                Fetch64(s + 8),
                                Fetch64(s + 16),
                                Fetch64(s + 24),
                                a,
                                b);
}
399 
400 // Return an 8-byte hash for 33 to 64 bytes.
401 static uint64_t HashLen33to64(const char *s, size_t len) {
402  uint64_t mul = k2 + len * 2;
403  uint64_t a = Fetch64(s) * k2;
404  uint64_t b = Fetch64(s + 8);
405  uint64_t c = Fetch64(s + len - 24);
406  uint64_t d = Fetch64(s + len - 32);
407  uint64_t e = Fetch64(s + 16) * k2;
408  uint64_t f = Fetch64(s + 24) * 9;
409  uint64_t g = Fetch64(s + len - 8);
410  uint64_t h = Fetch64(s + len - 16) * mul;
411  uint64_t u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
412  uint64_t v = ((a + g) ^ d) + f + 1;
413  uint64_t w = bswap_64((u + v) * mul) + h;
414  uint64_t x = Rotate(e + f, 42) + c;
415  uint64_t y = (bswap_64((v + w) * mul) + g) * mul;
416  uint64_t z = e + f + c;
417  a = bswap_64((x + z) * mul + y) + b;
418  b = ShiftMix((z + a) * mul + d + h) * mul;
419  return b + x;
420 }
421 
// 64-bit CityHash for arbitrary-length input. Short strings dispatch to
// the specialized helpers; longer strings run a 64-byte-per-iteration loop
// over 56 bytes of state.
static inline uint64_t CityHash64(const char *s, size_t len) {
  if (len <= 32) {
    if (len <= 16) {
      return HashLen0to16(s, len);
    } else {
      return HashLen17to32(s, len);
    }
  } else if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
  uint64_t x = Fetch64(s + len - 40);
  uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
  std::pair<uint64_t, uint64_t> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
  std::pair<uint64_t, uint64_t> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
  len = (len - 1) & ~static_cast<size_t>(63);
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 64;
  } while (len != 0);
  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
                   HashLen16(v.second, w.second) + x);
}
459 
// Seeded 64-bit hash: post-mixes the unseeded CityHash64 with both seeds.
static inline uint64_t CityHash64WithSeeds(const char *s, size_t len,
                                           uint64_t seed0, uint64_t seed1) {
  return HashLen16(CityHash64(s, len) - seed0, seed1);
}
464 
// Single-seed variant; uses the prime k2 as the implicit first seed.
static inline uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed) {
  return CityHash64WithSeeds(s, len, k2, seed);
}
468 
469 // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
470 // of any length representable in signed long. Based on City and Murmur.
471 static inline local_uint128 CityMurmur(const char *s, size_t len, local_uint128 seed) {
472  uint64_t a = Uint128Low64(seed);
473  uint64_t b = Uint128High64(seed);
474  uint64_t c = 0;
475  uint64_t d = 0;
476  signed long l = len - 16;
477  if (l <= 0) { // len <= 16
478  a = ShiftMix(a * k1) * k1;
479  c = b * k1 + HashLen0to16(s, len);
480  d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
481  } else { // len > 16
482  c = HashLen16(Fetch64(s + len - 8) + k1, a);
483  d = HashLen16(b + len, c + Fetch64(s + len - 16));
484  a += d;
485  do {
486  a ^= ShiftMix(Fetch64(s) * k1) * k1;
487  a *= k1;
488  b ^= a;
489  c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
490  c *= k1;
491  d ^= c;
492  s += 16;
493  l -= 16;
494  } while (l > 0);
495  }
496  a = HashLen16(a, c);
497  b = HashLen16(d, b);
498  return local_uint128(a ^ b, HashLen16(b, a));
499 }
500 
// Seeded 128-bit CityHash. Inputs under 128 bytes fall back to CityMurmur;
// otherwise a manually-unrolled version of the CityHash64 inner loop runs
// over 56 bytes of state (v, w, x, y, z).
static inline local_uint128 CityHash128WithSeed(const char *s, size_t len, local_uint128 seed) {
  if (len < 128) {
    return CityMurmur(s, len, seed);
  }

  // We expect len >= 128 to be the common case. Keep 56 bytes of state:
  // v, w, x, y, and z.
  std::pair<uint64_t, uint64_t> v, w;
  uint64_t x = Uint128Low64(seed);
  uint64_t y = Uint128High64(seed);
  uint64_t z = len * k1;
  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
  w.first = Rotate(y + z, 35) * k1 + x;
  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;

  // This is the same inner loop as CityHash64(), manually unrolled.
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 128;
  } while (LIKELY(len >= 128));
  x += Rotate(v.first + z, 49) * k0;
  y = y * k0 + Rotate(w.second, 37);
  z = z * k0 + Rotate(w.first, 27);
  w.first *= 9;
  v.first *= k0;
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  for (size_t tail_done = 0; tail_done < len; ) {
    tail_done += 32;
    y = Rotate(x + y, 42) * k0 + v.second;
    w.first += Fetch64(s + len - tail_done + 16);
    x = x * k0 + w.first;
    z += w.second + Fetch64(s + len - tail_done);
    w.second += v.first;
    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
    v.first *= k0;
  }
  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash. We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = HashLen16(x, v.first);
  y = HashLen16(y + z, w.first);
  return local_uint128(HashLen16(x + v.second, w.second) + y,
                       HashLen16(x + w.second, y + v.second));
}
563 
// 128-bit CityHash. For len >= 16, the first 16 bytes seed the hash of the
// remainder; shorter inputs use the fixed seed (k0, k1).
static inline local_uint128 CityHash128(const char *s, size_t len) {
  return len >= 16 ?
      CityHash128WithSeed(s + 16, len - 16,
                          local_uint128(Fetch64(s), Fetch64(s + 8) + k0)) :
      CityHash128WithSeed(s, len, local_uint128(k0, k1));
}
570 
571 #ifdef __SSE4_2__
// Requires len >= 240. Produces a 256-bit hash into result[0..3], using
// the SSE4.2 CRC32 instruction as a fast mixer. The main loop consumes
// 240 bytes (six 40-byte CHUNKs) per iteration.
// NOTE(review): declared `static` (not `static inline`) in a header, so
// each translation unit gets its own copy -- consider `static inline` for
// consistency with the other helpers.
static void CityHashCrc256Long(const char *s, size_t len,
                               uint32_t seed, uint64_t *result) {
  uint64_t a = Fetch64(s + 56) + k0;
  uint64_t b = Fetch64(s + 96) + k0;
  uint64_t c = result[0] = HashLen16(b, len);
  uint64_t d = result[1] = Fetch64(s + 120) * k0 + len;
  uint64_t e = Fetch64(s + 184) + seed;
  uint64_t f = 0;
  uint64_t g = 0;
  uint64_t h = c + d;
  uint64_t x = seed;
  uint64_t y = 0;
  uint64_t z = 0;

  // 240 bytes of input per iter.
  size_t iters = len / 240;
  len -= iters * 240;
  do {
// CHUNK consumes 40 bytes: five 8-byte loads folded into the a..h lanes,
// with three CRC32 mixes into x, y, z and a data-dependent rotate of e.
#undef CHUNK
#define CHUNK(r)                 \
    _CH_PERMUTE3(x, z, y);       \
    b += Fetch64(s);             \
    c += Fetch64(s + 8);         \
    d += Fetch64(s + 16);        \
    e += Fetch64(s + 24);        \
    f += Fetch64(s + 32);        \
    a += b;                      \
    h += f;                      \
    b += c;                      \
    f += d;                      \
    g += e;                      \
    e += z;                      \
    g += x;                      \
    z = _mm_crc32_u64(z, b + g); \
    y = _mm_crc32_u64(y, e + h); \
    x = _mm_crc32_u64(x, f + a); \
    e = Rotate(e, r);            \
    c += e;                      \
    s += 40

    CHUNK(0); _CH_PERMUTE3(a, h, c);
    CHUNK(33); _CH_PERMUTE3(a, h, f);
    CHUNK(0); _CH_PERMUTE3(b, h, f);
    CHUNK(42); _CH_PERMUTE3(b, h, d);
    CHUNK(0); _CH_PERMUTE3(b, h, e);
    CHUNK(33); _CH_PERMUTE3(a, h, e);
  } while (--iters > 0);

  // Whole 40-byte chunks of the tail.
  while (len >= 40) {
    CHUNK(29);
    e ^= Rotate(a, 20);
    h += Rotate(b, 30);
    g ^= Rotate(c, 40);
    f += Rotate(d, 34);
    _CH_PERMUTE3(c, h, g);
    len -= 40;
  }
  // Final partial chunk: re-reads the last 40 bytes of the input.
  if (len > 0) {
    s = s + len - 40;
    CHUNK(33);
    e ^= Rotate(a, 43);
    h += Rotate(b, 42);
    g ^= Rotate(c, 41);
    f += Rotate(d, 40);
  }
  // Condense the ten lanes of state into the four output words.
  result[0] ^= h;
  result[1] ^= g;
  g += h;
  a = HashLen16(a, g + z);
  x += y << 32;
  b += x;
  c = HashLen16(c, z) + h;
  d = HashLen16(d, e + result[0]);
  g += e;
  h += HashLen16(x, f);
  e = HashLen16(a, d) + g;
  z = HashLen16(b, c) + a;
  y = HashLen16(g, h) + c;
  result[0] = e + z + y + x;
  a = ShiftMix((a + y) * k0) * k0 + b;
  result[1] += a + result[0];
  a = ShiftMix(a * k0) * k0 + c;
  result[2] = a + result[1];
  a = ShiftMix((a + e) * k0) * k0;
  result[3] = a + result[2];
}
659 
// Requires len < 240. Zero-pads the input into a 240-byte stack buffer and
// delegates to CityHashCrc256Long with a length-derived seed (so different
// lengths of the same prefix hash differently).
static inline void CityHashCrc256Short(const char *s, size_t len, uint64_t *result) {
  char buf[240];
  memcpy(buf, s, len);
  memset(buf + len, 0, 240 - len);
  CityHashCrc256Long(buf, 240, ~static_cast<uint32_t>(len), result);
}
667 
// 256-bit CRC-based hash; result must point to at least four uint64_t's.
static inline void CityHashCrc256(const char *s, size_t len, uint64_t *result) {
  if (LIKELY(len >= 240)) {
    CityHashCrc256Long(s, len, 0, result);
  } else {
    CityHashCrc256Short(s, len, result);
  }
}
675 
// Seeded 128-bit hash. Inputs over 900 bytes use the CRC-based 256-bit
// hash and condense it together with the seed; shorter inputs use the
// plain CityHash128WithSeed.
static inline local_uint128 CityHashCrc128WithSeed(const char *s, size_t len, local_uint128 seed) {
  if (len <= 900) {
    return CityHash128WithSeed(s, len, seed);
  } else {
    uint64_t result[4];
    CityHashCrc256(s, len, result);
    uint64_t u = Uint128High64(seed) + result[0];
    uint64_t v = Uint128Low64(seed) + result[1];
    return local_uint128(HashLen16(u, v + result[2]),
                         HashLen16(Rotate(v, 32), u * k0 + result[3]));
  }
}
688 
// Unseeded 128-bit CRC-based hash; short inputs fall back to CityHash128.
static inline local_uint128 CityHashCrc128(const char *s, size_t len) {
  if (len <= 900) {
    return CityHash128(s, len);
  } else {
    uint64_t result[4];
    CityHashCrc256(s, len, result);
    // Only the last two words of the 256-bit result are used.
    return local_uint128(result[2], result[3]);
  }
}
698 
699 #endif
700 #undef _CH_PERMUTE3
701 
702 
703 static inline uint64_t SimpleIntegerHash64(uint64_t s) {
704 
705  static const uint64_t m = 0xc6a4a7935bd1e995ULL;
706  static const int r = 47;
707 
708  // XOR with k0 so the hash that maps to 0 is not that common. Note
709  // that this is a one-to-one map from {0,1}^64 -> {0,1}^64, so one
710  // hash must map to 0.
711 
712 
713  uint64_t k = (uint64_t(s) ^ k0);
714 
715  k *= m;
716  k ^= k >> r;
717  k *= m;
718 
719  return k;
720 }
721 
// 128-bit hash of two 64-bit integers.
//
// Applies the MurmurHash2 inner-loop mix to each word, then folds the
// first lane into the second. This gives good mixing (from the
// MurmurHash2 steps) and every step is reversible, so distinct input
// pairs cannot collide.
static inline local_uint128 SimpleIntegerHash128(uint64_t s1, uint64_t s2) {

  static const uint64_t m = 0xc6a4a7935bd1e995ULL;
  static const int r = 47;

  local_uint128 k;

  k.first = s1;
  k.second = s2;

  k.first *= m;
  k.second *= m;
  k.first ^= k.first >> r;
  k.second ^= k.second >> r;
  k.first *= m;
  k.second *= m;

  k.second ^= k.first;
  k.second *= m;

  return k;
}
748 
749 
// 128-bit hash of a single 64-bit integer: expands s to the pair
// (s, s ^ constant) and delegates to the two-argument overload.
static inline local_uint128 SimpleIntegerHash128(uint64_t s) {

  static const uint64_t rand_int_1 = 0x6e626e7774e95a48ULL;

  return SimpleIntegerHash128(s, rand_int_1 ^ s);
}
756 
// One round of the Murmur3 128-bit block mix applied to the two 64-bit
// words x and y, with the seed loaded into both lanes. Returns the
// resulting (h1, h2) state pair. (The "Murmor" spelling is kept because
// callers depend on the name.)
static inline local_uint128 Murmor3MixRoutine64(uint64_t x, uint64_t y, uint64_t seed) {
  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = 0x87c37b91114253d5ULL;
  const uint64_t c2 = 0x4cf5ad432745937fULL;

  // Taken exactly from the smhasher 64 bit code for murmur3 at
  // http://code.google.com/p/smhasher/

  x *= c1; x = Rotate(x,31); x *= c2; h1 ^= x;

  h1 = Rotate(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

  y *= c2; y = Rotate(y,33); y *= c1; h2 ^= y;

  h2 = Rotate(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;

  return std::make_pair(h1, h2);
}
777 
778 static inline local_uint128 Murmor3MixRoutine128(uint128_t x, uint128_t y, uint64_t seed) {
779  uint64_t h1 = seed;
780  uint64_t h2 = seed;
781 
782  const uint64_t c1 = 0x87c37b91114253d5ULL;
783  const uint64_t c2 = 0x4cf5ad432745937fULL;
784 
785  // Taken exactly from the smhasher 64 bit code for murmur3 at
786  // http://code.google.com/p/smhasher/
787 
788  uint64_t x1 = uint64_t(x >> 64);
789  uint64_t x2 = uint64_t(x);
790 
791  x1 *= c1; x1 = Rotate(x1,31); x1 *= c2; h1 ^= x1;
792 
793  h1 = Rotate(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
794 
795  x2 *= c2; x2 = Rotate(x2,33); x2 *= c1; h2 ^= x2;
796 
797  h2 = Rotate(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
798 
799  uint64_t y1 = uint64_t(y >> 64);
800  uint64_t y2 = uint64_t(y);
801 
802  y1 *= c1; y1 = Rotate(y1,31); y1 *= c2; h1 ^= y1;
803 
804  h1 = Rotate(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
805 
806  y2 *= c2; y = Rotate(y2,33); y2 *= c1; h2 ^= y2;
807 
808  h2 = Rotate(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
809 
810  return std::make_pair(h1, h2);
811 }
812 
813 }
814 
815 // Now, all the external wrappers for the above functions
816 
817 class flexible_type;
818 
819 /**
820  * \ingroup util
821  * \addtogroup Hashing
822  * \brief A generic set of hashing functions built around Google's cityhash.
823  * \{
824  */
825 
826 /**
827  * Returns a 128 bit hash of a string using the city hash function.
828  * The hash is strong but not cryptographically secure.
829  *
830  * \param s char* pointer to data.
831  * \param len Length of data to hash, in bytes.
832  */
833 static inline uint128_t hash128(const char* s, size_t len) {
834  cityhash_local::local_uint128 r = cityhash_local::CityHash128(s, len);
835 
836  return ((uint128_t(cityhash_local::Uint128High64(r)) << 64)
837  + uint128_t(cityhash_local::Uint128Low64(r)));
838 }
839 
840 /**
841  * Returns a 128 bit hash of a string using the city hash function.
842  * The hash is strong but not cryptographically secure.
843  *
844  * \param s std::string instance to hash.
845  */
846 static inline uint128_t hash128(const std::string& s) {
847  return hash128(s.c_str(), s.size());
848 }
849 
850 /**
851  * Returns a 128 bit hash of a uint128_t hash value.
852  * The hash is unique, has good bit-wise properties, and is very fast.
853  *
854  * \param v 128 bit integer value to hash.
855  */
static inline uint128_t hash128(uint128_t v) {
  // Mix the high and low 64-bit halves with the Murmur3 block routine,
  // then repack the two 64-bit results into a uint128_t.
  cityhash_local::local_uint128 r = cityhash_local::Murmor3MixRoutine64(
      uint64_t(v >> 64), uint64_t(v), 0x8f84e92c0587b7e3ULL);

  return ((uint128_t(cityhash_local::Uint128High64(r)) << 64)
          + uint128_t(cityhash_local::Uint128Low64(r)));
}
863 
864 /**
865  * Returns a 128 bit hash of any integer type up to 64 bits. The hash
866  * is unique, has decent bit-wise properties, and is very fast.
867  *
868  * \param v integer value to hash.
869  */
template <typename T>
static inline uint128_t hash128(
    const T& v, typename std::enable_if<std::is_integral<T>::value && sizeof(T) <= 8>::type* = 0) {
  // Integral fast path: widen to 64 bits, then expand to 128 bits with the
  // reversible integer hash.
  // NOTE(review): uses std::enable_if/std::is_integral but this header does
  // not include <type_traits> directly -- presumably pulled in transitively;
  // confirm.
  cityhash_local::local_uint128 r = cityhash_local::SimpleIntegerHash128(uint64_t(v));

  return ((uint128_t(cityhash_local::Uint128High64(r)) << 64)
          + uint128_t(cityhash_local::Uint128Low64(r)));
}
879 
880 
881 /**
882  * Returns a 128 bit hash of a uint128_t hash value.
883  * The hash is unique, has good bit-wise properties, and is very fast.
884  *
885  * \param v 64 bit integer value to hash.
886  */
static inline uint128_t hash128(uint64_t v) {
  // Expand the 64-bit input to 128 bits via the reversible integer hash,
  // then pack the (low, high) pair into a uint128_t.
  cityhash_local::local_uint128 r = cityhash_local::SimpleIntegerHash128(v);

  return ((uint128_t(cityhash_local::Uint128High64(r)) << 64)
          + uint128_t(cityhash_local::Uint128Low64(r)));
}
893 
894 /**
895  * Returns a 128 bit hash of a flexible_type value. The hash is
896  * unique, has good bit-wise properties, and is very fast.
897  *
898  * \param v flexible_type value to hash.
899  */
900 uint128_t hash128(const flexible_type& v);
901 
902 /**
903  * Returns a 128 bit hash of a vector of flexible_type values. The
904  * hash is unique, has good bit-wise properties, and is very fast.
905  *
906  * \param v vector of flexible_type values to hash.
907  */
908 uint128_t hash128(const std::vector<flexible_type>& v);
909 
910 /**
911  * Returns a 64 bit hash of a string using the city hash function.
912  * The hash is strong but not cryptographically secure.
913  *
914  * \param s char* pointer to data.
915  * \param len Length of data to hash, in bytes.
916  */
static inline uint64_t hash64(const char* s, size_t len) {
  // Thin wrapper around the bundled CityHash64.
  return cityhash_local::CityHash64(s, len);
}
920 
921 /**
922  * Returns a 64 bit hash of a string using the city hash function.
923  * The hash is strong but not cryptographically secure.
924  *
925  * \param s std::string instance to hash.
926  */
927 static inline uint64_t hash64(const std::string& s) {
928  return hash64(s.c_str(), s.size());
929 }
930 
931 /**
932  * Returns a 64 bit hash of two 64 bit integers. The hash is unique,
933  * has good bit-wise properties, and is fairly fast. The hash
934  * function is subject to change.
935  *
936  * \param v1 First integer value to hash.
937  * \param v2 Second integer value to hash.
938  */
static inline uint64_t hash64(uint64_t v1, uint64_t v2) {
  static const uint64_t rand_int = 0x9fa35c8d77b96328ULL;

  // Mix the two words with the Murmur3 block routine, then fold the
  // 128-bit state down to 64 bits by XORing its halves.
  cityhash_local::local_uint128 r = cityhash_local::Murmor3MixRoutine64(v1, v2, rand_int);

  return r.first ^ r.second;
}
946 
947 /**
948  * Returns a 64 bit hash of two 64 bit integers. The hash is unique,
949  * has good bit-wise properties, and is fairly fast. The hash
950  * function is subject to change.
951  *
952  * \param v1 First integer value to hash.
953  * \param v2 Second integer value to hash.
954  * \param v3 Third integer value to hash.
955  */
956 static inline uint64_t hash64(uint64_t v1, uint64_t v2, uint64_t v3) {
957  return hash64(v1, hash64(v2, v3));
958 }
959 
960 /**
961  * Returns a 64 bit hash of a 128 bit integer. The hash is unique,
962  * has good bit-wise properties, and is fairly fast. The hash
963  * function is subject to change.
964  *
965  * \param v integer value to hash.
966  */
967 static inline uint64_t hash64(uint128_t v) {
968  static const uint64_t rand_int = 0xf52ef6f00df6f718ULL;
969 
970  uint64_t h1 = uint64_t(v >> 64);
971  uint64_t h2 = uint64_t(v);
972 
973  cityhash_local::local_uint128 r = cityhash_local::Murmor3MixRoutine64(h1, h2, rand_int);
974 
975  return r.first ^ r.second;
976 }
977 
978 /**
979  * Returns a 64 bit hash of any integer up to 64 bits. The hash is
980  * unique, has good bit-wise properties, and is fairly fast. The hash
981  * function is subject to change.
982  *
983  * \param v integer value to hash.
984  */
985 template <typename T>
986 static inline uint64_t hash64(
987  const T& v, typename std::enable_if<std::is_integral<T>::value && sizeof(T) <= 8>::type* = 0) {
988  return cityhash_local::SimpleIntegerHash64(uint64_t(v));
989 }
990 
/**
 * Returns a 64 bit hash of a flexible_type value.  The hash is
 * deterministic, has good bit-wise properties, and is very fast, but
 * is not cryptographically secure.
 *
 * \param v flexible_type value to hash.
 */
uint64_t hash64(const flexible_type& v);
998 
/**
 * Returns a 64 bit hash of a vector of flexible_type values.  The
 * hash is deterministic, has good bit-wise properties, and is very
 * fast, but is not cryptographically secure.
 *
 * \param v vector of flexible_type values to hash.
 */
uint64_t hash64(const std::vector<flexible_type>& v);
1006 
1007 /**
1008  * Updates a hash that is used to track a sequential stream of objects.
1009  *
1010  * \param h A previous hash value.
1011  * \param v A new value to hash.
1012  *
1013  * \return The updated hash.
1014  */
1015 static inline uint128_t hash128_combine(uint128_t h1, uint128_t h2) {
1016 
1017  static const uint64_t rand_int = 0x5b73ff027f14f66aULL;
1018 
1019  // This hash function is okay when there are x1, x2, y1, and y2 are
1020  // not correlated, which is the case here.
1021  cityhash_local::local_uint128 r = cityhash_local::Murmor3MixRoutine128(h1, h2, rand_int);
1022 
1023  return ((uint128_t(cityhash_local::Uint128High64(r)) << 64)
1024  + uint128_t(cityhash_local::Uint128Low64(r)));
1025 }
1026 
1027 /**
1028  * Updates a hash that is used to track a sequential stream of objects.
1029  *
1030  * \param h A previous hash value.
1031  * \param v A new value to hash.
1032  *
1033  * \return The updated hash.
1034  */
1035 template <typename T>
1036 static inline uint128_t hash128_update(uint128_t h, const T& v) {
1037  return hash128_combine(h, hash128(v));
1038 }
1039 
1040 /**
1041  * Returns a 128 bit hash of a vector of strings using the city hash
1042  * function. The hash is strong but not cryptographically secure.
1043  *
1044  * \param v vector of std::strings to hash.
1045  */
1046 static inline uint128_t hash128(const std::vector<std::string>& v) {
1047  uint128_t h = hash128(v.size());
1048  for(const std::string& s : v)
1049  h = hash128_update(h, s);
1050 
1051  return h;
1052 }
1053 
1054 
1055 /**
1056  * Combines two 64 bit hashes in a simple, order dependent way.
1057  * Produces a new 64 bit hash.
1058  *
1059  * \param h1 First hash value
1060  * \param h2 Second hash value
1061  */
1062 static inline uint64_t hash64_combine(uint64_t h1, uint64_t h2) {
1063 
1064  static const uint64_t rand_int = 0x73a3916ae45d01e5ULL;
1065 
1066  cityhash_local::local_uint128 r = cityhash_local::Murmor3MixRoutine64(h1, h2, rand_int);
1067 
1068  return r.first ^ r.second;
1069 }
1070 
/**
 * When hash64 is used as a random number function, it is nice to be
 * able to do the following to get a proportion:
 *
 * uint64_t threshold = hash64_proportion_cutoff(proportion);
 * // ...
 * if(hash64(...) < threshold) {
 * // do something that happens `proportion` of the time.
 * }
 *
 * Unfortunately, computing this cutoff naively as proportion * 2^64
 * is prone to numerical issues, since a double's mantissa cannot
 * represent all 64 bit integers exactly; this function works around
 * that.
 *
 * \param proportion Desired proportion, in [0, 1].
 *
 * \return A 64 bit threshold such that uniformly distributed hash64
 * values fall below it approximately `proportion` of the time.
 */
uint64_t hash64_proportion_cutoff(double proportion);
1086 
/**
 * Updates an existing 64 bit hash with a new object in a simple,
 * order dependent way.
 *
 * (Fix: the parameter is renamed h1 -> h to match the documented
 * name; calls are positional so callers are unaffected.)
 *
 * \param h Previous hash value.
 * \param t New object to hash into the running hash.
 *
 * \return The updated hash.
 */
template <typename T>
static inline uint64_t hash64_update(uint64_t h, const T& t) {
  return hash64_combine(h, hash64(t));
}
1097 
1098 /**
1099  * Returns a 128 bit hash of a vector of strings using the city hash
1100  * function. The hash is strong but not cryptographically secure.
1101  *
1102  * \param v vector of std::strings to hash.
1103  */
1104 static inline uint64_t hash64(const std::vector<std::string>& v) {
1105  uint64_t h = hash64(v.size());
1106  for(const std::string& s : v)
1107  h = hash64_update(h, s);
1108 
1109  return h;
1110 }
1111 
1112 
1113 /**
1114  * Returns a 32bit value based on index and seed that has reasonable
1115  * pseudorandom properties; I.e. for a given seed, each index maps to
1116  * effectively random values of size_t.
1117  *
1118  * \param index Index from which the pseudorandom map value is mapped.
1119  * \param seed Random seed governing the mapping.
1120  */
1121 static inline uint32_t simple_random_mapping(size_t index, size_t seed) {
1122  cityhash_local::local_uint128 r = cityhash_local::SimpleIntegerHash128(index, seed);
1123  // Mix the last few bits
1124  uint64_t h = r.first ^ r.second;
1125  return uint32_t(h ^ (h >> 32));
1126 }
1127 
/**
 * Provides a simple, reversible hash for indices.  This hash has
 * excellent mixing properties, preserves 0 (i.e. 0 maps to 0), and
 * is reversible by calling reverse_index_hash(...) below.
 *
 * Taken from the Murmur3 finalizer routine ("fmix64").
 * Reference: http://code.google.com/p/smhasher/wiki/MurmurHash3.
 */
static inline uint64_t index_hash(uint64_t idx) {

  // Murmur3 64 bit finalizer constants.
  static constexpr uint64_t kFinalMix1 = 0xff51afd7ed558ccdULL;
  static constexpr uint64_t kFinalMix2 = 0xc4ceb9fe1a85ec53ULL;
  static constexpr uint64_t kShift = 33;

  // Alternating xorshift / multiply rounds.  Each step is
  // individually invertible, so the whole mapping is a bijection on
  // 64 bit values (and 0 maps to 0).
  uint64_t x = idx;
  x = (x ^ (x >> kShift)) * kFinalMix1;
  x = (x ^ (x >> kShift)) * kFinalMix2;
  return x ^ (x >> kShift);
}
1153 
/**
 * The reverse of \ref index_hash; recovers the original index from
 * its hashed value.
 */
static inline uint64_t reverse_index_hash(uint64_t idx) {

  // Multiplicative inverses (mod 2^64) of the two Murmur3 finalizer
  // constants used in index_hash above.
  static constexpr uint64_t kInvMix1 = 0x4f74430c22a54005ULL;
  static constexpr uint64_t kInvMix2 = 0x9cb4b2f8129337dbULL;
  static constexpr uint64_t kShift = 33;

  // Undo the forward steps in reverse order: x ^= x >> 33 is its own
  // inverse for shifts >= 32, and each multiply is undone by the
  // corresponding modular inverse.
  uint64_t x = idx;
  x = (x ^ (x >> kShift)) * kInvMix2;
  x = (x ^ (x >> kShift)) * kInvMix1;
  return x ^ (x >> kShift);
}
1175 
1176 /**
1177  * \}
1178  */
1179 
1180 } // End namespace turi
1181 
1182 namespace std {
1183 
1184 // Gets rid of a stupid clang mismatch of things
1185 #ifdef __clang__
1186 #pragma clang diagnostic push
1187 #pragma clang diagnostic ignored "-Wmismatched-tags"
1188 #endif
1189 
1190 #ifndef HASH_FOR_UINT128_DEFINED
1191 
1192 template <> struct hash<uint128_t> {
1193  size_t operator()(const uint128_t& t) const {
1194  return size_t(t ^ (t >> 64));
1195  }
1196 };
1197 
1198 #endif
1199 
1200 #ifndef HASH_FOR_INT128_DEFINED
1201 
1202 template <> struct hash<int128_t> {
1203  size_t operator()(const int128_t& t) const {
1204  return size_t(t ^ (t >> 64));
1205  }
1206 };
1207 
1208 #endif
1209 
1210 
1211 
1212 #ifdef __clang__
1213 #pragma clang diagnostic pop
1214 #endif
1215 
1216 }
1217 
1218 
1219 #endif /* _CITYHASH_GL_H_ */
static uint32_t simple_random_mapping(size_t index, size_t seed)
static uint128_t hash128(const char *s, size_t len)
static uint64_t reverse_index_hash(uint64_t idx)
STL namespace.
static uint64_t hash64_combine(uint64_t h1, uint64_t h2)
static uint128_t hash128_combine(uint128_t h1, uint128_t h2)
static uint64_t hash64(const char *s, size_t len)
static uint64_t hash64_update(uint64_t h1, const T &t)
uint64_t hash64_proportion_cutoff(double proportion)
static uint128_t hash128_update(uint128_t h, const T &v)
static uint64_t index_hash(uint64_t idx)