6 #ifndef TURI_SKETCH_HYPERLOGLOG_HPP 7 #define TURI_SKETCH_HYPERLOGLOG_HPP 11 #include <core/util/bitops.hpp> 12 #include <core/util/cityhash_tc.hpp> 13 #include <core/logging/assertions.hpp> 48 std::vector<unsigned char> m_buckets;
57 m_b(b), m_m(1 << b), m_buckets(m_m, 0) {
71 m_alpha = 0.7213 / (1 + 1.079 / (double)m_m);
81 void add(
const T& t) {
86 size_t index = h >> (64 - m_b);
87 DASSERT_LT(index, m_buckets.size());
88 unsigned char pos = h != 0 ? 1 + __builtin_clz(static_cast<unsigned int>(h)) :
sizeof(size_t);
89 m_buckets[index] = std::max(m_buckets[index], pos);
100 ASSERT_EQ(m_buckets.size(), other.m_buckets.size());
101 for (
size_t i = 0;i < m_buckets.size(); ++i) {
102 m_buckets[i] = std::max(m_buckets[i], other.m_buckets[i]);
111 return estimate() * 1.04 / std::sqrt(m_m);
119 for (
size_t i = 0;i < m_buckets.size(); ++i) {
120 E += std::pow(2.0, -(
double)m_buckets[i]);
122 E = m_alpha * m_m * m_m / E;
124 if (E <= 2.5 * m_m) {
125 size_t zero_count = 0;
126 for(
auto i: m_buckets) zero_count += (i == 0);
127 if (zero_count != 0) E = m_m * std::log((
double)m_m / zero_count);
hyperloglog(size_t b=16)
Buckets.
static uint64_t hash64(const char *s, size_t len)
void combine(const hyperloglog &other)