6 #ifndef TURI_SKETCHES_COUNTSKETCH_HPP 7 #define TURI_SKETCHES_COUNTSKETCH_HPP 11 #include <core/random/random.hpp> 12 #include <core/util/cityhash_tc.hpp> 13 #include <core/logging/assertions.hpp> 17 typedef int64_t counter_int;
42 std::vector<size_t> seeds;
43 std::vector<size_t> seeds_binary;
45 std::vector<std::vector<counter_int> > counts;
57 explicit inline countsketch(
size_t bits = 16,
size_t depth = 5,
size_t seed = 1000):
58 num_hash(depth), num_bits(bits), num_bins(1 << num_bits) {
59 ASSERT_GE(num_bins, 16);
64 for (
size_t j = 0; j < num_hash; ++j) {
65 seeds.push_back(gen.
fast_uniform<
size_t>(0, std::numeric_limits<size_t>::max()));
66 seeds_binary.push_back(gen.
fast_uniform<
size_t>(0, std::numeric_limits<size_t>::max()));
68 counts.push_back(std::vector<counter_int>(num_bins));
80 void add(
const T& t,
size_t count = 1) {
83 size_t i =
hash64(std::hash<T>()(t));
85 for (
size_t j = 0; j < num_hash; ++j) {
87 counter_int s = (counter_int)(
hash64(seeds_binary[j] ^ i) & 1);
90 size_t bin =
hash64(seeds[j] ^ i) % num_bins;
91 counts[j][bin] += s * (counter_int) count;
101 size_t i =
hash64(std::hash<T>()(t));
104 std::vector<counter_int> estimates;
105 for (
size_t j = 0; j < num_hash; ++j) {
107 counter_int s = (counter_int) (
hash64(seeds_binary[j] ^ i) & 1);
110 size_t bin =
hash64(seeds[j] ^ i) % num_bins;
111 counter_int
estimate = s * counts[j][bin];
112 estimates.push_back(estimate);
116 std::nth_element(estimates.begin(),
117 estimates.begin() + estimates.size()/2,
120 return estimates[estimates.size()/2];
128 ASSERT_EQ(num_bins, other.num_bins);
129 ASSERT_EQ(num_hash, other.num_hash);
131 for (
size_t j = 0; j < num_hash; ++j) {
132 for (
size_t b = 0; b < num_bins; ++b) {
133 counts[j][b] += other.counts[j][b];
144 for (
size_t j = 0; j < num_hash; ++j) {
146 for (
size_t b = 0; b < num_bins; ++b) {
147 std::cout << counts[j][b] <<
" ";
149 std::cout << std::endl;
158 for (
size_t j = 0; j < num_hash; ++j) {
159 for (
size_t b = 0; b < num_bins; ++b) {
160 if (counts[j][b] != 0) count += 1;
163 return (
double) count / (double) (num_hash * num_bins);
void add(const T &t, size_t count=1)
void combine(const countsketch &other)
NumType fast_uniform(const NumType min, const NumType max)
void seed()
Seed the generator using the default seed.
static uint64_t hash64(const char *s, size_t len)
countsketch(size_t bits=16, size_t depth=5, size_t seed=1000)
Buckets.
counter_int estimate(const T &t)