6 #ifndef TURI_SKETCHES_COUNTMIN_HPP 7 #define TURI_SKETCHES_COUNTMIN_HPP 11 #include <core/random/random.hpp> 12 #include <core/util/cityhash_tc.hpp> 13 #include <core/logging/assertions.hpp> 14 #include <core/storage/serialization/serialization_includes.hpp> 51 std::vector<size_t> seeds;
52 std::vector<std::vector<size_t> > counts;
64 explicit inline countmin(
size_t bits = 16,
size_t depth = 4,
size_t seed=1000):
65 num_hash(depth), num_bits(bits), num_bins(1 << num_bits) {
66 ASSERT_GE(num_bins, 16);
71 for (
size_t j = 0; j < num_hash; ++j) {
72 seeds.push_back(gen.
fast_uniform<
size_t>(0, std::numeric_limits<size_t>::max()));
73 counts.push_back(std::vector<size_t>(num_bins));
82 void add(
const T& t,
size_t count = 1) {
84 size_t i =
hash64(std::hash<T>()(t));
85 for (
size_t j = 0; j < num_hash; ++j) {
86 size_t bin =
hash64(seeds[j] ^ i) % num_bins;
87 counts[j][bin] += count;
98 size_t i =
hash64(std::hash<T>()(t));
99 for (
size_t j = 0; j < num_hash; ++j) {
100 size_t bin =
hash64(seeds[j] ^ i) % num_bins;
101 __sync_add_and_fetch(&(counts[j][bin]), count);
109 size_t E = std::numeric_limits<size_t>::max();
110 size_t i =
hash64(std::hash<T>()(t));
113 for (
size_t j = 0; j < num_hash; ++j) {
114 size_t bin =
hash64(seeds[j] ^ i) % num_bins;
115 if (counts[j][bin] < E)
126 ASSERT_EQ(num_bins, other.num_bins);
127 ASSERT_EQ(num_hash, other.num_hash);
129 for (
size_t j = 0; j < num_hash; ++j) {
130 for (
size_t b = 0; b < num_bins; ++b) {
131 counts[j][b] += other.counts[j][b];
142 for (
size_t j = 0; j < num_hash; ++j) {
144 for (
size_t b = 0; b < num_bins; ++b) {
145 std::cout << counts[j][b] <<
" ";
147 std::cout << std::endl;
156 for (
size_t j = 0; j < num_hash; ++j) {
157 for (
size_t b = 0; b < num_bins; ++b) {
158 if (counts[j][b] != 0) count += 1;
161 return (
double) count / (double) (num_hash * num_bins);
NumType fast_uniform(const NumType min, const NumType max)
void combine(const countmin &other)
void add(const T &t, size_t count=1)
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
void seed()
Seed the generator using the default seed.
static uint64_t hash64(const char *s, size_t len)
size_t estimate(const T &t) const
countmin(size_t bits=16, size_t depth=4, size_t seed=1000)
Buckets.
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
void atomic_add(const T &t, size_t count=1)