Turi Create  4.0
space_saving_flextype.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SKETCHES_SPACE_SAVING_SKETCH_FLEXTYPE_HPP
7 #define TURI_SKETCHES_SPACE_SAVING_SKETCH_FLEXTYPE_HPP
8 
9 #include <vector>
10 #include <ml/sketches/space_saving.hpp>
11 #include <core/data/flexible_type/flexible_type.hpp>
12 
13 namespace turi {
14 namespace sketches {
15 
16 ////////////////////////////////////////////////////////////////////////////////
17 
18 /**
19  * \ingroup sketching
20  * Provides an efficient wrapper around the space_saving sketch customized for
21  * use with flexible type. See \ref space_saving
22  */
24  private:
25  std::unique_ptr<space_saving<flex_int> > ss_integer;
26  std::unique_ptr<space_saving<flexible_type> > ss_general;
27 
28  bool is_combined = false;
29  double m_epsilon = 0;
30 
31  public:
32 
34  : ss_integer(new space_saving<flex_int>(*(ss.ss_integer)))
35  , ss_general(new space_saving<flexible_type>(*(ss.ss_general)))
36  , is_combined(ss.is_combined)
37  , m_epsilon(ss.m_epsilon)
38  {}
39 
41 
42  const space_saving_flextype& operator=(const space_saving_flextype& ss) {
43  ss_integer.reset(new space_saving<flex_int>(*(ss.ss_integer)));
44  ss_general.reset(new space_saving<flexible_type>(*(ss.ss_general)));
45  is_combined = ss.is_combined;
46  m_epsilon = ss.m_epsilon;
47  return *this;
48  }
49 
50  /**
51  * Constructs a save saving sketch using 1 / epsilon buckets.
52  * The resultant hyperloglog datastructure will 1 / epsilon memory, and
53  * guarantees that all elements with occurances >= \epsilon N will be reported.
54  */
55  inline space_saving_flextype(double epsilon = 0.0001)
56  : m_epsilon(epsilon)
57  {
58  ss_integer.reset(new space_saving<flex_int>(epsilon));
59  ss_general.reset(new space_saving<flexible_type>(epsilon));
60  }
61 
62  /**
63  * Adds an item with a specified count to the sketch.
64  */
65  inline void add(const flexible_type& t, size_t count = 1) {
66  switch(t.get_type()) {
68  ss_integer->add(t.get<flex_int>(), count);
69  is_combined = false;
70  break;
72  if(!std::isnan(t.get<flex_float>()))
73  ss_general->add(t, count);
74  break;
76  break;
77  default:
78  ss_general->add(t, count);
79  break;
80  }
81  }
82 
83  /**
84  * Returns the number of elements inserted into the sketch.
85  */
86  size_t size() const {
87  return ss_general->size() + ss_integer->size();
88  }
89 
90  /**
91  * Returns all the elements tracked by the sketch as well as an
92  * estimated count. The estimated can be a large overestimate.
93  */
94  inline std::vector<std::pair<flexible_type, size_t> > frequent_items() {
95  if(!is_combined)
96  _combine_integer_and_general();
97 
98  return ss_general->frequent_items();
99  }
100 
101  /**
102  * Returns all the elements tracked by the sketch as well as an
103  * estimated count. The estimated can be a large overestimate.
104  */
105  inline std::vector<std::pair<flexible_type, size_t> > guaranteed_frequent_items() {
106  if(!is_combined)
107  _combine_integer_and_general();
108 
109  return ss_general->guaranteed_frequent_items();
110  }
111 
112  /**
113  * Merges a second space saving sketch into the current sketch
114  */
116  if(!is_combined)
117  _combine_integer_and_general();
118  if(!other.is_combined)
119  other._combine_integer_and_general();
120 
121  ss_general->combine(*other.ss_general);
122  }
123 
124  void clear() {
125  ss_general->clear();
126  ss_integer->clear();
127  }
128 
129  ~space_saving_flextype() { }
130 
131  private:
132 
133  /** Combine in the integer and general hashes.
134  */
135  void _combine_integer_and_general() {
136  ss_general->combine(*ss_integer);
137  ss_integer->clear();
138  is_combined = true;
139  }
140 
141 };
142 
143 } // sketch
144 } // namespace turi
145 #endif
void add(const flexible_type &t, size_t count=1)
void combine(space_saving_flextype &other)
const T & get() const
flex_type_enum get_type() const
std::vector< std::pair< flexible_type, size_t > > frequent_items()
std::vector< std::pair< flexible_type, size_t > > guaranteed_frequent_items()