6 #ifndef __TC_VIS_GROUPBY 7 #define __TC_VIS_GROUPBY 9 #include <core/storage/sframe_data/groupby_aggregate_operators.hpp> 10 #include <core/data/sframe/gl_sframe.hpp> 11 #include <core/util/sys_util.hpp> 13 #include "transformation.hpp" 16 namespace visualization {
// Aggregator state: one groupby_operators aggregator object per statistic
// (average, count, max, min, sum, stdv, variance).
20 groupby_operators::average m_average;
21 groupby_operators::count m_count;
22 groupby_operators::max m_max;
23 groupby_operators::min m_min;
24 groupby_operators::sum m_sum;
25 groupby_operators::stdv m_stdv;
26 groupby_operators::variance m_variance;
// Aggregation interface used by groupby_result.
//
// Takes one value; groupby_result's value overload of update_or_combine
// calls this for each incoming value.
29 void add_element_simple(
const flexible_type& value);
// Takes another summary_stats; groupby_result calls this when merging one
// aggregator into another.
30 void combine(
const summary_stats& other);
// groupby_result invokes this on the source aggregator immediately before
// passing it to combine().
31 void partial_finalize();
// Const; returns a flexible_type. groupby_result::get_grouped() stores this
// result as the per-category value.
32 flexible_type emit()
const;
// Keeps one Aggregation instance per distinct category key.
// Aggregation must provide the members this class calls on it:
// set_input_type, add_element_simple, partial_finalize, combine and emit.
42 template<
typename Aggregation>
43 class groupby_result {
// Category key -> aggregator accumulating that category's values.
46 std::unordered_map<flexible_type, Aggregation> m_aggregators;
// Emplaces a default-constructed Aggregation under `category` (emplace leaves
// an already-present entry untouched), then hands m_type to the stored
// aggregator's set_input_type().
// Virtual: groupby_quantile_result declares an override.
48 virtual void insert_category(
const flexible_type& category) {
// NOTE(review): `inserted` is not read in the lines below; the macro name
// suggests it only matters to debug-build checks — confirm.
49 TURI_ATTRIBUTE_UNUSED_NDEBUG
auto inserted =
50 m_aggregators.emplace(category, Aggregation());
52 auto& agg = m_aggregators.at(category);
54 agg.set_input_type(m_type);
// Cap on the number of distinct categories: update_or_combine only inserts a
// new category while m_aggregators.size() is below this value.
58 constexpr
static size_t CATEGORY_LIMIT = 1000;
// Value overload: feeds a single flexible_type value into `aggregation`
// through add_element_simple().
62 static void update_or_combine(Aggregation& aggregation,
const flexible_type& other) {
63 aggregation.add_element_simple(other);
// Aggregator overload: merges `other` into `aggregation` via combine().
// NOTE(review): partial_finalize() is called on `other` through a const_cast,
// so the argument is modified even though it is taken by const reference —
// confirm no caller passes a genuinely const object.
65 static void update_or_combine(Aggregation& aggregation,
const Aggregation& other) {
68 const_cast<Aggregation&
>(other).partial_finalize();
69 aggregation.combine(other);
// Routes `value` to the aggregator stored for `category`; the static
// update_or_combine overload chosen depends on the type of `value`.
// NOTE(review): T is presumably a template parameter declared just above
// this signature — confirm.
74 void update_or_combine(
const flexible_type& category,
const T& value) {
75 auto find_key = m_aggregators.find(category);
76 if (find_key == m_aggregators.end()) {
// Unseen category: admit it only while under CATEGORY_LIMIT.
78 if (m_aggregators.size() < CATEGORY_LIMIT) {
79 this->insert_category(category);
80 groupby_result::update_or_combine(m_aggregators.at(category), value);
// NOTE(review): presumably the over-limit branch — counts a value whose
// category was not admitted; confirm.
82 m_omitted_categories++;
// Category already tracked: update its existing aggregator.
85 groupby_result::update_or_combine((*find_key).second, value);
// Records one (category, value) observation: sets the input type, then
// forwards both arguments to update_or_combine().
// NOTE(review): `type` is not declared in the lines visible here —
// presumably derived from `value`; confirm.
88 void update(
const flexible_type& category,
const flexible_type& value) {
93 this->set_input_type(type);
94 this->update_or_combine(category, value);
// Merges `other` into this result: adopts other's input type, then passes
// each (category, aggregator) pair of `other` to update_or_combine().
98 void combine(
const groupby_result<Aggregation>& other) {
99 this->set_input_type(other.get_input_type());
100 for (
const auto& pair : other.m_aggregators) {
101 this->update_or_combine(pair.first, pair.second);
// Vector entry point: values[0] is used as the category and values[1] as the
// value; any further elements are not read here.
// NOTE(review): the size >= 2 requirement is enforced only by DASSERT_GE,
// presumably a debug-only assertion — confirm.
104 void update(
const std::vector<flexible_type>&
values) {
107 DASSERT_GE(values.size(), 2);
108 this->update(values[0], values[1]);
// Builds a map from each tracked category to the flexible_type produced by
// that category's aggregator emit().
110 std::unordered_map<flexible_type, flexible_type> get_grouped()
const {
111 std::unordered_map<flexible_type, flexible_type> ret;
112 for (
const auto& pair : m_aggregators) {
113 ret.emplace(pair.first, pair.second.emit());
// Returns m_omitted_categories, the counter that update_or_combine()
// increments for a value whose category is not tracked.
// Const-qualified so it can be called on a const result, matching
// get_grouped(); the function only reads the counter.
117 flex_int get_omitted() const {
return m_omitted_categories; }
128 void add_element_simple(
const flexible_type& value) {
// transformation over a gl_sframe whose result type is Result.
// Result must provide combine(), which merge_results() calls.
134 template<
typename Result>
135 class groupby :
public transformation<gl_sframe, Result> {
// Folds every element of `transformers` into this->m_transformer by calling
// combine() with it.
137 virtual void merge_results(std::vector<Result>& transformers)
override {
138 for (
auto& result : transformers) {
139 this->m_transformer->combine(result);
// groupby_result whose per-category aggregator is summary_stats.
144 class groupby_summary_result :
public groupby_result<summary_stats> {
// groupby transformation whose result type is groupby_summary_result.
147 class groupby_summary :
public groupby<groupby_summary_result> {
// groupby_result whose per-category aggregator is groupby_operators::quantile.
150 class groupby_quantile_result :
public groupby_result<groupby_operators::quantile> {
// Replaces the base insert_category(); only declared here, so the
// definition lives outside this class body.
152 virtual void insert_category(
const flexible_type& category)
override;
// groupby transformation whose result type is groupby_quantile_result.
155 class groupby_quantile :
public groupby<groupby_quantile_result> {
160 #endif // __TC_VIS_GROUPBY
std::set< T > values(const std::map< Key, T > &map)
std::vector< flexible_type > flex_list
#define DASSERT_TRUE(cond)