Turi Create  4.0
grouped_timeseries.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_GROUPED_TIMESERIES_HPP
7 #define TURI_GROUPED_TIMESERIES_HPP
8 
9 #include <model_server/lib/toolkit_class_macros.hpp>
10 #include <model_server/lib/extensions/model_base.hpp>
11 #include <model_server/extensions/grouped_sframe.hpp>
12 
13 namespace turi {
14 namespace timeseries {
15 
16 class gl_timeseries;
17 
18 class EXPORT gl_grouped_timeseries : public model_base {
19  public:
20  /**
21  * Groups a TimeSeries by the distinct values in one or more columns.
22  *
23  * Logically, this creates a TimeSeries for each "group" of values, where the
24  * new TimeSeries' all have the same number of columns as the original
25  * TimeSeries. These are accessed through the interface of this data
26  * structure.
27  *
28  * \param sf The underlying SFrame of the TimeSeries.
29  * \param index_col_name The index column of the TimeSeries.
30  * \param column_names List of column names to group by.
31  *
32  * Throws if group has already been called on this object, or the column
33  * names are not valid.
34  */
35  void group(const gl_sframe &sf,
36  std::string index_col_name,
37  const std::vector<std::string> column_names);
38 
39  /**
40  * Get the SFrame that corresponds to the group named `key`.
41  *
42  * Each group's name is its distinct value, including its type. This means
43  * that an SFrame grouped by a column of integers that has some 1s and some
44  * 2s, the name of the group with ones is the integer 1, not the string '1'.
45  * The key is given as a vector because more than one columns can be used to
46  * group.
47  *
48  * \param key Name of group to retrieve.
49  * \returns An SFrame that can immediately be interpreted as a TimeSeries
50  * (i.e. it is sorted by its time index column.)
51  */
52  gl_sframe get_group(const std::vector<flexible_type> key);
53 
54  /**
55  * The number of distinct groups found.
56  */
57  size_t num_groups() const {
58  return m_grouped_sframe.num_groups();
59  }
60 
61  /**
62  * Return an SFrame with group_info i.e key columns + number of rows in each
63  * key column.
64  */
65  gl_sframe group_info() const {
66  return m_grouped_sframe.group_info();
67  }
68 
69  /**
70  * A list of all the group names.
71  */
72  gl_sarray groups() {
73  return m_grouped_sframe.groups();
74  }
75 
76  void begin_iterator() {
77  m_grouped_sframe.begin_iterator();
78  }
79 
80  std::vector<std::pair<flexible_type, gl_sframe>> iterator_get_next(size_t num);
81 
82  /**
83  * Return the index column name of the time series (not the same as the group
84  * column)
85  */
86  gl_sframe get_sframe() const {
87  return m_grouped_sframe.get_sframe();
88  }
89 
90  /**
91  * Return the index column name of the time series (not the same as the group
92  * column)
93  */
94  std::string get_index_column_name() const {
95  return m_time_index_name;
96  }
97 
98  /**
99  * Return the value columns in the timeseries.
100  */
101  std::vector<std::string> get_value_col_names() const {
102  return m_value_col_names;
103  }
104 
105  /**
106  * Return the list of columns on which the data is grouped.
107  */
108  std::vector<std::string> get_key_col_names() const {
109  return m_key_col_names;
110  }
111 
112  private:
113  grouped_sframe m_grouped_sframe;
114  std::string m_time_index_name;
115  std::vector<std::string> m_key_col_names;
116  std::vector<std::string> m_value_col_names;
117 
118  public:
119  BEGIN_CLASS_MEMBER_REGISTRATION("_GroupedTimeseries")
120 
121  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::group, "data",
122  "index_name", "column_names")
123  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::num_groups)
124  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::groups)
125  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::group_info)
126  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::begin_iterator)
127  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::iterator_get_next,
128  "num_items")
129  REGISTER_CLASS_MEMBER_FUNCTION(gl_grouped_timeseries::get_group, "key")
130 
131  REGISTER_GETTER("sframe", gl_grouped_timeseries::get_sframe)
132  REGISTER_GETTER("index_column_name",
133  gl_grouped_timeseries::get_index_column_name)
134  REGISTER_GETTER("value_col_names", gl_grouped_timeseries::get_value_col_names)
135  REGISTER_GETTER("key_columns", gl_grouped_timeseries::get_key_col_names)
136 
138 };
139 
140 } // namespace timeseries
141 } // namespace turi
142 #endif // TURI_GROUPED_TIMESERIES_HPP
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
sframe group(sframe sframe_in, std::string key_column)
#define END_CLASS_MEMBER_REGISTRATION
#define REGISTER_GETTER(propname, function)