Turi Create  4.0
timeseries.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TIMESERIES_H
7 #define TIMESERIES_H
8 
9 #include <string>
10 #include <vector>
11 #include <deque>
12 #include <iostream>
13 #include <cassert>
14 #include <math.h>
15 #include <core/logging/logger.hpp>
16 #include <core/data/flexible_type/flexible_type.hpp>
17 #include <core/storage/sframe_data/group_aggregate_value.hpp>
18 #include <model_server/lib/toolkit_class_macros.hpp>
19 #include <core/data/sframe/gl_sarray.hpp>
20 #include <core/data/sframe/gl_sframe.hpp>
21 #include <model_server/lib/extensions/model_base.hpp>
22 #include <model_server/extensions/timeseries/grouped_timeseries.hpp>
23 #include <model_server/extensions/timeseries/interpolate_value.hpp>
24 
25 
26 namespace turi {
27 namespace timeseries {
28 
29 
30 EXPORT gl_sarray date_range(const flexible_type &start_time,
31  const flexible_type &end_time,
32  const flexible_type & period);
33 
34 typedef std::shared_ptr<interpolator_value> interpolator_type;
35 class grouped_timeseries;
36 
37 /***
38  * gl_timeseries is the fundamental data-structure to hold multi-variate
39  * timeseries data. It is backed by a single gl_sframe and some meta-data.
40  *
41  ***/
42 class EXPORT gl_timeseries : public model_base {
43  public:
44  virtual ~gl_timeseries();
45 
46  protected:
47  static constexpr size_t TIMESERIES_VERSION = 0;
48  gl_sframe m_sframe; // The backend gl_sframe
49  bool m_initialized = false;
50 
51  void _check_if_initialized() const {
52  if(!m_initialized)
53  throw std::string("Timeseries is not initialized.");
54  }
55 
56  public:
57  std::vector<std::string> m_value_col_names;
58  std::string m_index_col_name;
59 
60  gl_sframe get_sframe() const {
61  return m_sframe;
62  }
63  void set_sframe(gl_sframe sf) {
64  m_sframe = sf;
65  }
66 
67  std::string get_index_col_name() const {
68  return m_index_col_name;
69  }
70 
71  flex_type_enum get_index_col_type() const {
72  return m_sframe[m_index_col_name].dtype();
73  }
74 
75  void set_index_col_name(std::string index_col) {
76  m_index_col_name = index_col;
77  }
78 
79  std::vector<std::string> get_value_col_names() const {
80  return m_value_col_names;
81  }
82  void set_value_col_names(std::vector<std::string> val_names) {
83  m_value_col_names = val_names;
84  }
85  /**
86  * Get a version for the object.
87  **/
88  size_t get_version() const {
89  return TIMESERIES_VERSION;
90  }
91 
92  /**
93  * Serializes gl_timeseries object. Must save the object to the file
94  * format version matching that of get_version()
95  **/
96  void save_impl(oarchive& oarc) const;
97 
98  /**
99  * Loads a gl_timeseries object previously saved at a particular version
100  * number. Should raise an exception on failure.
101  **/
102  void load_version(iarchive& iarc, size_t version);
103 
104  void init(const gl_sframe & _input_sf,const std::string & _name, bool
105  is_sorted=false,std::vector<int64_t> ranges={-1,-1});
106 
107  /**
108  * resample operator does down/up sampling.
109  *
110  * \param[in] period : Period to resample to (in seconds).
111  * \param[in] operators : Operators for aggregation.
112  * \param[in] fill_method : Interpolation scheme.
113  * \param[in] label : The timestamp recorded in the output
114  * TimeSeries to determine which 654 end point
115  * (left or right) to use to denote the time
116  * slice.
117  * \param[in] closed : Determines which side of the interval in the
118  * time slice is closed. Must be ['left' or
119  * 'right']
120  **/
121  gl_timeseries resample(
122  const flex_float& period,
123  const std::map<std::string,
124  aggregate::groupby_descriptor_type>& operators,
125  interpolator_type interpolation_fn =
126  get_builtin_interpolator("__builtin__none__"),
127  const std::string& label = "left",
128  const std::string& closed = "right") const;
129 
130  /**
131  * Python wrapper for resampling. This function is the one exposed via the
132  * SDK to python.
133  *
134  * \note The reason for this is because the aggregate, and interpolation types
135  * are not objects that are registered via Python.
136  *
137  * \note Since the SDK does not support more than 6 arguments, the
138  * downsample_params, and upsample_params have been combined.
139  *
140  *
141  * downsample_params : (ds_columns, ds_output_columns, ds_ops)
142  * upsample_params : (up_op)
143  */
144  gl_timeseries resample_wrapper(
145  double period,
146  const flex_list& downsample_params,
147  const flex_list& upsample_params,
148  const std::string& label,
149  const std::string& close) const;
150 
151  /**
152  * shift the index column of the gl_timeseries by number of seconds.
153  **/
154  gl_timeseries tshift(const flex_float & delta);
155 
156  /**
157  * Shift the non-index columns in the TimeSeries object by specified
158  * number of steps.
159  * The rows at the boundary with no values anymore are replaced by None
160  * values.
161  **/
162  gl_timeseries shift(const int64_t & steps);
163 
164  gl_timeseries slice(const flexible_type &start_time,
165  const flexible_type &end_time,
166  const std::string &closed) const;
167 
168 
169  /**
170  * join this TimeSeries with the 'other_ts' TimeSeries on their index
171  * column.
172  *
173  * other_ts: the other TimeSeries object.
174  * how: how to join two TimeSeries. Accepted methods are 'inner','outer',
175  * and 'left'
176  * index_column_name: the new name for the index column of the output
177  * TimeSeries.
178  **/
179  gl_timeseries index_join(const gl_timeseries& other_ts, const
180  std::string& how, const std::string& index_column_name);
181 
182  /**
183  * union this TimeSeries with the 'other_ts' TimeSeries.
184  **/
185  gl_timeseries ts_union(const gl_timeseries& other_ts);
186 
187  gl_grouped_timeseries group(std::vector<std::string> key_columns);
188  void add_column(const gl_sarray& data, const std::string& name="");
189  void remove_column(const std::string& name);
190 
191  BEGIN_CLASS_MEMBER_REGISTRATION("_Timeseries")
192  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::tshift, "delta")
193  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::shift, "steps")
194  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::slice, "start_time",
195  "end_time", "closed")
196  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::init, "_input_sf",
197  "_name", "is_sorted", "ranges")
198  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::index_join, "other_ts",
199  "how", "index_column_name")
200  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::group, "key_columns")
201  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::add_column, "data","name")
202  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::remove_column, "name")
203  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::ts_union, "other_ts")
204  REGISTER_CLASS_MEMBER_FUNCTION(gl_timeseries::resample_wrapper, "period",
205  "downsample_params", "upsample_params", "left", "close")
206 
207  REGISTER_GETTER("sframe", gl_timeseries::get_sframe)
208  REGISTER_SETTER("sframe", gl_timeseries::set_sframe)
209  REGISTER_GETTER("value_col_names", gl_timeseries::get_value_col_names)
210  REGISTER_SETTER("value_col_names", gl_timeseries::set_value_col_names)
211  REGISTER_GETTER("index_col_name", gl_timeseries::get_index_col_name)
212  REGISTER_SETTER("index_col_name", gl_timeseries::set_index_col_name)
213 
215  };
216 
217 
218 } // timeseries
219 } // turi
220 
221 #endif
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
sframe group(sframe sframe_in, std::string key_column)
#define REGISTER_SETTER(propname, function)
#define END_CLASS_MEMBER_REGISTRATION
#define REGISTER_GETTER(propname, function)
std::vector< flexible_type > flex_list