Turi Create  4.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
activity_classifier.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_ACTIVITY_CLASSIFIER_H_
7 #define TURI_ACTIVITY_CLASSIFIER_H_
8 
9 #include <core/logging/table_printer/table_printer.hpp>
10 #include <model_server/lib/extensions/ml_model.hpp>
11 #include <core/data/sframe/gl_sframe.hpp>
12 #include <toolkits/activity_classification/ac_data_iterator.hpp>
13 #include <toolkits/coreml_export/mlmodel_wrapper.hpp>
14 #include <ml/neural_net/compute_context.hpp>
15 #include <ml/neural_net/model_backend.hpp>
16 #include <ml/neural_net/model_spec.hpp>
17 
18 namespace turi {
19 namespace activity_classification {
20 
21 class EXPORT activity_classifier: public ml_model_base {
22  public:
23  // TODO: Move this model-spec generation code into a separate file, ideally
24  // in the neural_net directory.
25  static std::unique_ptr<neural_net::model_spec> init_model(
26  const std::string& target, const std::vector<std::string>& features,
27  size_t prediction_window, size_t num_classes, bool use_random_init,
28  int random_seed);
29 
30  static std::tuple<gl_sframe, gl_sframe> random_split_by_session(
31  gl_sframe data, std::string session_id_column_name, float fraction,
32  size_t seed);
33 
34  // ml_model_base interface
35 
36  void init_options(const std::map<std::string, flexible_type>& opts) override;
37  size_t get_version() const override;
38  void save_impl(oarchive& oarc) const override;
39  void load_version(iarchive& iarc, size_t version) override;
40 
41  // Interface exposed via Unity server
42 
43  void train(gl_sframe data, std::string target_column_name,
44  std::string session_id_column_name, variant_type validation_data,
45  std::map<std::string, flexible_type> opts);
46  gl_sarray predict(gl_sframe data, std::string output_type);
47  gl_sframe predict_per_window(gl_sframe data, std::string output_type);
48  gl_sframe classify(gl_sframe data, std::string output_frequency);
49  gl_sframe predict_topk(gl_sframe data, std::string output_type, size_t k,
50  std::string output_frequency);
51  variant_map_type evaluate(gl_sframe data, std::string metric);
52  std::shared_ptr<coreml::MLModelWrapper> export_to_coreml(
53  std::string filename, std::string short_description,
54  std::map<std::string, flexible_type> additional_user_defined);
55  void import_from_custom_model(variant_map_type model_data, size_t version);
56 
57  // Support for iterative training.
58  virtual void init_training(gl_sframe data, std::string target_column_name,
59  std::string session_id_column_name,
60  variant_type validation_data,
61  std::map<std::string, flexible_type> opts);
62  virtual void resume_training(gl_sframe data, variant_type validation_data);
63  virtual void iterate_training();
64  virtual void synchronize_training();
65  virtual void finalize_training();
66 
67  BEGIN_CLASS_MEMBER_REGISTRATION("activity_classifier")
68 
69  IMPORT_BASE_CLASS_REGISTRATION(ml_model_base);
70 
71  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::train, "data", "target",
72  "session_id", "validation_data", "options");
73  register_defaults("train",
74  {{"validation_data", to_variant(std::string("auto"))},
75  {"options",
76  to_variant(std::map<std::string, flexible_type>())}});
78  activity_classifier::train,
79  "----------\n"
80  "data : SFrame\n"
81  " Input data which consists of `sessions` of data where each session "
82  "is\n"
83  " a sequence of data. The data must be in `stacked` format, grouped "
84  "by\n"
85  " session. Within each session, the data is assumed to be sorted\n"
86  " temporally. Columns in `features` will be used to train a model "
87  "that\n"
88  " will make a prediction using labels in the `target` column.\n"
89  "target : string\n"
90  " Name of the column containing the target variable. The values in "
91  "this\n"
92  " column must be of string or integer type.\n"
93  "session_id : string\n"
94  " Name of the column that contains a unique ID for each session.\n"
95  "validatation_data : SFrame or string\n"
96  " A dataset for monitoring the model's generalization performance to\n"
97  " prevent the model from overfitting to the training data.\n"
98  "\n"
99  " For each row of the progress table, accuracy is measured over the\n"
100  " provided training dataset and the `validation_data`. The format of\n"
101  " this SFrame must be the same as the training set.\n"
102  "\n"
103  " When set to 'auto', a validation set is automatically sampled from "
104  "the\n"
105  " training data (if the training data has > 100 sessions).\n"
106  "options : dict\n"
107  "\n"
108  "Options\n"
109  "-------\n"
110  "features : list[string]\n"
111  " Name of the columns containing the input features that will be "
112  "used\n"
113  " for classification. If not set, all columns except `session_id` "
114  "and\n"
115  " `target` will be used.\n"
116  "prediction_window : int\n"
117  " Number of time units between predictions. For example, if your "
118  "input\n"
119  " data is sampled at 100Hz, and the `prediction_window` is set to "
120  "100\n"
121  " (the default), then this model will make a prediction every 1 "
122  "second.\n"
123  "max_iterations : int\n"
124  " Maximum number of iterations/epochs made over the data during the\n"
125  " training phase. The default is 10 iterations.\n"
126  "batch_size : int\n"
127  " Number of sequence chunks used per training step. Must be greater "
128  "than\n"
129  " the number of GPUs in use. The default is 32.\n"
130  "random_seed : int\n"
131  " The given seed is used for random weight initialization and "
132  "sampling\n"
133  " during training\n");
134 
135  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::init_training, "data",
136  "target", "session_id", "validation_data",
137  "options");
138  register_defaults("init_training",
139  {{"validation_data", to_variant(std::string("auto"))},
140  {"options",
141  to_variant(std::map<std::string, flexible_type>())}});
142 
143  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::resume_training, "data",
144  "validation_data");
145  register_defaults("resume_training",
146  {{"validation_data", to_variant(std::string("auto"))}});
147 
148  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::iterate_training);
149  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::synchronize_training);
150  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::finalize_training);
151 
152  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::predict, "data",
153  "output_type");
154  register_defaults("predict", {{"output_type", std::string("")}});
156  activity_classifier::predict,
157  "----------\n"
158  "data : SFrame\n"
159  " Dataset of new observations. Must include columns with the same\n"
160  " names as the features used for model training, but does not require\n"
161  " a target column. Additional columns are ignored.\n"
162  "output_type : {'class', 'probability_vector'}, optional\n"
163  " Form of each prediction which is one of:\n"
164  " - 'probability_vector': Prediction probability associated with each\n"
165  " class as a vector. The probability of the first class (sorted\n"
166  " alphanumerically by name of the class in the training set) is in\n"
167  " position 0 of the vector, the second in position 1 and so on.\n"
168  " - 'class': Class prediction. This returns the class with maximum\n"
169  " probability.\n"
170  );
171 
172  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::predict_per_window,
173  "data", "output_type");
174  register_defaults("predict_per_window", {{"output_type", std::string("")}});
176  activity_classifier::predict_per_window,
177  "----------\n"
178  "data : SFrame\n"
179  " Dataset of new observations. Must include columns with the same\n"
180  " names as the features used for model training, but does not "
181  "require\n"
182  " a target column. Additional columns are ignored.\n"
183 
184  "output_type : {'class', 'probability_vector'}, optional\n"
185  " Form of each prediction which is one of:\n"
186  " - 'probability_vector': Prediction probability associated with "
187  "each\n"
188  " class as a vector. The probability of the first class (sorted\n"
189  " alphanumerically by name of the class in the training set) is in\n"
190  " position 0 of the vector, the second in position 1 and so on. \n"
191  " A probability_vector is given per prediction_window. \n"
192  " - 'class': Class prediction. This returns the class with maximum\n"
193  " probability per prediction_window.\n");
194 
195 
196  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::classify, "data",
197  "output_frequency");
198  register_defaults("classify", {{"output_frequency", "per_row"}});
199 
200  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::predict_topk, "data",
201  "output_type", "k", "output_frequency");
202  register_defaults("predict_topk", {{"output_type", "probability"},
203  {"k", 3},
204  {"output_frequency", "per_row"}});
205 
206  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::evaluate, "data",
207  "metric");
208  register_defaults("evaluate", {{"metric", std::string("auto")}});
210  activity_classifier::evaluate,
211  "----------\n"
212  "data : SFrame\n"
213  " Dataset of new observations. Must include columns with the same\n"
214  " names as the features used for model training, but does not require\n"
215  " a target column. Additional columns are ignored.\n"
216  "metric : str, optional\n"
217  " Name of the evaluation metric. Possible values are:\n"
218  " - 'auto' : Returns all available metrics\n"
219  " - 'accuracy' : Classification accuracy (micro average)\n"
220  " - 'auc' : Area under the ROC curve (macro average)\n"
221  " - 'precision' : Precision score (macro average)\n"
222  " - 'recall' : Recall score (macro average)\n"
223  " - 'f1_score' : F1 score (macro average)\n"
224  " - 'log_loss' : Log loss\n"
225  " - 'confusion_matrix' : An SFrame with counts of possible\n"
226  " prediction/true label combinations.\n"
227  " - 'roc_curve' : An SFrame containing information needed for an\n"
228  " ROC curve\n"
229  );
230  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::export_to_coreml,
231  "filename", "short_description", "additional_user_defined");
232  register_defaults("export_to_coreml",
233  {{"short_description", ""},
234  {"additional_user_defined", to_variant(std::map<std::string, flexible_type>())}});
235 
236  REGISTER_CLASS_MEMBER_FUNCTION(activity_classifier::import_from_custom_model,
237  "model_data", "version");
238 
240 
241  protected:
242  // Override points allowing subclasses to inject dependencies
243 
244  // Factory for data_iterator
245  virtual std::unique_ptr<data_iterator> create_iterator(
246  gl_sframe data, bool requires_labels, bool infer_class_labels,
247  bool is_train, bool use_data_augmentation) const;
248 
249  // Factory for compute_context
250  virtual std::unique_ptr<neural_net::compute_context> create_compute_context()
251  const;
252 
253  // Returns the initial neural network to train
254  virtual std::unique_ptr<neural_net::model_spec> init_model(
255  bool use_random_init) const;
256 
257  virtual std::tuple<gl_sframe, gl_sframe> init_data(
258  gl_sframe data, variant_type validation_data,
259  std::string session_id_column_name) const;
260 
261  virtual std::tuple<float, float> compute_validation_metrics(
262  size_t prediction_window, size_t num_classes, size_t batch_size);
263 
264  virtual void init_table_printer(bool has_validation);
265 
266  // Returns an SFrame where each row corresponds to one prediction, and
267  // containing four columns: "session_id" indicating the session ID shared by
268  // the samples in the prediction window, "prediction_id" indicating the index
269  // of the prediction window within the session, "preds" containing the class
270  // probability vector for the prediction window, and "num_samples" indicating
271  // the number of corresponding rows from the original SFrame (at most the
272  // prediction window size).
273  virtual gl_sframe perform_inference(data_iterator* data) const;
274 
275  // Utility code
276 
277  template <typename T>
278  T read_state(const std::string& key) const {
279  return variant_get_value<T>(get_state().at(key));
280  }
281 
282  private:
283  const neural_net::model_spec* read_model_spec() const;
284 
285  // Whether to include loss in the progress table, in addition to accuracy
286  bool show_loss_ = true;
287 
288  // Primary representation for the trained model.
289  // TODO: Replace model_spec with a Checkpoint class that encapsulates
290  // serialization.
291  mutable bool nn_spec_synchronized_ = false;
292  std::unique_ptr<neural_net::model_spec> nn_spec_;
293 
294  // Primary dependencies for training. These should be nonnull while training
295  // is in progress.
296  gl_sframe training_data_; // TODO: Avoid storing gl_sframe AND data_iterator.
297  gl_sframe validation_data_;
298  std::unique_ptr<data_iterator> training_data_iterator_;
299  std::unique_ptr<data_iterator> validation_data_iterator_;
300  std::unique_ptr<neural_net::compute_context> training_compute_context_;
301  std::unique_ptr<neural_net::model_backend> training_model_;
302 
303  // Nonnull while training is in progress, if progress printing is enabled.
304  std::unique_ptr<table_printer> training_table_printer_;
305 };
306 
307 } // namespace activity_classification
308 } // namespace turi
309 
310 #endif //TURI_ACTIVITY_CLASSIFIER_H_
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_DOCSTRING(name, docstring)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
#define IMPORT_BASE_CLASS_REGISTRATION(base_class)
boost::make_recursive_variant< flexible_type, std::shared_ptr< unity_sgraph_base >, dataframe_t, std::shared_ptr< model_base >, std::shared_ptr< unity_sframe_base >, std::shared_ptr< unity_sarray_base >, std::map< std::string, boost::recursive_variant_ >, std::vector< boost::recursive_variant_ >, boost::recursive_wrapper< function_closure_info > >::type variant_type
Definition: variant.hpp:24
#define END_CLASS_MEMBER_REGISTRATION
variant_type to_variant(const T &f)
Definition: variant.hpp:308