6 #ifndef TURI_ACTIVITY_CLASSIFIER_H_ 7 #define TURI_ACTIVITY_CLASSIFIER_H_ 9 #include <core/logging/table_printer/table_printer.hpp> 10 #include <model_server/lib/extensions/ml_model.hpp> 11 #include <core/data/sframe/gl_sframe.hpp> 12 #include <toolkits/activity_classification/ac_data_iterator.hpp> 13 #include <toolkits/coreml_export/mlmodel_wrapper.hpp> 14 #include <ml/neural_net/compute_context.hpp> 15 #include <ml/neural_net/model_backend.hpp> 16 #include <ml/neural_net/model_spec.hpp> 19 namespace activity_classification {
21 class EXPORT activity_classifier:
public ml_model_base {
25 static std::unique_ptr<neural_net::model_spec> init_model(
26 const std::string& target,
const std::vector<std::string>& features,
27 size_t prediction_window,
size_t num_classes,
bool use_random_init,
30 static std::tuple<gl_sframe, gl_sframe> random_split_by_session(
31 gl_sframe data, std::string session_id_column_name,
float fraction,
// Overrides the ml_model_base hook that receives the user-supplied options
// as a map from option name to flexible_type value.
// NOTE(review): which options are recognized, and how they are validated or
// defaulted, is decided in the definition, which this header does not show.
36 void init_options(
const std::map<std::string, flexible_type>& opts)
override;
// Overrides the ml_model_base version query. Const: does not modify the model.
// Presumably this is the serialization version later handed back to
// load_version() — confirm against the definition.
37 size_t get_version()
const override;
// Overrides the ml_model_base save hook. Takes the output archive by
// non-const reference and is itself const, so the model is left unmodified
// (apart from any `mutable` members).
38 void save_impl(oarchive& oarc)
const override;
// Overrides the ml_model_base load hook. Takes the input archive by
// non-const reference together with a `version` number.
// Presumably `version` is the value get_version() reported when the archive
// was written — verify against the definition.
39 void load_version(iarchive& iarc,
size_t version)
override;
// Trains the activity classifier.
//
// Arguments (per the docstring and defaults registered for "train"):
//   data                   - sessions of data in `stacked` format; within each
//                            session the rows are assumed to be sorted
//                            temporally.
//   target_column_name     - column containing the target variable; values
//                            must be of string or integer type.
//   session_id_column_name - column containing a unique ID for each session.
//   validation_data        - either an SFrame in the same format as the
//                            training set, or the string "auto" (the
//                            registered default), in which case a validation
//                            set is sampled from the training data if it has
//                            more than 100 sessions.
//   opts                   - extra training options; the registered default is
//                            an empty map. Documented options include
//                            `features`, `prediction_window`,
//                            `max_iterations` (default 10) and `random_seed`.
43 void train(gl_sframe data, std::string target_column_name,
44 std::string session_id_column_name,
variant_type validation_data,
45 std::map<std::string, flexible_type> opts);
// Returns predictions for `data` as a gl_sarray.
//
//   data        - new observations; must include columns with the same names
//                 as the features used for training. A target column is not
//                 required and additional columns are ignored.
//   output_type - 'class' or 'probability_vector' per the registered
//                 docstring. The registered default is the empty string.
//                 NOTE(review): how "" maps to a concrete output type is
//                 decided in the definition — confirm there.
46 gl_sarray predict(gl_sframe data, std::string output_type);
// Like predict(), but returns a gl_sframe; per the registered docstring the
// output is given per prediction_window.
//
//   data        - new observations with the same feature columns used for
//                 training; a target column is not required.
//   output_type - 'class' or 'probability_vector'. The registered default is
//                 the empty string (its interpretation is not visible here).
47 gl_sframe predict_per_window(gl_sframe data, std::string output_type);
// Classifies `data` and returns the result as a gl_sframe.
//
//   output_frequency - registered default is "per_row". Other accepted values
//                      are not visible in this header — see the definition.
48 gl_sframe classify(gl_sframe data, std::string output_frequency);
// Returns a gl_sframe of top-k predictions for `data`.
//
//   output_type      - registered default is "probability".
//   k                - presumably the number of classes to report per
//                      prediction — TODO confirm.
//   output_frequency - registered default is "per_row".
49 gl_sframe predict_topk(gl_sframe data, std::string output_type,
size_t k,
50 std::string output_frequency);
// Evaluates the model on `data` and returns the results as a variant map.
//
//   metric - registered default is "auto", which per the registered docstring
//            returns all available metrics. Other documented values:
//            'accuracy', 'auc', 'precision', 'recall', 'f1_score',
//            'log_loss', 'confusion_matrix' and 'roc_curve'.
51 variant_map_type evaluate(gl_sframe data, std::string metric);
// Exports the model to Core ML and returns a shared handle to the resulting
// coreml::MLModelWrapper.
//
//   filename                - NOTE(review): presumably the path the model is
//                             written to; whether an empty value skips writing
//                             is not visible here.
//   short_description       - registered default is "".
//   additional_user_defined - registered default is an empty map.
52 std::shared_ptr<coreml::MLModelWrapper> export_to_coreml(
53 std::string filename, std::string short_description,
54 std::map<std::string, flexible_type> additional_user_defined);
// Populates this model from `model_data`, tagged with `version`. The method
// is registered with argument names "model_data" and "version".
// NOTE(review): the keys expected inside `model_data` are not visible in this
// header — check the definition before relying on a particular schema.
55 void import_from_custom_model(variant_map_type model_data,
size_t version);
// Virtual entry point for starting training: takes the training data, the
// target and session-id column names, and an options map.
// NOTE(review): the registration for "init_training" also names a
// "validation_data" argument with default "auto", but no such parameter
// appears in this declaration as shown — confirm against the full header.
58 virtual void init_training(gl_sframe data, std::string target_column_name,
59 std::string session_id_column_name,
61 std::map<std::string, flexible_type> opts);
// Virtual entry point for resuming training with new data. The registered
// default for `validation_data` is the string "auto".
// Presumably this continues from an already-trained or loaded model — verify
// against the definition.
62 virtual void resume_training(gl_sframe data,
variant_type validation_data);
// Virtual, non-const training step with no arguments or return value.
// Presumably performs one iteration over the training data — TODO confirm.
63 virtual void iterate_training();
// Virtual, non-const step with no arguments or return value.
// NOTE(review): looks like this brings the stored model spec in line with the
// training backend's weights (see nn_spec_synchronized_) — confirm.
64 virtual void synchronize_training();
// Virtual, non-const step with no arguments or return value.
// Presumably called once after the last iterate_training() to wrap up
// training — TODO confirm against the definition.
65 virtual void finalize_training();
72 "session_id", "validation_data", "options");
73 register_defaults("train",
74 {{
"validation_data",
to_variant(std::string(
"auto"))},
76 to_variant(std::map<std::string, flexible_type>())}});
78 activity_classifier::train,
81 " Input data which consists of `sessions` of data where each session " 83 " a sequence of data. The data must be in `stacked` format, grouped " 85 " session. Within each session, the data is assumed to be sorted\n" 86 " temporally. Columns in `features` will be used to train a model " 88 " will make a prediction using labels in the `target` column.\n" 90 " Name of the column containing the target variable. The values in " 92 " column must be of string or integer type.\n" 93 "session_id : string\n" 94 " Name of the column that contains a unique ID for each session.\n" 95 "validatation_data : SFrame or string\n" 96 " A dataset for monitoring the model's generalization performance to\n" 97 " prevent the model from overfitting to the training data.\n" 99 " For each row of the progress table, accuracy is measured over the\n" 100 " provided training dataset and the `validation_data`. The format of\n" 101 " this SFrame must be the same as the training set.\n" 103 " When set to 'auto', a validation set is automatically sampled from " 105 " training data (if the training data has > 100 sessions).\n" 110 "features : list[string]\n" 111 " Name of the columns containing the input features that will be " 113 " for classification. If not set, all columns except `session_id` " 115 " `target` will be used.\n" 116 "prediction_window : int\n" 117 " Number of time units between predictions. For example, if your " 119 " data is sampled at 100Hz, and the `prediction_window` is set to " 121 " (the default), then this model will make a prediction every 1 " 123 "max_iterations : int\n" 124 " Maximum number of iterations/epochs made over the data during the\n" 125 " training phase. The default is 10 iterations.\n" 127 " Number of sequence chunks used per training step. Must be greater " 129 " the number of GPUs in use. The default is 32.\n" 130 "random_seed : int\n" 131 " The given seed is used for random weight initialization and " 133 " during training\n");
136 "target",
"session_id",
"validation_data",
138 register_defaults(
"init_training",
139 {{
"validation_data",
to_variant(std::string(
"auto"))},
141 to_variant(std::map<std::string, flexible_type>())}});
145 register_defaults(
"resume_training",
146 {{
"validation_data",
to_variant(std::string(
"auto"))}});
154 register_defaults(
"predict", {{
"output_type", std::string(
"")}});
156 activity_classifier::predict,
159 " Dataset of new observations. Must include columns with the same\n" 160 " names as the features used for model training, but does not require\n" 161 " a target column. Additional columns are ignored.\n" 162 "output_type : {'class', 'probability_vector'}, optional\n" 163 " Form of each prediction which is one of:\n" 164 " - 'probability_vector': Prediction probability associated with each\n" 165 " class as a vector. The probability of the first class (sorted\n" 166 " alphanumerically by name of the class in the training set) is in\n" 167 " position 0 of the vector, the second in position 1 and so on.\n" 168 " - 'class': Class prediction. This returns the class with maximum\n" 173 "data",
"output_type");
174 register_defaults(
"predict_per_window", {{
"output_type", std::string(
"")}});
176 activity_classifier::predict_per_window,
179 " Dataset of new observations. Must include columns with the same\n" 180 " names as the features used for model training, but does not " 182 " a target column. Additional columns are ignored.\n" 184 "output_type : {'class', 'probability_vector'}, optional\n" 185 " Form of each prediction which is one of:\n" 186 " - 'probability_vector': Prediction probability associated with " 188 " class as a vector. The probability of the first class (sorted\n" 189 " alphanumerically by name of the class in the training set) is in\n" 190 " position 0 of the vector, the second in position 1 and so on. \n" 191 " A probability_vector is given per prediction_window. \n" 192 " - 'class': Class prediction. This returns the class with maximum\n" 193 " probability per prediction_window.\n");
198 register_defaults(
"classify", {{
"output_frequency",
"per_row"}});
201 "output_type",
"k",
"output_frequency");
202 register_defaults(
"predict_topk", {{
"output_type",
"probability"},
204 {
"output_frequency",
"per_row"}});
208 register_defaults(
"evaluate", {{
"metric", std::string(
"auto")}});
210 activity_classifier::evaluate,
213 " Dataset of new observations. Must include columns with the same\n" 214 " names as the features used for model training, but does not require\n" 215 " a target column. Additional columns are ignored.\n" 216 "metric : str, optional\n" 217 " Name of the evaluation metric. Possible values are:\n" 218 " - 'auto' : Returns all available metrics\n" 219 " - 'accuracy' : Classification accuracy (micro average)\n" 220 " - 'auc' : Area under the ROC curve (macro average)\n" 221 " - 'precision' : Precision score (macro average)\n" 222 " - 'recall' : Recall score (macro average)\n" 223 " - 'f1_score' : F1 score (macro average)\n" 224 " - 'log_loss' : Log loss\n" 225 " - 'confusion_matrix' : An SFrame with counts of possible\n" 226 " prediction/true label combinations.\n" 227 " - 'roc_curve' : An SFrame containing information needed for an\n" 231 "filename",
"short_description",
"additional_user_defined");
232 register_defaults(
"export_to_coreml",
233 {{
"short_description",
""},
234 {
"additional_user_defined",
to_variant(std::map<std::string, flexible_type>())}});
237 "model_data",
"version");
// Virtual factory returning an owning pointer to a data_iterator over `data`.
// Const, and overridable (presumably so tests can substitute a mock
// iterator — TODO confirm).
//
//   requires_labels       - presumably whether `data` must contain the target
//                           column.
//   infer_class_labels    - presumably whether class labels are derived from
//                           `data` rather than taken from existing state.
//   is_train              - presumably whether the iterator is for training.
//   use_data_augmentation - presumably enables augmentation of the input.
// NOTE(review): the flag meanings above are read from their names only.
245 virtual std::unique_ptr<data_iterator> create_iterator(
246 gl_sframe data,
bool requires_labels,
bool infer_class_labels,
247 bool is_train,
bool use_data_augmentation)
const;
250 virtual std::unique_ptr<neural_net::compute_context> create_compute_context()
// Virtual, const factory returning an owning pointer to a freshly built
// neural_net::model_spec.
//
//   use_random_init - presumably selects random initialization of the
//                     weights — TODO confirm against the definition.
254 virtual std::unique_ptr<neural_net::model_spec> init_model(
255 bool use_random_init)
const;
// Virtual, const helper returning a pair of SFrames.
// NOTE(review): presumably (training data, validation data) — confirm. The
// declaration as shown takes only the session-id column name; check the full
// header for any additional parameters.
257 virtual std::tuple<gl_sframe, gl_sframe> init_data(
259 std::string session_id_column_name)
const;
// Virtual, non-const helper returning two float metrics computed for the
// validation set.
// NOTE(review): the order and meaning of the two floats (e.g. accuracy and
// loss) are not visible in this header — confirm against the definition.
261 virtual std::tuple<float, float> compute_validation_metrics(
262 size_t prediction_window,
size_t num_classes,
size_t batch_size);
// Virtual hook for setting up the training progress table.
//
//   has_validation - presumably adds validation columns to the table when
//                    true — TODO confirm.
264 virtual void init_table_printer(
bool has_validation);
// Virtual, const helper that runs inference over the batches yielded by
// `data` and returns the results as a gl_sframe.
// `data` is a non-owning pointer; the caller keeps ownership.
// NOTE(review): the schema of the returned SFrame is not visible here.
273 virtual gl_sframe perform_inference(data_iterator* data)
const;
// Convenience accessor: looks up `key` in get_state() using .at() and
// converts the stored variant to T with variant_get_value<T>.
// NOTE(review): assuming get_state() returns a std::map-like container, a
// missing key makes .at() throw rather than return a default — confirm.
277 template <
typename T>
278 T read_state(
const std::string& key)
const {
279 return variant_get_value<T>(get_state().at(key));
// Returns a read-only, non-owning pointer to the model spec.
// NOTE(review): presumably synchronizes nn_spec_ with the training backend
// first when nn_spec_synchronized_ is false (hence the `mutable` flag) —
// confirm against the definition.
283 const neural_net::model_spec* read_model_spec()
const;
// Defaults to true. Presumably controls whether loss is shown in the training
// progress output — TODO confirm.
286 bool show_loss_ =
true;
// Defaults to false; `mutable` so that const member functions can update it.
// Presumably records whether nn_spec_ reflects the latest trained weights.
291 mutable bool nn_spec_synchronized_ =
false;
// Owning pointer to the neural-network model specification.
292 std::unique_ptr<neural_net::model_spec> nn_spec_;
// Training and validation datasets held by the model.
296 gl_sframe training_data_;
297 gl_sframe validation_data_;
// Owning pointers to the iterators over the training / validation data.
298 std::unique_ptr<data_iterator> training_data_iterator_;
299 std::unique_ptr<data_iterator> validation_data_iterator_;
// Owning pointers to the compute context and model backend used in training.
300 std::unique_ptr<neural_net::compute_context> training_compute_context_;
301 std::unique_ptr<neural_net::model_backend> training_model_;
// Owning pointer to the table printer used for training progress output.
304 std::unique_ptr<table_printer> training_table_printer_;
310 #endif //TURI_ACTIVITY_CLASSIFIER_H_ #define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_DOCSTRING(name, docstring)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
#define IMPORT_BASE_CLASS_REGISTRATION(base_class)
boost::make_recursive_variant< flexible_type, std::shared_ptr< unity_sgraph_base >, dataframe_t, std::shared_ptr< model_base >, std::shared_ptr< unity_sframe_base >, std::shared_ptr< unity_sarray_base >, std::map< std::string, boost::recursive_variant_ >, std::vector< boost::recursive_variant_ >, boost::recursive_wrapper< function_closure_info > >::type variant_type
#define END_CLASS_MEMBER_REGISTRATION
variant_type to_variant(const T &f)