6 #ifndef TURI_SUPERVISED_LEARNING_H_ 7 #define TURI_SUPERVISED_LEARNING_H_ 10 #include <core/storage/sframe_data/sarray.hpp> 11 #include <core/storage/sframe_data/sframe.hpp> 12 #include <core/data/sframe/gl_sarray.hpp> 13 #include <core/data/sframe/gl_sframe.hpp> 16 #include <model_server/lib/extensions/ml_model.hpp> 19 #include <ml/ml_data/ml_data.hpp> 20 #include <ml/ml_data/ml_data_iterator.hpp> 23 #include <model_server/lib/variant.hpp> 24 #include <model_server/lib/unity_base_types.hpp> 25 #include <model_server/lib/variant_deep_serialize.hpp> 26 #include <toolkits/coreml_export/mlmodel_wrapper.hpp> 29 #include <Eigen/SparseCore> 31 #include <core/export.hpp> 33 #include <model_server/lib/toolkit_class_macros.hpp> 41 namespace supervised {
43 class supervised_learning_model_base;
44 typedef Eigen::Matrix<double, Eigen::Dynamic,1> DenseVector;
45 typedef Eigen::SparseVector<double> SparseVector;
51 enum class prediction_type_enum: char {
59 PROBABILITY_VECTOR = 7
68 inline prediction_type_enum prediction_type_enum_from_name(
const std::string& name) {
69 static std::map<std::string, prediction_type_enum> type_map{
70 {
"na", prediction_type_enum::NA},
71 {
"", prediction_type_enum::NA},
72 {
"class", prediction_type_enum::CLASS},
73 {
"class_index", prediction_type_enum::CLASS_INDEX},
74 {
"probability", prediction_type_enum::PROBABILITY},
75 {
"max_probability", prediction_type_enum::MAX_PROBABILITY},
76 {
"margin", prediction_type_enum::MARGIN},
77 {
"rank", prediction_type_enum::RANK},
78 {
"probability_vector", prediction_type_enum::PROBABILITY_VECTOR},
80 if (type_map.count(name) == 0) {
81 log_and_throw(std::string(
"Invalid prediction type name " + name));
83 return type_map.at(name);
/**
 * Declaration of the model factory; the definition is not in this header.
 *
 * \param X           sframe taken by value. Presumably the feature columns
 *                    (TODO confirm against the definition).
 * \param y           sframe taken by value. Presumably the target column
 *                    (TODO confirm against the definition).
 * \param model_name  Name identifying which model to build.
 * \param kwargs      Additional arguments as a variant_map_type.
 * \returns Shared pointer to a supervised_learning_model_base.
 */
97 std::shared_ptr<supervised_learning_model_base> create(
98 sframe X, sframe y, std::string model_name,
99 const variant_map_type& kwargs);
/// Metric names held by the model. Presumably the evaluation metrics,
/// as opposed to tracking_metrics below (TODO confirm).
183 std::vector<std::string> metrics;
/// Metric names held by the model; overwritten from the `_metrics`
/// argument elsewhere in this header.
184 std::vector<std::string> tracking_metrics;
/// Warning flag; defaults to true and is overwritten from the
/// `more_warnings` argument elsewhere in this header.
185 bool show_extra_warnings =
true;
/// Shared metadata object; handed out by an accessor in this header and
/// queried for num_dimensions().
189 std::shared_ptr<ml_metadata> ml_mdata;
/// Train the model. Pure virtual (`= 0`): every concrete model must
/// provide its own implementation. Takes no arguments and returns nothing.
203 virtual void train() = 0;
/// Declaration only; defined outside this header. Returns a list of rows,
/// each row being a list of flexible_type values.
/// NOTE(review): presumably one row per feature dimension describing the
/// metadata mapping; confirm against the definition.
209 std::vector<std::vector<flexible_type>> get_metadata_mapping();
228 const prediction_type_enum& output_type=prediction_type_enum::NA) {
242 const DenseVector & x,
243 const prediction_type_enum& output_type=prediction_type_enum::NA) {
257 const SparseVector & x,
258 const prediction_type_enum& output_type=prediction_type_enum::NA) {
/**
 * Evaluate the model on already-constructed ml_data (declaration only).
 *
 * \param test_data        Data to evaluate on.
 * \param evaluation_type  Evaluation selector; defaults to "".
 * \param with_prediction  Defaults to false. Presumably controls whether
 *                         predictions are included in the result
 *                         (TODO confirm).
 * \returns Map from string key to variant_type.
 *
 * NOTE(review): this is a virtual function with default arguments. Defaults
 * are chosen from the static type at the call site, so overrides should
 * repeat exactly the same defaults.
 */
272 virtual std::map<std::string, variant_type> evaluate(
const ml_data&
273 test_data,
const std::string& evaluation_type=
"",
bool with_prediction=
false);
279 const sframe &y,
const std::string& evaluation_type=
"",
bool with_prediction=
false) {
280 ml_data data = construct_ml_data_using_current_metadata(X, y);
281 return this->evaluate(data, evaluation_type, with_prediction);
/**
 * Predict on already-constructed ml_data (declaration only).
 *
 * \param test_data    Data to predict on.
 * \param output_type  Name of the desired output type; defaults to "".
 * \returns Shared pointer to an sarray of flexible_type.
 *
 * NOTE(review): this is a virtual function with a default argument.
 * Overrides should repeat the same default, because defaults are bound from
 * the static type at the call site.
 */
293 virtual std::shared_ptr<sarray<flexible_type>> predict(
294 const ml_data& test_data,
const std::string& output_type=
"");
299 virtual std::shared_ptr<sarray<flexible_type>>
predict(
300 const sframe& X,
const std::string& output_type=
"") {
301 ml_data data = construct_ml_data_using_current_metadata(X);
302 return predict(data, output_type);
311 log_and_throw(
"Model does not support feature extraction");
327 const std::string& output_type=
"",
329 log_and_throw(
"Predicting multiple classes is not supported by this model.");
343 const std::string& output_type=
"",
356 const std::string& output_type=
"");
362 const std::string& output_type=
"") {
364 ml_data data = construct_ml_data_using_current_metadata(X);
365 return classify(data, output_type);
376 const std::vector<flexible_type>& rows,
377 const std::string& missing_value_action =
"error",
378 const std::string& output_type =
"");
389 const std::vector<flexible_type>& rows,
390 const std::string& missing_value_action =
"error",
391 const std::string& output_type=
"",
392 const size_t topk = 5) {
393 log_and_throw(
"Not implemented yet");
403 const std::vector<flexible_type>& rows,
404 const std::string& missing_value_action =
"error");
448 tracking_metrics = _metrics;
456 show_extra_warnings = more_warnings;
463 set_evaluation_metric({
"max_error",
"rmse"});
470 set_tracking_metric({
"max_error",
"rmse"});
/// Declaration only. Const member returning a map from string key to
/// flexible_type. Presumably the statistics recorded during training
/// (TODO confirm against the definition).
483 std::map<std::string, flexible_type> get_train_stats()
const;
/// Declaration only. Const member that takes an sframe by const reference
/// and returns a new sframe.
/// NOTE(review): presumably adds columns the stored metadata expects but
/// that are absent from X; confirm against the definition.
493 sframe impute_missing_columns_using_current_metadata(
const sframe& X)
const;
506 ml_data construct_ml_data_using_current_metadata(
520 ml_data construct_ml_data_using_current_metadata(
/// Declaration only. Const accessor returning a size_t; its result is
/// compared against ml_mdata->num_dimensions() elsewhere in this header.
/// Presumably the number of feature columns (TODO confirm).
529 size_t num_features()
const;
/// Declaration only. Const accessor returning a size_t. Presumably the
/// number of training rows (TODO confirm).
536 size_t num_examples()
const;
/// Declaration only. Const accessor returning a size_t. Presumably the
/// feature count after container-typed columns are expanded (TODO confirm).
543 size_t num_unpacked_features()
const;
/// Declaration only. Const accessor returning the feature names as a
/// vector of strings, by value.
550 std::vector<std::string> get_feature_names()
const;
/// Declaration only. Const accessor returning the target name as a
/// string, by value.
557 std::string get_target_name()
const;
565 return this->ml_mdata;
/// Pure virtual (`= 0`), const. Concrete models must report whether they
/// are classifiers.
571 virtual bool is_classifier()
const = 0;
577 return ((this->ml_mdata)->num_dimensions() <= 3 * num_features()) ? true :
false;
/// Declaration only. Const accessor returning a vector of metric names by
/// value. Presumably the contents of `metrics` (TODO confirm).
583 std::vector<std::string> get_metrics()
const;
/// Declaration only. Const accessor returning a vector of metric names by
/// value. Presumably the contents of `tracking_metrics` (TODO confirm).
588 std::vector<std::string> get_tracking_metrics()
const;
/// Declaration only. Const member mapping a metric name to a string.
/// Presumably a human-readable label used when printing progress
/// (TODO confirm).
///
/// \param metric  Metric name to look up.
593 std::string get_metric_display_name(
const std::string& metric)
const;
/// Declaration only. Const member returning nothing. Presumably prints a
/// training summary for a regression model (TODO confirm).
///
/// \param model_display_name  Name of the model, taken by value.
601 void display_regression_training_summary(std::string model_display_name)
const;
/// Declaration only. Const member returning nothing. Presumably prints a
/// training summary for a classifier (TODO confirm).
///
/// \param model_display_name  Name of the model, taken by value.
/// \param simple_mode         Defaults to false. Presumably selects a
///                            shorter summary (TODO confirm).
609 void display_classifier_training_summary(std::string model_display_name,
bool simple_mode =
false)
const;
623 const ml_data& validation_data) { }
635 void api_train(
gl_sframe data,
const std::string& target,
637 const std::map<std::string, flexible_type>& _options);
645 std::string output_type);
653 std::string output_type,
size_t topk = 5);
663 std::string output_type);
/**
 * API-level evaluate entry point (declaration only).
 *
 * \param data                  Input gl_sframe, taken by value.
 * \param missing_value_action  Missing-value policy name.
 * \param metric                Metric name.
 * \param predictions           Optional gl_sarray; defaults to an empty
 *                              gl_sarray().
 * \param with_prediction       Defaults to false.
 * \returns variant_map_type.
 *
 * NOTE(review): the "evaluate" registration in this header names its last
 * argument "with_predictions" (plural), while the parameter here is
 * `with_prediction`. Confirm that the mismatch is intentional.
 */
671 variant_map_type api_evaluate(
672 gl_sframe data, std::string missing_value_action, std::string metric,
gl_sarray predictions =
gl_sarray(),
bool with_prediction=
false);
681 gl_sframe data, std::string missing_value_action);
/// Pure virtual (`= 0`). Concrete models must produce a shared pointer to
/// a coreml::MLModelWrapper for the model.
685 virtual std::shared_ptr<coreml::MLModelWrapper> export_to_coreml() = 0;
/// Declaration only. Registered in this header under the name
/// "export_to_coreml", with a registered default of "" for the "filename"
/// argument.
///
/// \param file  File name or path. Presumably where the exported model is
///              written when non-empty (TODO confirm).
/// \returns Shared pointer to a coreml::MLModelWrapper.
687 std::shared_ptr<coreml::MLModelWrapper> api_export_to_coreml(
const std::string& file);
697 "validation_data",
"options");
698 register_defaults(
"train",
701 to_variant(std::map<std::string, flexible_type>())}});
705 "missing_value_action",
"output_type");
707 register_defaults(
"predict", {{
"missing_value_action", std::string(
"auto")},
708 {
"output_type", std::string(
"")}});
712 "missing_value_action",
"output_type");
714 register_defaults(
"fast_predict",
715 {{
"missing_value_action", std::string(
"auto")},
716 {
"output_type", std::string(
"")}});
720 "missing_value_action",
"output_type",
"topk");
722 register_defaults(
"predict_topk",
723 {{
"missing_value_action", std::string(
"error")},
724 {
"output_type", std::string(
"")}});
728 "rows",
"missing_value_action",
"output_type",
"topk");
730 register_defaults(
"fast_predict_topk",
731 {{
"missing_value_action", std::string(
"auto")},
732 {
"output_type", std::string(
"")}});
736 "missing_value_action");
738 register_defaults(
"classify",
739 {{
"missing_value_action", std::string(
"auto")}});
743 "missing_value_action");
745 register_defaults(
"fast_classify",
746 {{
"missing_value_action", std::string(
"auto")}});
750 "missing_value_action",
"metric",
"predictions",
"with_predictions");
752 register_defaults(
"evaluate",
753 {{
"metric", std::string(
"_report")},
754 {
"missing_value_action", std::string(
"auto")},
756 {
"with_predictions",
false}
761 "data",
"missing_value_action");
763 register_defaults(
"extract_features",
764 {{
"missing_value_action", std::string(
"auto")}});
772 "export_to_coreml", supervised_learning_model_base::api_export_to_coreml,
775 register_defaults(
"export_to_coreml", {{
"filename", std::string(
"")}});
781 const std::string& missing_value_str)
const;
/// Declaration only. Returns a vector of toolkit_function_specification by
/// value. Presumably the toolkit functions this module exposes
/// (TODO confirm).
787 std::vector<toolkit_function_specification> get_toolkit_function_registration();
795 std::shared_ptr<supervised_learning_model_base> model,
796 const std::vector<flexible_type>& rows,
797 const std::string& missing_value_action =
"error",
798 const std::string& output_type =
"probability");
807 std::shared_ptr<supervised_learning_model_base> model,
808 const std::vector<flexible_type>& rows,
809 const std::string& missing_value_action =
"error",
810 const std::string& output_type =
"probability",
811 const size_t topk = 5);
820 std::shared_ptr<supervised_learning_model_base> model,
821 const std::vector<flexible_type>& rows,
822 const std::string& missing_value_action =
"error");
/// Declaration only. Takes a model by shared pointer and returns a list of
/// rows, each a list of flexible_type. The return type matches the
/// get_metadata_mapping() declaration earlier in this header.
/// NOTE(review): presumably forwards to model->get_metadata_mapping();
/// confirm against the definition.
829 std::vector<std::vector<flexible_type>> _get_metadata_mapping(
830 std::shared_ptr<supervised_learning_model_base> model);
virtual void set_coefs(const DenseVector &coefs)
void api_train(gl_sframe data, const std::string &target, const variant_type &validation_data, const std::map< std::string, flexible_type > &_options)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
virtual void model_specific_init(const ml_data &data, const ml_data &validation_data)
variant_map_type api_evaluate(gl_sframe data, std::string missing_value_action, std::string metric, gl_sarray predictions=gl_sarray(), bool with_prediction=false)
#define BEGIN_BASE_CLASS_MEMBER_REGISTRATION()
std::map< std::string, flexible_type > get_train_stats() const
std::vector< std::string > get_feature_names() const
void set_tracking_metric(std::vector< std::string > _metrics)
virtual flexible_type predict_single_example(const ml_data_iterator &it, const prediction_type_enum &output_type=prediction_type_enum::NA)
virtual flexible_type predict_single_example(const SparseVector &x, const prediction_type_enum &output_type=prediction_type_enum::NA)
#define IMPORT_BASE_CLASS_REGISTRATION(base_class)
boost::make_recursive_variant< flexible_type, std::shared_ptr< unity_sgraph_base >, dataframe_t, std::shared_ptr< model_base >, std::shared_ptr< unity_sframe_base >, std::shared_ptr< unity_sarray_base >, std::map< std::string, boost::recursive_variant_ >, std::vector< boost::recursive_variant_ >, boost::recursive_wrapper< function_closure_info > >::type variant_type
virtual gl_sframe fast_predict_topk(const std::vector< flexible_type > &rows, const std::string &missing_value_action="error", const std::string &output_type="", const size_t topk=5)
void set_more_warnings(bool more_warnings)
gl_sframe api_predict_topk(gl_sframe data, std::string missing_value_action, std::string output_type, size_t topk=5)
gl_sarray api_extract_features(gl_sframe data, std::string missing_value_action)
#define END_CLASS_MEMBER_REGISTRATION
virtual std::shared_ptr< sarray< flexible_type > > predict(const sframe &X, const std::string &output_type="")
virtual std::shared_ptr< sarray< flexible_type > > extract_features(const sframe &X, ml_missing_value_action missing_value_action)
#define REGISTER_NAMED_CLASS_MEMBER_FUNCTION(name, function,...)
variant_type to_variant(const T &f)
virtual gl_sarray fast_predict(const std::vector< flexible_type > &rows, const std::string &missing_value_action="error", const std::string &output_type="")
virtual void set_default_tracking_metric()
gl_sframe api_classify(gl_sframe data, std::string missing_value_action, std::string output_type)
virtual gl_sframe fast_classify(const std::vector< flexible_type > &rows, const std::string &missing_value_action="error")
virtual std::map< std::string, variant_type > evaluate(const sframe &X, const sframe &y, const std::string &evaluation_type="", bool with_prediction=false)
std::shared_ptr< ml_metadata > get_ml_metadata() const
virtual void set_default_evaluation_metric()
void set_evaluation_metric(std::vector< std::string > _metrics)
gl_sarray api_predict(gl_sframe data, std::string missing_value_action, std::string output_type)
virtual sframe classify(const sframe &X, const std::string &output_type="")
#define DASSERT_TRUE(cond)
virtual bool support_missing_value() const
virtual flexible_type predict_single_example(const DenseVector &x, const prediction_type_enum &output_type=prediction_type_enum::NA)
virtual sframe predict_topk(const sframe &test_data, const std::string &output_type="", size_t topk=5)