Turi Create  4.0
supervised_learning_utils-inl.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SUPERVISED_LEARNING_UTILS_H_
7 #define TURI_SUPERVISED_LEARNING_UTILS_H_
8 
9 #include <Eigen/LU>
10 // SFrame
11 #include <core/storage/sframe_data/sarray.hpp>
12 #include <core/storage/sframe_data/sframe.hpp>
13 
14 // ML-Data Utils
15 #include <ml/ml_data/ml_data.hpp>
16 #include <ml/ml_data/metadata.hpp>
17 #include <core/util/testing_utils.hpp>
18 // Supervised learning includes.
19 #include <toolkits/supervised_learning/supervised_learning.hpp>
20 
21 // Types
22 #include <model_server/lib/variant.hpp>
23 #include <model_server/lib/unity_base_types.hpp>
24 #include <model_server/lib/variant_deep_serialize.hpp>
25 #include <model_server/lib/flex_dict_view.hpp>
26 
27 /// SDK
28 #include <model_server/lib/toolkit_function_macros.hpp>
29 #include <core/storage/serialization/serialization_includes.hpp>
30 
31 
32 namespace turi {
33 namespace supervised {
34 
35 
36 /**
37  * Get standard errors from hessian.
38  */
39 
40 inline Eigen::Matrix<double, Eigen::Dynamic,1> get_stderr_from_hessian(
41  const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& hessian) {
42  DASSERT_EQ(hessian.rows(), hessian.cols());
43  return hessian.inverse().diagonal().cwiseSqrt();
44 }
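
/*
 * Editorial usage sketch (not part of the original header): for a diagonal
 * Hessian the standard errors reduce to sqrt(1 / H_ii), since the function
 * returns sqrt(diag(H^-1)). The matrix values below are made up.
 *
 *   Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> H(2, 2);
 *   H << 4.0, 0.0,
 *        0.0, 9.0;
 *   // se = [sqrt(1/4), sqrt(1/9)] = [0.5, 0.333...]
 *   auto se = get_stderr_from_hessian(H);
 */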
45 
46 /**
47  * Is this model a classifier?
48  *
49  * \param[in] model_name Name of the model
50  *
51  * \returns True if the model name corresponds to a classifier.
52  */
53 inline bool is_classifier(std::string model_name){
54  if(model_name.find("classifier") != std::string::npos) {
55  return true;
56  }
57  return false;
58 }
59 
60 /**
61  * Setup the ml_data for prediction.
62  */
63 inline ml_data setup_ml_data_for_prediction(
64  const sframe& X,
65  const std::shared_ptr<supervised_learning_model_base>& model,
66  ml_missing_value_action missing_value_action) {
67 
68  ml_data data;
69  data = model->construct_ml_data_using_current_metadata(X, missing_value_action);
70  return data;
71 }
72 
73 /**
74  * Setup the ml_data for evaluation.
75  */
76 inline ml_data setup_ml_data_for_evaluation(const sframe& X, const sframe& y,
77  const std::shared_ptr<supervised_learning_model_base>& model,
78  ml_missing_value_action missing_value_action) {
79  ml_data data;
80  data = model->construct_ml_data_using_current_metadata(X, y, missing_value_action);
81  return data;
82 }
83 
84 /**
85  * Check if the data is empty!
86  *
87  * \param[in] X Input data (SFrame).
88  */
89 inline void check_empty_data(sframe X){
90  if (X.num_rows() == 0){
91  log_and_throw("Input data does not contain any rows.");
92  }
93 
94  if (X.num_columns() == 0){
95  log_and_throw("Input data does not contain any features.");
96  }
97 }
98 
99 /**
100  * Check that the target types are right.
101  *
102  * Regression vs classifier:
103  *
104  * One user in our forum reported that logistic regression gave an error
105  * saying the target column was not of numeric type, when the message
106  * should have said that the column was not of integer type.
107  *
108  * For the purposes of error messages, we therefore separate the logging
109  * for classifier vs. regression tasks.
110  *
111  */
112 inline void check_target_column_type(std::string model_name, sframe y){
113  DASSERT_TRUE(y.num_columns() == 1);
114 
115  std::stringstream ss;
116  std::string model_name_for_display = "";
117 
118  if (model_name == "classifier_svm"){
119  model_name_for_display = "SVM";
120  } else if (model_name == "classifier_logistic_regression"){
121  model_name_for_display = "Logistic Regression";
122  }
123 
124  // classifier tasks.
125  if(model_name == "classifier_svm" ||
126  model_name == "classifier_logistic_regression" ||
127  model_name == "random_forest_classifier" ||
128  model_name == "decision_tree_classifier" ||
129  model_name == "boosted_trees_classifier"){
130 
131  flex_type_enum ctype = y.column_type(0);
132  if (ctype != flex_type_enum::INTEGER && ctype != flex_type_enum::STRING){
133  ss.str("");
134  ss << "Column type of target '" << y.column_name(0)
135  << "' must be int or str."
136  << std::endl;
137  log_and_throw(ss.str());
138  }
139 
140  } else {
141 
142  flex_type_enum ctype = y.column_type(0);
143  if ((ctype != flex_type_enum::INTEGER) &&
144      (ctype != flex_type_enum::FLOAT)) {
145  ss.str("");
146  ss << "Column type of target '" << y.column_name(0)
147  << "' must be int or float."
148  << std::endl;
149  log_and_throw(ss.str());
150  }
151  }
152 }
153 
154 /**
155  * Setup an SFrame as test input to predict, predict_topk, or classify function.
156  */
157 inline sframe setup_test_data_sframe(const sframe& sf,
158  std::shared_ptr<supervised_learning_model_base> model,
159  ml_missing_value_action missing_value_action) {
160  sframe ret;
161  check_empty_data(sf);
162 
163  auto expected_columns = model->get_feature_names();
164  switch (missing_value_action) {
165  case ml_missing_value_action::IMPUTE:
166  ret = model->impute_missing_columns_using_current_metadata(sf);
167  break;
168  case ml_missing_value_action::USE_NAN:
169  if (model->support_missing_value()) {
170  ret = model->impute_missing_columns_using_current_metadata(sf);
171  } else {
172  log_and_throw("Model doesn't support missing value, please set missing_value_action to \"impute\"");
173  }
174  break;
175  case ml_missing_value_action::ERROR:
176  ret = sf;
177  break;
178  default:
179  log_and_throw("Invalid missing value action");
180  }
181  ret = ret.select_columns(expected_columns);
182  return ret;
183 }
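
/*
 * Editorial usage sketch (not part of the original header): a typical
 * test-time flow for predict()-style calls. `raw_sf` and `model` are
 * hypothetical variables standing in for the caller's input SFrame and
 * trained model.
 *
 *   sframe test_X = setup_test_data_sframe(
 *       raw_sf, model, ml_missing_value_action::IMPUTE);
 *   ml_data data = setup_ml_data_for_prediction(
 *       test_X, model, ml_missing_value_action::IMPUTE);
 */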
184 
185 
186 /**
187  * Fill an Eigen vector from an ml_data row using reference encoding for
188  * categorical variables. Here, the 0th category is used as the reference
189  * category.
190  *
191  * \param[in]     row_ref The ml_data_row_reference object to read from.
192  * \param[in,out] x       An Eigen expression (a sparse vector, dense vector, or row of a matrix).
193  */
194 template <typename EigenExpr>
196 inline void fill_reference_encoding(
197  const ml_data_row_reference& row_ref,
198  EigenExpr && x) {
199 
200  x.setZero();
201  size_t offset = 0;
202 
203  row_ref.unpack(
204 
205  // The function to write out the data to x.
206  [&](ml_column_mode mode, size_t column_index,
207  size_t feature_index, double value,
208  size_t index_size, size_t index_offset) {
209 
210  if(UNLIKELY(feature_index >= index_size))
211  return;
212 
213  // Decrement if it isn't the reference category.
214  size_t idx = offset + feature_index;
215  if(mode_is_categorical(mode)) {
216  if (feature_index != 0) {
217  idx -= 1;
218  } else {
219  return;
220  }
221  }
222 
223  DASSERT_GE(idx, 0);
224  DASSERT_LT(idx, size_t(x.size()));
225  x.coeffRef(idx) = value;
226 
227  },
228 
229  /**
230  * The function to advance the offset, called after each column
231  * is finished.
232  */
233  [&](ml_column_mode mode, size_t column_index,
234  size_t index_size) GL_GCC_ONLY(GL_HOT_INLINE_FLATTEN) {
235  offset += (index_size - (mode_is_categorical(mode) ? 1 : 0));
236  });
237 }
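
/*
 * Editorial sketch of the layout produced above (not part of the original
 * header; the column names are hypothetical). A categorical column with
 * categories {a, b, c} occupies two slots, since category 0 ('a') is the
 * dropped reference, and a numeric column occupies one slot.
 *
 *   // Columns: color in {a, b, c} (categorical, 'a' indexed as 0),
 *   //          weight (numeric).
 *   // The row {color: 'c', weight: 1.7} is written as x = [0, 1, 1.7]:
 *   //   slot 0 -> color == 'b', slot 1 -> color == 'c', slot 2 -> weight.
 *   Eigen::Matrix<double, Eigen::Dynamic, 1> x(3);
 *   fill_reference_encoding(row_ref, x);  // row_ref: an ml_data_row_reference
 */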
238 
239 
240 /**
241  * Warn the user about features with low variance, and throw if any feature
242  * contains inf/nan values.
243  * \param[in] metadata A copy of the ml_metadata.
244  */
245 inline void check_feature_means_and_variances(
246  const std::shared_ptr<ml_metadata> metadata,
247  bool display_warnings = true) {
248 
249  std::stringstream ss;
250  std::vector<std::string> error_columns;
251 
252  // Get out the features with low variance
253  for(size_t cid = 0; cid < metadata->num_columns(); cid++){
254  const auto stats = metadata->statistics(cid);
255  size_t index_size = metadata->index_size(cid);
256  std::string col = metadata->column_name(cid);
257  for(size_t i = 0; i < index_size; i++) {
258  if (std::abs(stats->stdev(i)) < 1e-20) {
259  error_columns.push_back(col);
260  break;
261  }
262  }
263  }
264 
265  if (error_columns.size() && display_warnings) {
266  ss << "WARNING: Detected extremely low variance for feature(s) ";
267  for(size_t i=0; i < error_columns.size()-1; i++){
268  ss << "'" << error_columns[i] << "', ";
269  }
270  ss << "'" << error_columns[error_columns.size()-1] << "'"
271  << " because all entries are nearly the same.\n"
272  << "Proceeding with model training using all features. "
273  << "If the model does not provide results of adequate quality, "
274  << "exclude the above mentioned feature(s) from the input dataset.";
275  logprogress_stream << ss.str() << std::endl;
276  }
277 
278  // Get out the features with inf or nan
279  error_columns.clear();
280  bool column_with_nan = false;
281  for(size_t cid = 0; cid < metadata->num_columns(); cid++){
282  const auto stats = metadata->statistics(cid);
283  size_t index_size = metadata->index_size(cid);
284  std::string col = metadata->column_name(cid);
285  for(size_t i = 0; i < index_size; i++) {
286  if (!std::isfinite(stats->mean(i))) {
287  error_columns.push_back(col);
288  column_with_nan = true;
289  break;
290  }
291  }
292  }
293 
294  // Throw an error if a column contains NAN/INF.
295  if (column_with_nan == true) {
296  ss << "Detected inf/nan values in feature(s) ";
297  for(size_t i=0; i < error_columns.size()-1; i++){
298  ss << "'" << error_columns[i] << "', ";
299  }
300  ss << "'" << error_columns[error_columns.size()-1] << "'. "
301  << "Cannot proceed with model training.";
302  log_and_throw(ss.str());
303  }
304 }
305 
306 
307 /**
308  * For each of the provided keys, get a string of the corresponding value.
309  */
310 inline std::vector<std::string> make_evaluation_progress(
311  const std::map<std::string, float>& eval_map,
312  const std::vector<std::string> keys) {
313  std::vector<std::string> ret;
314  if (!eval_map.empty()) {
315  for (auto& k : keys)
316  // TODO: Check that k exists in eval_map.
317  ret.push_back(std::to_string(eval_map.at(k)));
318  }
319  return ret;
320 }
321 
322 inline std::vector<std::string> make_progress_string(
323  size_t iter, size_t examples, double time,
324  const std::vector<std::string>& train_eval,
325  const std::vector<std::string>& valid_eval,
326  float speed, bool padding_valid_eval) {
327 
328  std::vector<std::string> ret;
329  ret.push_back(std::to_string(iter));
330  ret.push_back(std::to_string(examples));
331  ret.push_back(std::to_string(time));
332  for (size_t i = 0 ; i < train_eval.size(); ++i) {
333  ret.push_back(train_eval[i]);
334  if (!valid_eval.empty()) {
335  ret.push_back(valid_eval[i]);
336  } else if(padding_valid_eval) {
337  ret.push_back("");
338  }
339  }
340  ret.push_back(std::to_string(speed));
341  return ret;
342 }
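
/*
 * Editorial usage sketch (not part of the original header): the returned row
 * is ordered as {iter, examples, time, train_metric_1[, valid_metric_1], ...,
 * speed}, with an empty string used as padding when there is no validation
 * data and padding_valid_eval is true.
 *
 *   // Iteration 3, 3000 examples, 1.5s, one training metric, no validation
 *   // data, padding enabled:
 *   auto row = make_progress_string(3, 3000, 1.5, {"0.92"}, {}, 2000.0f, true);
 *   // row == {"3", "3000", "1.500000", "0.92", "", "2000.000000"}
 */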
343 
344 /**
345  * Build the progress table header for the provided model: the given stat
346  * headers, then a training (and optionally validation) column per tracked metric.
347  */
348 inline std::vector<std::pair<std::string, size_t>> make_progress_header(
349  supervised_learning_model_base& smodel,
350  const std::vector<std::string>& stat_headers,
351  bool has_validation_data) {
352 
353  auto header = std::vector<std::pair<std::string, size_t>>();
354  for (const auto& s : stat_headers) {
355  header.push_back({s, 8});
356  }
357 
358  auto metrics = std::vector<std::string>();
359  for (const auto& metric: smodel.get_tracking_metrics()) {
360  metrics.push_back(metric);
361  }
362 
363  for (const auto& m: metrics) {
364  std::string dm = smodel.get_metric_display_name(m);
365  header.push_back({std::string("Training ") + dm, 6});
366  if (has_validation_data)
367  header.push_back({std::string("Validation ") + dm, 6});
368  }
369 
370  return header;
371 }
372 
373 inline std::vector<std::string> make_progress_row_string(
374  supervised_learning_model_base& smodel,
375  const ml_data& data,
376  const ml_data& valid_data,
377  const std::vector<std::string>& stats) {
378 
379  auto train_eval = std::vector<std::string>();
380  for (auto& kv : smodel.evaluate(data, "train")) {
381  train_eval.push_back(std::to_string(variant_get_value<double>(kv.second)));
382  }
383 
384  auto valid_eval = std::vector<std::string>();
385  bool has_validation_data = valid_data.num_rows() > 0;
386  if (has_validation_data) {
387  for (auto& kv : smodel.evaluate(valid_data, "train")) {
388  valid_eval.push_back(std::to_string(variant_get_value<double>(kv.second)));
389  }
390  }
391 
392  auto ret = std::vector<std::string>();
393  for (const auto& s : stats)
394  ret.push_back(s);
395 
396  for (size_t i = 0 ; i < train_eval.size(); ++i) {
397  ret.push_back(train_eval[i]);
398  if (!valid_eval.empty()) {
399  ret.push_back(valid_eval[i]);
400  } else if(has_validation_data) {
401  ret.push_back("");
402  }
403  }
404 
405  return ret;
406 }
407 
408 /**
409  * Get the class weights based on the user options and target metadata.
410  *
411  * \param[in] options
412  * \param[in] metadata
413  * \returns Class weights
414  */
415 inline flexible_type get_class_weights_from_options(
416  const option_manager& options,
417  const std::shared_ptr<ml_metadata>& metadata){
418 
419  // Number of classes in the target column.
420  size_t num_classes = metadata->target_index_size();
421  auto indexer = metadata->target_indexer();
422  auto stats = metadata->target_statistics();
423 
424  flex_dict class_weights(num_classes);
425  flexible_type class_weights_option = options.value("class_weights");
426 
427  // Case 1 (None): Uniform weights
428  if (class_weights_option.get_type() == flex_type_enum::UNDEFINED) {
429 
430  for(size_t i = 0; i < num_classes; i++){
431  class_weights[i] = {indexer->map_index_to_value(i), 1.0};
432  }
433 
434  // Case 2 ('auto'): Sample inversely proportional to class frequency.
435  } else if (class_weights_option == "auto") {
436 
437  // Weight inversely proportional to class frequency
438  // w_c = (1/n_c) / (sum(i in C, 1/n_i))
439  float total = 0;
440  for(size_t i = 0; i < num_classes; i++){
441  DASSERT_TRUE(stats->count(i) > 0);
442  total += 1.0 / stats->count(i);
443  }
444  for(size_t i = 0; i < num_classes; i++){
445  class_weights[i] = {indexer->map_index_to_value(i),
446  1.0 / (total * stats->count(i))};
447  }
448 
449  // Case 3 (dict): User provided weights.
450  } else if (class_weights_option.get_type() == flex_type_enum::DICT) {
451 
452  // Check that all weights were provided.
453  flex_dict_view class_weights_view(class_weights_option);
454  for(size_t i = 0; i < num_classes; i++){
455  if (!class_weights_view.has_key(indexer->map_index_to_value(i))){
456  std::stringstream ss;
457  ss << "The parameter class_weight does not contain a weight for the "
458  << "class " << indexer->map_index_to_value(i) << "."
459  << " Make sure that the types of the keys in the class_weight "
460  << "dictionary are the same as the type of the target column."
461  << std::endl;
462  log_and_throw(ss.str());
463  }
464  }
465 
466  // Save those weights. (Can't save flexible_type to flex_dict)
467  size_t i = 0;
468  for(const auto& kvp: class_weights_option.get<flex_dict>()){
469 
470  // Weights must be numeric
471  bool error = false;
472  if (kvp.second.get_type() != flex_type_enum::INTEGER &&
473  kvp.second.get_type() != flex_type_enum::FLOAT) {
474  error = true;
475 
476  // Weights must be positive
477  } else {
478  float weight = (float)kvp.second;
479  if (weight > 1e-20){
480  class_weights[i++] = {kvp.first, weight};
481  } else {
482  error = true;
483  }
484  }
485  // Throw an error message if not numeric and not in range.
486  if (error == true){
487  std::stringstream ss;
488  ss << "The class_weight parameter for the class " << kvp.first
489  << " must be a positive numeric value."
490  << std::endl;
491  log_and_throw(ss.str());
492  }
493  }
494 
495  // Error: Weights are not of dictionary, None, or 'auto' type.
496  } else {
497  std::stringstream ss;
498  ss << "The class_weights parameter cannot be of type "
499  << flex_type_enum_to_name(class_weights_option.get_type()) << "."
500  << " Class weights must be a dictionary, None or 'auto'" << std::endl;
501  log_and_throw(ss.str());
502  }
503 
504 
505  return class_weights;
506 
507 }
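
/*
 * Worked example (editorial, counts hypothetical) of the 'auto' rule
 * w_c = (1/n_c) / (sum_i 1/n_i) used above: with class counts n_0 = 90 and
 * n_1 = 10, the normalizer is 1/90 + 1/10 = 1/9, so
 *
 *   w_0 = 1 / ((1/9) * 90) = 0.1
 *   w_1 = 1 / ((1/9) * 10) = 0.9
 *
 * The rarer class gets the larger weight, and the weights sum to 1.
 */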
508 
509 /**
510  * Get number of examples per class
511  *
512  * \param[in] metadata
513  * \returns Break down of examples per class.
514  *
515  * \warning For now, this only does it for binary classification problems.
516  *
517  */
518 inline std::map<flexible_type, size_t> get_num_examples_per_class(
519  std::shared_ptr<ml_metadata> metadata){
520 
521  std::map<flexible_type, size_t> examples_per_class;
522  for(size_t k = 0; k < metadata->target_index_size(); k++){
523  examples_per_class[metadata->target_indexer()->map_index_to_value(k)] =
524  metadata->target_statistics()->count(k);
525  }
526  return examples_per_class;
527 }
528 
529 /**
530  * Get the set of classes.
531  *
532  * \param[in] metadata
533  * \returns The set of all classes in the model.
534  *
535  */
536 inline std::vector<flexible_type> get_class_names(
537  std::shared_ptr<ml_metadata> metadata){
538 
539  std::vector<flexible_type> classes;
540  classes.resize(metadata->target_index_size());
541  for(size_t k = 0; k < classes.size(); k++){
542  classes[k] = metadata->target_indexer()->map_index_to_value(k);
543  }
544  return classes;
545 }
546 
547 /**
548  * Get the number of coefficients from the metadata.
549  * \param[in] metadata
550  * \returns Number of coefficients.
551  */
552 
553 inline size_t get_number_of_coefficients(std::shared_ptr<ml_metadata> metadata){
554 
555  size_t num_coefficients = 1;
556  for(size_t i = 0; i < metadata->num_columns(); i++) {
557  if (metadata->is_categorical(i)) {
558  num_coefficients += metadata->index_size(i) - 1;
559  } else {
560  num_coefficients += metadata->index_size(i);
561  }
562  }
563  return num_coefficients;
564 }
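
/*
 * Worked example (editorial, column layout hypothetical): with one categorical
 * column of 4 categories, one categorical column of 3 categories, and one
 * numeric column, the count is
 *
 *   1 (intercept) + (4 - 1) + (3 - 1) + 1 = 7
 *
 * since each categorical column drops its reference category.
 */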
565 
566 
567 /**
568 * Add a "stderr" column of None values to the SFrame of coefficients.
569 * \param[in] sf_coef Coefficients as an SFrame.
570 * \returns Coefficients SFrame with a "stderr" column of None values appended.
571 */
572 inline sframe add_na_std_err_to_coef(const sframe& sf_coef) {
573  auto sa = std::make_shared<sarray<flexible_type>>(
574  sarray<flexible_type>(FLEX_UNDEFINED, sf_coef.size(), 1,
575  flex_type_enum::FLOAT));
576  return sf_coef.add_column(sa, std::string("stderr"));
577 }
578 
579 /**
580 * Get one-hot encoded coefficients.
581 *
582 * \param[in]  coefs         Coefficients as an Eigen vector (reference encoded).
583 * \param[in]  metadata      Metadata.
584 * \param[out] one_hot_coefs Coefficients expanded to one-hot encoding, with a
585 *                           0.0 written for each reference category.
586 */
587 inline void get_one_hot_encoded_coefs(const Eigen::Matrix<double, Eigen::Dynamic, 1>&
588  coefs, std::shared_ptr<ml_metadata> metadata,
589  std::vector<double>& one_hot_coefs) {
590 
591  size_t idx = 0;
592  size_t num_classes = metadata->target_index_size();
593  bool is_classifier = metadata->target_is_categorical();
594  if (is_classifier) {
595  num_classes -= 1; // reference class
596  }
597 
598  for (size_t c = 0; c < num_classes; c++) {
599  for (size_t i = 0; i < metadata->num_columns(); ++i) {
600  // Categorical
601  size_t start_idx = 0;
602  if (metadata->is_categorical(i)) {
603  // 0 is the reference
604  one_hot_coefs.push_back(0.0);
605  start_idx = 1;
606  }
607 
608  for (size_t j = start_idx; j < metadata->index_size(i); ++j) {
609  one_hot_coefs.push_back(coefs[idx]);
610  ++idx;
611  }
612  }
613 
614  // Intercept
615  one_hot_coefs.push_back(coefs[idx++]);
616  }
617 }
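
/*
 * Editorial sketch of the expansion above (feature layout hypothetical): for a
 * regression-style block with a categorical column {a, b, c} and one numeric
 * column, the reference-encoded coefficients
 *
 *   coefs = [beta_b, beta_c, beta_num, intercept]
 *
 * are expanded to
 *
 *   one_hot_coefs = [0.0, beta_b, beta_c, beta_num, intercept]
 *
 * with an explicit 0.0 written out for the reference category 'a'.
 */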
618 
619 /**
620 * Save coefficients to an SFrame, retrievable in Python.
621 *
622 * \param[in] coefs    Coefficients as an Eigen vector.
623 * \param[in] metadata Metadata.
624 * \param[in] std_err  Standard errors (may be empty).
625 * \returns Coefficients as an SFrame.
626 */
627 inline sframe get_coefficients_as_sframe(
628  const Eigen::Matrix<double, Eigen::Dynamic, 1>& coefs,
629  std::shared_ptr<ml_metadata> metadata,
630  const Eigen::Matrix<double, Eigen::Dynamic, 1>& std_err) {
631 
632  DASSERT_TRUE(coefs.size() > 0);
633  DASSERT_TRUE(metadata);
634 
635  // Classifiers need to provide target_metadata to print out the class in
636  // the coefficients.
637  bool is_classifier = metadata->target_is_categorical();
638  bool has_stderr = std_err.size() > 0;
639  DASSERT_EQ(std_err.size(), has_stderr * coefs.size());
640 
641  sframe sf_coef;
642  std::vector<std::string> coef_names;
643  coef_names.push_back("name");
644  coef_names.push_back("index");
645  if (is_classifier) coef_names.push_back("class");
646  coef_names.push_back("value");
647  if (has_stderr) coef_names.push_back("stderr");
648 
649  std::vector<flex_type_enum> coef_types;
650  coef_types.push_back(flex_type_enum::STRING);
651  coef_types.push_back(flex_type_enum::STRING);
652  if (is_classifier) coef_types.push_back(metadata->target_column_type());
653  coef_types.push_back(flex_type_enum::FLOAT);
654  if (has_stderr) coef_types.push_back(flex_type_enum::FLOAT);
655 
656  sf_coef.open_for_write(coef_names, coef_types, "", 1);
657  auto it_sf_coef = sf_coef.get_output_iterator(0);
658 
659  // Get feature names
660  std::vector<flexible_type> feature_names;
661  std::vector<flexible_type> feature_index;
662 
663  feature_names.reserve(metadata->num_dimensions());
664  feature_index.reserve(metadata->num_dimensions());
665 
666  for (size_t i = 0; i < metadata->num_columns(); ++i) {
667  bool skip_zero = metadata->is_categorical(i);
668 
669  for (size_t j = skip_zero ? 1 : 0; j < metadata->index_size(i); ++j) {
670  feature_names.push_back(metadata->column_name(i));
671 
672  if (metadata->is_indexed(i)) {
673  feature_index.push_back(
674  metadata->indexer(i)->map_index_to_value(j).to<flex_string>());
675  } else if (metadata->column_mode(i) == ml_column_mode::NUMERIC) {
676  feature_index.push_back(FLEX_UNDEFINED);
677  } else {
678  feature_index.push_back(std::to_string(j));
679  }
680  }
681  }
682 
683  // Classification
684  if (is_classifier) {
685 
686  // GLC 1.0.1- did not save things as categorical variables.
687  size_t num_classes = metadata->target_index_size();
688  size_t variables_per_class = coefs.size() / (num_classes - 1);
689  for(size_t k = 1; k < num_classes; k++){
690 
691  // Intercept
692  std::vector<flexible_type> x(4 + has_stderr);
693  x[0] = "(intercept)";
694  x[1] = FLEX_UNDEFINED;
695  x[2] = (metadata->target_indexer())->map_index_to_value(k);
696  x[3] = coefs(variables_per_class * k - 1);
697  if (has_stderr) x[4] = std_err(variables_per_class * k - 1);
698  *it_sf_coef = x;
699  ++it_sf_coef;
700 
701  // Write feature coefficients
702  for (size_t i = 0; i < feature_names.size(); ++i) {
703  x[0] = feature_names[i];
704  x[1] = feature_index[i];
705  x[2] = (metadata->target_indexer())->map_index_to_value(k);
706  x[3] = coefs(variables_per_class * (k-1) + i);
707  if (has_stderr) x[4] = std_err(variables_per_class * (k-1) + i);
708  *it_sf_coef = x;
709  ++it_sf_coef;
710  }
711 
712  }
713 
714  // Regression
715  } else {
716 
717  // Intercept
718  std::vector<flexible_type> x(3 + has_stderr);
719  x[0] = "(intercept)";
720  x[1] = FLEX_UNDEFINED;
721  x[2] = coefs(coefs.size() - 1);
722  if (has_stderr) x[3] = std_err(std_err.size() - 1);
723  *it_sf_coef = x;
724  ++it_sf_coef;
725 
726  // Write feature coefficients
727  for (size_t i = 0; i < feature_names.size(); ++i) {
728  x[0] = feature_names[i];
729  x[1] = feature_index[i];
730  x[2] = coefs(i);
731  if (has_stderr) x[3] = std_err(i);
732  *it_sf_coef = x;
733  ++it_sf_coef;
734  }
735  }
736  sf_coef.close();
737  return sf_coef;
738 }
739 inline sframe get_coefficients_as_sframe(
740  const Eigen::Matrix<double, Eigen::Dynamic, 1>& coefs,
741  std::shared_ptr<ml_metadata> metadata) {
742  Eigen::Matrix<double, Eigen::Dynamic, 1> EMPTY;
743  return get_coefficients_as_sframe(coefs, metadata, EMPTY);
744 }
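
/*
 * Editorial sketch of the output layout (feature names and values are
 * hypothetical): for a regression model with a numeric feature 'age' and a
 * categorical feature 'city' in {sf, nyc, la} ('sf' being the reference,
 * index 0), the call
 *
 *   sframe sf_coef = get_coefficients_as_sframe(coefs, metadata);
 *
 * produces an SFrame of the form
 *
 *   +-------------+-------+--------+
 *   | name        | index | value  |
 *   +-------------+-------+--------+
 *   | (intercept) | None  |  1.23  |
 *   | age         | None  |  0.042 |
 *   | city        | nyc   | -0.51  |
 *   | city        | la    |  0.70  |
 *   +-------------+-------+--------+
 *
 * For classifiers a 'class' column is added, and a 'stderr' column appears
 * when standard errors are supplied.
 */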
745 
746 /**
747  * Get the number of examples per class.
748  *
749  * \param[in] sa Target SArray.
750  * \returns Breakdown of examples per class.
751  */
752 inline std::map<flexible_type, size_t> get_num_examples_per_class_from_sarray(
753  std::shared_ptr<sarray<flexible_type>> sa){
754  auto reader = sa->get_reader();
755  std::map<flexible_type, size_t> unique_values;
756  for(size_t seg_id = 0; seg_id < sa->num_segments(); seg_id++){
757  auto iter = reader->begin(seg_id);
758  auto enditer = reader->end(seg_id);
759  while(iter != enditer) {
760  if(unique_values.find(*iter) == unique_values.end()){
761  unique_values.insert({*iter, 1});  // count the first occurrence
762  } else {
763  ++unique_values[*iter];
764  }
765  ++iter;
766  }
767  }
768  return unique_values;
769 }
770 
771 } // namespace supervised
772 } // namespace turi
773 #endif