6 #ifndef TURI_EVAL_INTERFACE_H_ 7 #define TURI_EVAL_INTERFACE_H_ 9 #include <core/storage/sframe_interface/unity_sframe.hpp> 10 #include <model_server/lib/variant.hpp> 11 #include <unordered_map> 14 #pragma clang diagnostic push 15 #pragma clang diagnostic ignored "-Woverloaded-virtual" // TODO: fix these issues below 18 const double EVAL_ZERO = 1.0e-9;
21 namespace evaluation {
30 enum class average_type_enum: char {
// Translate an averaging-mode name into the matching average_type_enum value.
// The lookup table is a function-local static keyed by flexible_type; the
// entries visible here are "micro", "default" and "macro".
// NOTE(review): some listing lines of this function are not visible here
// (e.g. the table's closing brace and the final return) - confirm against
// the full file before relying on this description.
43 inline average_type_enum average_type_enum_from_name(
const flexible_type& name) {
44 static std::unordered_map<flexible_type, average_type_enum> type_map {
46 {flexible_type(
"micro"), average_type_enum::MICRO},
47 {flexible_type(
"default"), average_type_enum::DEFAULT},
48 {flexible_type(
"macro"), average_type_enum::MACRO}
// Look the requested name up in the table.
50 auto it = type_map.find(name);
// Unknown name: build an "Invalid average type name <name>." message.
// NOTE(review): the call that consumes this string is not visible here -
// presumably log_and_throw, as used elsewhere in this file; confirm.
51 if (it == type_map.end()) {
53 std::string(
"Invalid average type name " + name.to<std::string>() +
".")
64 std::size_t operator()(
const std::pair<flexible_type, flexible_type> &x)
const {
74 const std::unordered_set<flexible_type>& labels) {
78 for (
const auto& l : labels) {
85 for (
const auto& l : labels) {
100 std::unordered_map<flexible_type, flexible_type> scores) {
102 double average = 0.0;
103 size_t tot_classes = 0;
104 for (
const auto& sc: scores) {
106 average += sc.second.
get<
double>();
112 if (tot_classes == 0) {
// Validate that a prediction lies in [0, 1], allowing a slack of EVAL_ZERO
// on either side of the interval. Out-of-range values are reported through
// log_and_throw with a hint to normalize the scores.
123 inline void check_probability_range(
const double& pred) {
124 if ((pred < 0 - EVAL_ZERO) || (pred > (1 + EVAL_ZERO))) {
125 log_and_throw(
"Prediction scores/probabilities are expected to be " 126 "in the range [0, 1]. If they aren't, try normalizing them.");
135 log_and_throw(
"Prediction scores/probabilities cannot contain missing " 136 "values (i.e None values). Try removing them with 'dropna'.");
// Precision from raw counts: tp / (tp + fp), with the division carried out
// in double and the result returned as a flexible_type.
//   tp - number of true positives
//   fp - number of false positives
// NOTE(review): the statement guarding this return is not visible here;
// presumably it handles tp + fp == 0 - confirm against the full file.
146 inline flexible_type compute_precision_score(
size_t tp,
size_t fp) {
148 return double(tp)/(tp + fp);
// Recall from raw counts: tp / (tp + fn), with the division carried out in
// double and the result returned as a flexible_type.
//   tp - number of true positives
//   fn - number of false negatives
// NOTE(review): the statement guarding this return is not visible here;
// presumably it handles tp + fn == 0 - confirm against the full file.
160 inline flexible_type compute_recall_score(
size_t tp,
size_t fn) {
162 return double(tp)/(tp + fn);
176 size_t tp,
size_t fp,
size_t fn,
double beta) {
188 double pr_d = pr.
get<
double>();
189 double rec_d = rec.
get<
double>();
190 double denom = std::max(1e-20, beta * beta * pr_d + rec_d);
191 return (1.0 + beta * beta) * (pr_d * rec_d) / denom;
240 virtual std::string name()
const = 0;
247 virtual void init(
size_t _n_threads = 1) = 0;
275 size_t thread_id = 0) = 0;
288 const size_t& target,
289 const size_t& prediction,
290 size_t thread_id = 0) {
291 register_example(target, prediction, thread_id);
315 std::vector<double> mse;
316 std::vector<size_t> num_examples;
324 return (std::string)(
"rmse");
// Prepare the per-thread accumulators: remember the thread count and give
// mse and num_examples one slot per thread.
330 void init(
size_t _n_threads = 1){
331 n_threads = _n_threads;
332 mse.resize(n_threads);
333 num_examples.resize(n_threads);
// Visit every thread slot.
// NOTE(review): the loop body is not visible here - presumably it zeroes
// each slot; confirm.
334 for(
size_t i = 0; i < n_threads; i++){
350 size_t thread_id = 0){
355 double a = (double)prediction - (
double)target;
356 num_examples[thread_id]++;
357 mse[thread_id] += (a * a - mse[thread_id]) / num_examples[thread_id];
365 size_t total_examples = 0;
366 for(
size_t i = 0; i < n_threads; i++){
367 rmse += num_examples[i] * mse[i];
368 total_examples += num_examples[i];
394 return (std::string)(
"max_error");
// Prepare the per-thread accumulators: remember the thread count and give
// max_error one slot per thread.
400 void init(
size_t _n_threads = 1){
401 n_threads = _n_threads;
402 max_error.resize(n_threads);
// Visit every thread slot.
// NOTE(review): the loop body is not visible here - presumably it resets
// each slot; confirm.
403 for (
size_t i = 0; i < n_threads; i++) {
418 size_t thread_id = 0){
420 double err = (double)prediction - (
double)target;
421 max_error[thread_id] = std::max(std::abs(err), max_error[thread_id]);
428 double max_max_error = 0;
429 for(
size_t i = 0; i < n_threads; i++){
430 max_max_error = std::max(max_max_error, max_error[i]);
443 std::vector<double> logloss;
444 std::vector<size_t> num_examples;
445 std::unordered_map<flexible_type, size_t> m_index_map;
446 size_t num_classes = size_t(-1);
454 const std::unordered_map<flexible_type, size_t>& index_map,
455 size_t num_classes =
size_t(-1)) {
456 m_index_map = index_map;
457 if (num_classes ==
size_t(-1)) {
458 this->num_classes = index_map.size();
460 this->num_classes = num_classes;
468 bool is_prob_evaluator()
const {
// Identifier of this metric: the string "multiclass_logloss".
475 std::string name()
const {
476 return (std::string)(
"multiclass_logloss");
// Prepare the per-thread accumulators: remember the thread count and give
// logloss and num_examples one slot per thread.
483 void init(
size_t _n_threads = 1){
484 n_threads = _n_threads;
485 logloss.resize(n_threads);
486 num_examples.resize(n_threads);
// Visit every thread slot.
// NOTE(review): the loop body is not visible here - presumably it zeroes
// each slot; confirm.
487 for(
size_t i = 0; i < n_threads; i++){
// Accumulate one example whose target is already a class index.
//   target     - class index of the true label
//   prediction - per-class scores, indexed by class
//   thread_id  - which per-thread accumulator slot to update
// NOTE(review): the declaration and initial value of pred are not visible
// here, so the value used when target is out of range cannot be confirmed.
503 void register_unmapped_example(
const size_t& target,
504 const std::vector<double>& prediction,
505 size_t thread_id = 0){
// Only read the score of the true class when the index is in bounds.
510 if (target < prediction.size()) {
511 pred = prediction[target];
513 num_examples[thread_id]++;
514 check_probability_range(pred);
// Clamp the score into [EVAL_ZERO, 1 - EVAL_ZERO] before taking the log, so
// the log argument never drops below EVAL_ZERO.
515 logloss[thread_id] += log(
516 std::max(std::min(1.0 - EVAL_ZERO, pred), EVAL_ZERO));
529 size_t thread_id = 0){
531 num_examples[thread_id]++;
535 if(prediction.
size() != this->num_classes) {
536 std::stringstream ss;
537 ss <<
"Size of prediction probability vector" 538 <<
"(" << prediction.
size() <<
") != number of classes" 539 <<
"(" << m_index_map.size() <<
")." << std::endl;
540 log_and_throw(ss.str());
544 auto it = m_index_map.find(target);
547 if (it != m_index_map.end()) {
548 label = size_t(it->second);
551 if (label < preds.size()) {
556 check_probability_range(pred);
557 logloss[thread_id] += log(
558 std::max(std::min(1.0 - 1e-15, pred), 1e-15));
565 double total_logloss = 0;
566 size_t total_examples = 0;
567 for(
size_t i = 0; i < n_threads; i++){
568 total_logloss += logloss[i];
569 total_examples += num_examples[i];
573 total_examples = std::max<size_t>(1, total_examples);
574 return to_variant(-total_logloss / total_examples);
584 std::vector<double> logloss;
585 std::vector<size_t> num_examples;
586 std::unordered_map<flexible_type, size_t> index_map;
596 std::unordered_map<flexible_type, size_t> index_map =
597 std::unordered_map<flexible_type, size_t>()) {
598 this->index_map = index_map;
// Identifier of this metric: the string "binary_logloss".
604 std::string name()
const override {
605 return (std::string)(
"binary_logloss");
613 bool is_prob_evaluator()
const override {
// Prepare the per-thread accumulators: remember the thread count and give
// logloss and num_examples one slot per thread.
620 void init(
size_t _n_threads = 1)
override {
621 n_threads = _n_threads;
622 logloss.resize(n_threads);
623 num_examples.resize(n_threads);
// Visit every thread slot.
// NOTE(review): the loop body is not visible here - presumably it zeroes
// each slot; confirm.
624 for(
size_t i = 0; i < n_threads; i++){
// Accumulate one example whose target is already a numeric label.
//   target     - true label; any non-zero value takes the first branch below
//   prediction - a single score, validated by check_probability_range
//   thread_id  - which per-thread accumulator slot to update
640 void register_unmapped_example(
const size_t& target,
641 const double& prediction,
642 size_t thread_id = 0) {
645 num_examples[thread_id]++;
646 check_probability_range(prediction);
// Add log(prediction) for a non-zero target and log(1 - prediction)
// otherwise; both arguments are floored at EVAL_ZERO before the log.
647 logloss[thread_id] +=
648 log(target !=0 ? std::max(prediction, EVAL_ZERO) :
649 std::max(1.0 - prediction, EVAL_ZERO));
662 size_t thread_id = 0)
override {
664 check_undefined(prediction);
667 DASSERT_EQ(index_map.size(), 2);
670 num_examples[thread_id]++;
671 size_t label = index_map.at(target);
672 double pred = prediction.
to<
double>();
673 check_probability_range(pred);
674 logloss[thread_id] +=
675 log(label != 0 ? std::max(pred, EVAL_ZERO) : std::max(1.0 - pred, EVAL_ZERO));
682 double total_logloss = 0;
683 size_t total_examples = 0;
684 for(
size_t i = 0; i < n_threads; i++){
685 total_logloss += logloss[i];
686 total_examples += num_examples[i];
689 total_examples = std::max<size_t>(1, total_examples);
690 return to_variant(-total_logloss/total_examples);
709 std::vector<double> accuracy;
710 std::vector<size_t> num_examples;
718 return (std::string)(
"classifier_accuracy");
// Prepare the per-thread accumulators: remember the thread count and give
// accuracy and num_examples one slot per thread.
725 void init(
size_t _n_threads = 1){
726 n_threads = _n_threads;
727 accuracy.resize(n_threads);
728 num_examples.resize(n_threads);
// Visit every thread slot.
// NOTE(review): the loop body is not visible here - presumably it zeroes
// each slot; confirm.
729 for(
size_t i = 0; i < n_threads; i++){
747 const size_t& target,
748 const size_t& prediction,
749 size_t thread_id = 0){
751 num_examples[thread_id]++;
752 accuracy[thread_id] += (target == prediction);
765 size_t thread_id = 0){
767 num_examples[thread_id]++;
768 accuracy[thread_id] += (target == prediction);
775 double total_accuracy = 0;
776 size_t total_examples = 0;
777 for(
size_t i = 0; i < n_threads; i++){
778 total_accuracy += accuracy[i];
779 total_examples += num_examples[i];
783 return to_variant(total_accuracy * 1.0 / total_examples);
805 std::vector<std::unordered_map<std::pair<flexible_type, flexible_type>, size_t,
810 size_t n_threads = 0;
811 std::unordered_set<flexible_type> labels;
812 std::map<size_t, flexible_type> index_map;
813 std::unordered_map<std::pair<flexible_type, flexible_type>, size_t,
823 std::map<size_t, flexible_type>()) {
824 this->index_map = index_map;
831 return (std::string)(
"confusion_matrix");
// Remember the thread count and give counts one entry per thread.
837 void init(
size_t _n_threads = 1){
838 n_threads = _n_threads;
839 counts.resize(n_threads);
859 size_t thread_id = 0){
861 std::pair<flexible_type, flexible_type> pair =
862 std::make_pair(target, prediction);
864 if(counts[thread_id].count(pair) > 0){
865 counts[thread_id][pair]++;
867 counts[thread_id][pair] = 1;
877 std::unordered_map<std::pair<flexible_type, flexible_type>, size_t,
879 for(
size_t i = 0; i < n_threads; i++){
880 for (
const auto& kvp: counts[i]){
881 if(final_counts_thread.count(kvp.first) > 0){
882 final_counts_thread[kvp.first] += kvp.second;
884 final_counts_thread[kvp.first] = kvp.second;
888 final_counts = final_counts_thread;
892 for (
const auto& kvp: final_counts) {
893 if (labels.count(kvp.first.first) == 0) {
894 labels.insert(kvp.first.first);
896 if (labels.count(kvp.first.second) == 0) {
897 labels.insert(kvp.first.second);
908 this->gather_counts_and_labels();
911 if (!index_map.empty()) {
912 std::unordered_map<std::pair<flexible_type, flexible_type>, size_t,
914 for (
const auto& kvp: final_counts) {
915 size_t first_index = kvp.first.first.get<
flex_int>();
916 size_t second_index = kvp.first.second.get<
flex_int>();
918 const flexible_type& second_key = index_map.at(second_index);
921 final_counts_copy[std::make_pair(first_key,second_key)] = kvp.second;
923 final_counts = final_counts_copy;
927 sframe confusion_matrix_sf;
928 std::vector<std::string> names;
929 names.push_back(
"target_label");
930 names.push_back(
"predicted_label");
931 names.push_back(
"count");
937 for (
const auto &cf_entry: final_counts){
938 auto t_type = cf_entry.first.first.get_type();
939 auto p_type = cf_entry.first.second.get_type();
942 target_type = t_type;
951 predicted_type = p_type;
968 std::vector<flex_type_enum> types;
969 types.push_back(target_type);
970 types.push_back(predicted_type);
975 std::vector<flexible_type> x(3);
976 for (
const auto &cf_entry: final_counts){
977 x[0] = cf_entry.first.first;
978 x[1] = cf_entry.first.second;
979 x[2] = cf_entry.second;
984 confusion_matrix_sf.
close();
985 std::shared_ptr<unity_sframe> unity_confusion_matrix =
986 std::make_shared<unity_sframe>();
987 unity_confusion_matrix->construct_from_sframe(confusion_matrix_sf);
1001 average_type_enum average;
1002 std::unordered_map<flexible_type, size_t> tp;
1003 std::unordered_map<flexible_type, size_t> tn;
1004 std::unordered_map<flexible_type, size_t> fp;
1005 std::unordered_map<flexible_type, size_t> fn;
1016 for (
const auto& l : labels) {
1023 for (
const auto& l : labels) {
1036 std::string name()
const = 0;
1042 return average != average_type_enum::NONE;
1051 this->gather_counts_and_labels();
1052 for (
const auto& l: labels) {
1060 for (
const auto& kvp: final_counts) {
1063 size_t count = kvp.second;
1066 for (
const auto& l: labels) {
1069 if ( (p == l) == (t == l)) {
1115 log_and_throw(
"The beta value in the F-beta score must be > 0.0");
1118 this->average = average_type_enum_from_name(average);
1125 return (std::string)(
"fbeta_score");
1135 this->gather_global_metrics();
1138 DASSERT_EQ(fp.size(), labels.size());
1139 DASSERT_EQ(tp.size(), labels.size());
1143 std::unordered_map<flexible_type, flexible_type> fbeta_scores;
1144 for (
const auto& l: labels) {
1145 fbeta_scores[l] = compute_fbeta_score(tp[l], fp[l], fn[l], beta);
1149 if (labels.size() == 2) {
1150 return to_variant(fbeta_scores[get_reference_label()]);
1156 case average_type_enum::MICRO:
1158 size_t total_tp = 0;
1159 size_t total_fp = 0;
1160 size_t total_fn = 0;
1161 for (
const auto& l: labels) {
1166 return to_variant(compute_fbeta_score(total_tp, total_fp, total_fn, beta));
1170 case average_type_enum::DEFAULT:
1171 case average_type_enum::MACRO:
1173 return to_variant(average_with_none_skip(fbeta_scores));
1177 case average_type_enum::NONE:
1183 log_and_throw(std::string(
"Unsupported average_type_enum case"));
1184 ASSERT_UNREACHABLE();
1203 this->average = average_type_enum_from_name(average);
1210 return (std::string)(
"precision");
1219 this->gather_global_metrics();
1221 DASSERT_EQ(fp.size(), labels.size());
1222 DASSERT_EQ(tp.size(), labels.size());
1226 std::unordered_map<flexible_type, flexible_type> precision_scores;
1227 for (
const auto& l: labels) {
1228 precision_scores[l] = compute_precision_score(tp[l], fp[l]);
1232 if (labels.size() == 2) {
1233 return to_variant(precision_scores[get_reference_label()]);
1239 case average_type_enum::MICRO:
1241 size_t total_tp = 0;
1242 size_t total_fp = 0;
1243 for (
const auto& l: labels) {
1247 return to_variant(compute_precision_score(total_tp, total_fp));
1250 case average_type_enum::DEFAULT:
1251 case average_type_enum::MACRO:
1253 return to_variant(average_with_none_skip(precision_scores));
1257 case average_type_enum::NONE:
1263 log_and_throw(std::string(
"Unsupported average_type_enum case"));
1264 ASSERT_UNREACHABLE();
1284 this->average = average_type_enum_from_name(average);
1291 return (std::string)(
"recall");
1301 this->gather_global_metrics();
1303 DASSERT_EQ(fp.size(), labels.size());
1304 DASSERT_EQ(tp.size(), labels.size());
1307 std::unordered_map<flexible_type, flexible_type> recall_scores;
1308 for (
const auto& l: labels) {
1309 recall_scores[l] = compute_recall_score(tp[l], fn[l]);
1313 if (labels.size() == 2) {
1314 return to_variant(recall_scores[get_reference_label()]);
1320 case average_type_enum::MICRO:
1322 size_t total_tp = 0;
1323 size_t total_fn = 0;
1324 for (
const auto& l: labels) {
1328 return to_variant(compute_recall_score(total_tp, total_fn));
1332 case average_type_enum::DEFAULT:
1333 case average_type_enum::MACRO:
1335 return to_variant(average_with_none_skip(recall_scores));
1339 case average_type_enum::NONE:
1345 log_and_throw(std::string(
"Unsupported average_type_enum case"));
1346 ASSERT_UNREACHABLE();
1367 this->average = average_type_enum_from_name(average);
1374 return (std::string)(
"accuracy");
1384 this->gather_global_metrics();
1386 DASSERT_EQ(fp.size(), labels.size());
1387 DASSERT_EQ(tp.size(), labels.size());
1390 std::unordered_map<flexible_type, double> accuracy_scores;
1391 std::unordered_map<flexible_type, flexible_type> precision_scores;
1392 for (
const auto& l: labels) {
1393 accuracy_scores[l] = double(tp[l] + tn[l])/(tp[l] + fp[l] + tn[l] + fn[l]);
1394 precision_scores[l] = compute_precision_score(tp[l], fp[l]);
1398 if (labels.size() == 2) {
1399 return to_variant(accuracy_scores[get_reference_label()]);
1405 case average_type_enum::MICRO:
1406 case average_type_enum::DEFAULT:
1412 for (
const auto& l: labels) {
1418 double accuracy = double(tot_tp + tot_tn)/(tot_tp + tot_fp + tot_tn + tot_fn);
1423 case average_type_enum::MACRO:
1425 double average_accuracy = 0.0;
1426 for (
const auto& rec: accuracy_scores) {
1427 average_accuracy += rec.second;
1429 average_accuracy /= labels.size();
1433 case average_type_enum::NONE:
1439 log_and_throw(std::string(
"Unsupported average_type_enum case"));
1440 ASSERT_UNREACHABLE();
1472 std::unordered_map<std::pair<flexible_type, flexible_type>, size_t,
1474 std::vector<std::vector<std::vector<size_t>>> tpr;
1475 std::vector<std::vector<std::vector<size_t>>> fpr;
1476 std::vector<std::vector<size_t>> num_examples;
1481 average_type_enum average = average_type_enum::NONE;
1482 bool binary =
false;
1483 const size_t NUM_BINS=1000;
1484 size_t n_threads = 0;
1485 size_t num_classes = 0;
1488 std::unordered_map<flexible_type, size_t> index_map;
1491 std::vector<std::vector<size_t>> total_fp;
1492 std::vector<std::vector<size_t>> total_tp;
1493 std::vector<size_t> total_examples;
1505 std::unordered_map<flexible_type, size_t> index_map =
1506 std::unordered_map<flexible_type, size_t>(),
1509 size_t num_classes =
size_t(-1)) {
1510 this->average = average_type_enum_from_name(average);
1511 this->binary = binary;
1512 this->index_map = index_map;
1513 if (num_classes ==
size_t(-1)) {
1514 this->num_classes = index_map.size();
1516 this->num_classes = num_classes;
1524 return (std::string)(
"roc_curve");
1547 DASSERT_LE(binary, num_classes == 2);
1550 n_threads = _n_threads;
1553 tpr.resize(n_threads);
1554 fpr.resize(n_threads);
1555 num_examples.resize(n_threads);
1556 for (
size_t i = 0; i < n_threads; i++) {
1558 tpr[i].resize(num_classes);
1559 fpr[i].resize(num_classes);
1560 num_examples[i].resize(num_classes);
1561 for (
size_t c = 0; c < num_classes; c++) {
1563 tpr[i][c].resize(NUM_BINS);
1564 fpr[i][c].resize(NUM_BINS);
1565 num_examples[i][c] = 0;
1566 for (
size_t j = 0; j < NUM_BINS; j++) {
1574 total_fp.resize(num_classes);
1575 total_tp.resize(num_classes);
1576 total_examples.resize(num_classes);
1577 for (
size_t c = 0; c < num_classes; c++) {
1578 total_examples[c] = 0;
1579 total_fp[c].resize(NUM_BINS);
1580 total_tp[c].resize(NUM_BINS);
1581 for (
size_t j = 0; j < NUM_BINS; j++) {
// Map a prediction to a histogram bin index: floor(prediction * NUM_BINS),
// where a negative product is first raised to 0.0 and a result at or above
// NUM_BINS is pulled back to the last bin (NUM_BINS - 1).
// NOTE(review): the declared return type is const float although bin is
// computed as size_t, and the return statement is not visible here - confirm
// whether a float return is intended.
1589 const float get_bin(
double prediction)
const {
1591 size_t bin = std::floor((
double) std::max(0.0, prediction * NUM_BINS));
// Keep the index inside the table; a product of exactly NUM_BINS or more
// would otherwise index one past the end.
1597 if (bin >= NUM_BINS) bin = NUM_BINS - 1;
// Lower edge of a bin expressed as a threshold: bin / NUM_BINS, computed in
// double and returned through the declared float return type.
1601 const float get_lower_bound(
size_t bin)
const {
1603 return bin/((double)NUM_BINS);
1616 size_t thread_id = 0){
1617 DASSERT_LT(thread_id, n_threads);
1618 DASSERT_LT(thread_id, fpr.size());
1619 DASSERT_LT(thread_id, tpr.size());
1620 check_undefined(prediction);
1627 auto it = index_map.find(target);
1628 if (it == index_map.end()) {
1631 idx = size_t(it->second);
1633 DASSERT_LT(idx, index_map.size());
1637 DASSERT_EQ(num_classes, 2);
1640 double pred = prediction.
to<
double>();
1641 check_probability_range(pred);
1642 size_t bin = get_bin(pred);
1645 DASSERT_LT(bin, tpr[thread_id][1].size());
1646 DASSERT_LT(bin, fpr[thread_id][0].size());
1647 tpr[thread_id][1][bin]++;
1648 fpr[thread_id][0][bin]++;
1649 num_examples[thread_id][1]++;
1651 DASSERT_LT(bin, tpr[thread_id][0].size());
1652 DASSERT_LT(bin, fpr[thread_id][1].size());
1653 fpr[thread_id][1][bin]++;
1654 tpr[thread_id][0][bin]++;
1655 num_examples[thread_id][0]++;
1662 if(prediction.
size() != num_classes) {
1663 std::stringstream ss;
1664 ss <<
"Size of prediction probability vector" 1665 <<
"(" << prediction.
size() <<
") != number of classes" 1666 <<
"(" << num_classes <<
")." << std::endl;
1667 log_and_throw(ss.str());
1671 if (idx >= prediction.
size()) {
1676 for (
size_t i = 0; i < prediction.
size(); i++) {
1677 check_probability_range(prediction[i]);
1678 size_t bin = get_bin(prediction[i]);
1682 DASSERT_LT(bin, tpr[thread_id][idx].size());
1683 tpr[thread_id][i][bin]++;
1685 DASSERT_LT(bin, fpr[thread_id][idx].size());
1686 fpr[thread_id][i][bin]++;
1689 num_examples[thread_id][idx]++;
1700 for (
size_t i = 0; i < n_threads; ++i) {
1701 for (
size_t c = 0; c < num_classes; c++) {
1702 total_examples[c] += num_examples[i][c];
1703 for (
size_t j = 0; j < NUM_BINS; ++j) {
1704 total_fp[c][j] += fpr[i][c][j];
1705 total_tp[c][j] += tpr[i][c][j];
1712 for (
size_t c = 0; c < num_classes; c++) {
1713 for (ssize_t j = NUM_BINS-2; j >= 0; --j) {
1714 total_fp[c][j] += total_fp[c][j+1];
1715 total_tp[c][j] += total_tp[c][j+1];
1726 this->gather_global_metrics();
1729 size_t total_bins = NUM_BINS;
1730 size_t _num_classes = this->num_classes;
1731 auto compute_roc_curve = [total_bins, _num_classes](
1732 const std::vector<std::vector<size_t>>& total_fp,
1733 const std::vector<std::vector<size_t>>& total_tp,
1734 const std::vector<size_t>& total_examples,
1737 const std::map<size_t, flexible_type>& inv_map =
1740 size_t all_examples = 0;
1741 for (
const auto& cex: total_examples) {
1742 all_examples += cex;
1747 std::vector<std::string> col_names {
"threshold",
"fpr",
"tpr",
"p",
"n"};
1752 flex_type_enum::INTEGER};
1756 col_names.push_back(
"class");
1758 col_types.push_back(inv_map.at(c).get_type());
1762 std::vector<flexible_type> out_v;
1771 for (
size_t j=0; j < total_bins; ++j) {
1772 DASSERT_LE(total_tp[cl][j], total_examples[cl]);
1773 DASSERT_LE(total_fp[cl][j], all_examples - total_examples[cl]);
1774 out_v = {j / double(total_bins),
1775 (1.0 * total_fp[cl][j]) / (all_examples - total_examples[cl]),
1776 (1.0 * total_tp[cl][j]) / total_examples[cl],
1777 total_examples[cl], (all_examples - total_examples[cl])};
1779 out_v.push_back(inv_map.at(cl));
1785 out_v = {1.0, 0.0, 0.0,
1786 total_examples[c], (all_examples - total_examples[c])};
1788 out_v.push_back(inv_map.at(cl));
1796 if (cl == _num_classes)
break;
1801 DASSERT_EQ(ret.
size(),
1802 (total_bins + 1) * (binary + (1 - binary) * _num_classes));
1805 std::shared_ptr<unity_sframe> tmp = std::make_shared<unity_sframe>();
1806 tmp->construct_from_sframe(ret);
1813 if (num_classes == 2) {
1814 return to_variant(compute_roc_curve(total_fp, total_tp, total_examples, 1));
1820 case average_type_enum::NONE:
1821 case average_type_enum::DEFAULT:
1825 std::map<size_t, flexible_type> inv_map;
1826 for (
const auto& kvp: index_map) {
1827 inv_map[kvp.second] = kvp.first;
1829 return compute_roc_curve(total_fp, total_tp, total_examples, 0,
false, inv_map);
1833 log_and_throw(std::string(
"Unsupported average_type_enum case"));
1834 ASSERT_UNREACHABLE();
1856 std::unordered_map<flexible_type, size_t> index_map =
1857 std::unordered_map<flexible_type, size_t>(),
1860 size_t num_classes =
size_t(-1)) {
1861 this->average = average_type_enum_from_name(average);
1862 this->binary = binary;
1863 this->index_map = index_map;
1864 if (num_classes ==
size_t(-1)) {
1865 this->num_classes = index_map.size();
1867 this->num_classes = num_classes;
// Identifier of this metric: the string "auc".
1874 std::string name()
const {
1875 return (std::string)(
"auc");
// Returns true unless the averaging mode is average_type_enum::NONE.
// NOTE(review): presumably NONE yields a per-class result that a table
// printer cannot show as one value - confirm against get_metric.
1881 virtual bool is_table_printer_compatible()
const {
1882 return average != average_type_enum::NONE;
1891 this->gather_global_metrics();
1894 size_t total_bins = NUM_BINS;
1895 auto compute_auc = [total_bins](
1896 const std::vector<std::vector<size_t>>& total_fp,
1897 const std::vector<std::vector<size_t>>& total_tp,
1898 const std::vector<size_t>& total_examples,
1899 const size_t& c) ->
double {
1901 size_t all_examples = 0;
1902 for (
const auto& cex: total_examples) {
1903 all_examples += cex;
1906 double auc_score = 0;
1907 for(
size_t i = 0; i < total_bins- 1; i++) {
1908 double delta = total_fp[c][i] - total_fp[c][i+1];
1909 delta /= (all_examples - total_examples[c]);
1910 if (delta > 1e-10) {
1911 auc_score += 0.5 * (total_tp[c][i] + total_tp[c][i+1]) * delta
1912 / total_examples[c];
1919 if (num_classes == 2) {
1920 return to_variant(compute_auc(total_fp, total_tp, total_examples, 1));
1926 case average_type_enum::NONE:
1930 std::map<size_t, flexible_type> inv_map;
1931 for (
const auto& kvp: index_map) {
1932 inv_map[kvp.second] = kvp.first;
1936 std::unordered_map<flexible_type, double> auc_score;
1937 for (
size_t c = 0; c < num_classes; c++) {
1939 auc_score[k] = compute_auc(total_fp, total_tp, total_examples, c);
1944 case average_type_enum::DEFAULT:
1945 case average_type_enum::MACRO:
1947 double auc_score = 0;
1948 for (
size_t c = 0; c < num_classes; c++) {
1949 auc_score += compute_auc(total_fp, total_tp, total_examples, c);
1955 log_and_throw(std::string(
"Unsupported average_type_enum case"));
1956 ASSERT_UNREACHABLE();
// Factory: build the evaluator named by `metric`, configure it from
// `kwargs`, call init on it, and hand it back as a shared pointer to the
// common supervised_evaluation_interface.
//
// Metric names handled in the visible branches: "rmse", "max_error",
// "confusion_matrix_no_map", "confusion_matrix", "accuracy",
// "binary_logloss", "multiclass_logloss" / "log_loss", "roc_curve", "auc",
// "flexible_accuracy", "precision", "recall", "fbeta_score", "f1_score".
// Any other name is reported through log_and_throw.
//
// kwargs keys read with kwargs.at (std::map::at throws std::out_of_range
// when the key is absent): "inv_index_map", "index_map", "average",
// "binary", "beta". "num_classes" is optional and only read when present.
//
// NOTE(review): several listing lines of this function are not visible here
// (some branch bodies, the final else, the return, and the definition of
// n_threads used in the init call) - confirm details against the full file.
1984 inline std::shared_ptr<supervised_evaluation_interface> get_evaluator_metric(
1985 const std::string& metric,
1986 const std::map<std::string, variant_type>& kwargs = std::map<std::string, variant_type>()) {
1988 std::shared_ptr<supervised_evaluation_interface> evaluator;
// Regression metrics: no configuration needed.
1989 if(metric ==
"rmse"){
1990 evaluator = std::make_shared<rmse>(
rmse());
1991 }
else if(metric ==
"max_error"){
1992 evaluator = std::make_shared<max_error>(
max_error());
1994 }
// NOTE(review): the body of this branch is not visible here.
else if(metric ==
"confusion_matrix_no_map"){
1997 }
// Confusion matrix configured from the "inv_index_map" entry of kwargs.
else if(metric ==
"confusion_matrix"){
2000 std::map<size_t, flexible_type>>(kwargs.at(
"inv_index_map"));
2003 }
// NOTE(review): the body of this branch is not visible here.
else if(metric ==
"accuracy"){
2006 }
// Binary log loss configured from the "index_map" entry of kwargs.
else if(metric ==
"binary_logloss") {
2009 std::unordered_map<flexible_type, size_t>>(kwargs.at(
"index_map"));
2010 evaluator = std::make_shared<binary_logloss>(
2011 binary_logloss(index_map));
2013 }
// "log_loss" is accepted as a second name for the multiclass log loss.
else if((metric ==
"multiclass_logloss") || (metric ==
"log_loss")){
2016 std::unordered_map<flexible_type, size_t>>(kwargs.at(
"index_map"));
// size_t(-1) marks "num_classes not supplied"; the multiclass_logloss
// constructor then falls back to the size of the index map.
2017 size_t num_classes = size_t(-1);
2018 if (kwargs.count(
"num_classes") > 0) {
2019 num_classes = variant_get_value<size_t>(kwargs.at(
"num_classes"));
2021 evaluator = std::make_shared<multiclass_logloss>(
2022 multiclass_logloss(index_map, num_classes));
2024 }
// ROC curve: needs "average", "binary" and "index_map"; "num_classes" is
// optional with the same size_t(-1) sentinel as above.
else if(metric ==
"roc_curve"){
2028 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2029 auto binary = variant_get_value<bool>(kwargs.at(
"binary"));
2031 std::unordered_map<flexible_type, size_t>>(kwargs.at(
"index_map"));
2032 size_t num_classes = size_t(-1);
2033 if (kwargs.count(
"num_classes") > 0) {
2034 num_classes = variant_get_value<size_t>(kwargs.at(
"num_classes"));
2036 evaluator = std::make_shared<roc_curve>(
2037 roc_curve(index_map, average, binary, num_classes));
2039 }
// AUC: takes the same configuration as the ROC curve branch.
else if(metric ==
"auc"){
2043 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2044 auto binary = variant_get_value<bool>(kwargs.at(
"binary"));
2046 std::unordered_map<flexible_type, size_t>>(kwargs.at(
"index_map"));
2047 size_t num_classes = size_t(-1);
2048 if (kwargs.count(
"num_classes") > 0) {
2049 num_classes = variant_get_value<size_t>(kwargs.at(
"num_classes"));
2051 evaluator = std::make_shared<auc>(auc(index_map, average, binary, num_classes));
2053 }
// Confusion-matrix-derived metrics: each reads the "average" mode.
else if(metric ==
"flexible_accuracy"){
2055 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2056 evaluator = std::make_shared<flexible_accuracy>(
2059 }
else if(metric ==
"precision"){
2061 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2062 evaluator = std::make_shared<precision>(
precision(average));
2064 }
else if(metric ==
"recall"){
2066 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2067 evaluator = std::make_shared<recall>(
recall(average));
2069 }
// F-beta additionally requires a "beta" entry in kwargs.
else if(metric ==
"fbeta_score"){
2072 auto beta = variant_get_value<double>(kwargs.at(
"beta"));
2073 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2074 evaluator = std::make_shared<fbeta_score>(
fbeta_score(beta, average));
2076 }
// F1 is the F-beta evaluator with beta fixed at 1.0.
else if(metric ==
"f1_score"){
2078 auto average = variant_get_value<flexible_type>(kwargs.at(
"average"));
2079 evaluator = std::make_shared<fbeta_score>(
fbeta_score(1.0, average));
// Unrecognized metric name.
2082 log_and_throw(
"\'" + metric +
"\' is not a supported evaluation metric.");
// Size the evaluator's per-thread state before it is used.
2087 evaluator->init(n_threads);
2095 #pragma clang diagnostic pop void init(size_t _n_threads=1)
void gather_global_metrics()
std::vector< double > flex_vec
void init(size_t _n_threads=1)
virtual variant_type get_metric()
bool is_prob_evaluator() const
std::decay< T >::type variant_get_value(const variant_type &v)
virtual void register_unmapped_example(const size_t &target, const size_t &prediction, size_t thread_id=0)
void register_example(const flexible_type &target, const flexible_type &prediction, size_t thread_id=0)
void gather_global_metrics()
static uint64_t hash64_combine(uint64_t h1, uint64_t h2)
virtual bool is_table_printer_compatible() const
boost::make_recursive_variant< flexible_type, std::shared_ptr< unity_sgraph_base >, dataframe_t, std::shared_ptr< model_base >, std::shared_ptr< unity_sframe_base >, std::shared_ptr< unity_sarray_base >, std::map< std::string, boost::recursive_variant_ >, std::vector< boost::recursive_variant_ >, boost::recursive_wrapper< function_closure_info > >::type variant_type
void gather_counts_and_labels()
iterator get_output_iterator(size_t segmentid)
std::enable_if<!std::is_integral< T >::value &&!std::is_floating_point< T >::value, T >::type to() const
void register_example(const flexible_type &target, const flexible_type &prediction, size_t thread_id=0)
flexible_accuracy(flexible_type average="micro")
static size_t cpu_count()
variant_type get_metric()
void init(size_t _n_threads=1)
void register_unmapped_example(const size_t &target, const size_t &prediction, size_t thread_id=0)
precision(flexible_type average="macro")
fbeta_score(double beta=1.0, flexible_type average="macro")
flex_type_enum get_type() const
confusion_matrix(std::map< size_t, flexible_type > index_map=std::map< size_t, flexible_type >())
void register_example(const flexible_type &target, const flexible_type &prediction, size_t thread_id=0)
variant_type get_metric()
virtual bool is_table_printer_compatible() const
void open_for_write(const std::vector< std::string > &column_names, const std::vector< flex_type_enum > &column_types, const std::string &frame_sidx_file="", size_t nsegments=SFRAME_DEFAULT_NUM_SEGMENTS, bool fail_on_column_names=true)
flexible_type get_reference_label()
variant_type to_variant(const T &f)
variant_type get_metric()
variant_type get_metric()
void register_example(const flexible_type &target, const flexible_type &prediction, size_t thread_id=0)
void register_example(const flexible_type &target, const flexible_type &prediction, size_t thread_id=0)
bool is_table_printer_compatible() const
variant_type get_metric()
virtual bool is_table_printer_compatible() const
variant_type get_metric()
static flexible_type FLEX_UNDEFINED
variant_type get_metric()
roc_curve(std::unordered_map< flexible_type, size_t > index_map=std::unordered_map< flexible_type, size_t >(), flexible_type average=FLEX_UNDEFINED, bool binary=true, size_t num_classes=size_t(-1))
void init(size_t _n_threads=1)
#define DASSERT_TRUE(cond)
virtual bool is_prob_evaluator() const
recall(flexible_type average="macro")
variant_type get_metric()
void init(size_t _n_threads=1)