// Turi Create 4.0
// precision_recall.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_UNITY_RECOMMENDER_EVALUATOR
7 #define TURI_UNITY_RECOMMENDER_EVALUATOR
#include <algorithm>
#include <map>
#include <set>
#include <unordered_set>
#include <utility>
#include <vector>

#include <core/data/flexible_type/flexible_type.hpp>
14 
15 namespace turi { namespace recsys {
16 
/**
 *
 * \ingroup toolkit_util
 *
 * Compute precision and recall at k in a single pass. This is faster than
 * calculating precision and recall separately. In information retrieval
 * terms, precision is the ratio of relevant, retrieved items to the number
 * of retrieved items, and recall is the ratio of relevant, retrieved items
 * to the number of relevant items.
 *
 * Let \f$p_k\f$ be a vector of the first \f$k\f$ elements of the argument
 * "predicted", and let \f$a\f$ be the set of items in the "actual" argument.
 * The "precision at k" and "recall at k" are defined as
 *
 * \f[
 * P(k) = \frac{ | a \cap p_k | }{ |p_k| }, \qquad
 * R(k) = \frac{ | a \cap p_k | }{ |a| }
 * \f]
 *
 * The order of the elements in predicted affects the returned score.
 * Only unique predicted values contribute to the score.
 * One of the provided vectors must be nonempty.
 * If actual is empty, recall is 1.0; if predicted is empty, precision is 1.0.
 *
 * \param actual A vector of observed items
 * \param predicted A vector of predicted items, in ranked order
 * \param cutoffs A vector of positive integers k for which precision and
 *        recall should be calculated
 *
 * \return A vector of pair(precision, recall) scores corresponding to the
 *         values in cutoffs.
 *
 * Notes:
 * The corner cases that involve empty lists were chosen to be consistent
 * with the feasible set of precision-recall curves, which start at
 * (precision, recall) = (1, 0) and end at (0, 1). However, we do not
 * believe there is a well-known consensus on this choice.
 */
std::vector<std::pair<double, double> > precision_and_recall(
    std::vector<size_t> actual,
    std::vector<size_t> predicted,
    const std::vector<size_t>& cutoffs);
54 
55 
/**
 *
 * \ingroup toolkit_util
 *
 * Compute recall at k.
 * In information retrieval terms, this represents the ratio of relevant,
 * retrieved items to the number of relevant items.
 *
 * Let \f$p_k\f$ be a vector of the first \f$k\f$ elements of the argument
 * "predicted", and let \f$a\f$ be the set of items in the "actual" argument.
 * The "recall at k" is defined as
 *
 * \f[
 * R(k) = \frac{ | a \cap p_k | }{ |a| }
 * \f]
 *
 * The order of the elements in predicted affects the returned score.
 * Only unique predicted values contribute to the score.
 * One of the provided vectors must be nonempty.
 * If actual is empty, returns 1.0. If predicted is empty, returns 0.0.
 *
 * \param actual An unordered vector of observed items
 * \param predicted A vector of predicted items, in ranked order
 * \param cutoffs A vector of positive integers for which recall should be
 *        calculated
 *
 * \return A vector of recall scores corresponding to the values in cutoffs
 *
 * Notes:
 * The corner cases that involve empty lists were chosen to be consistent
 * with the feasible set of precision-recall curves, which start at
 * (precision, recall) = (1, 0) and end at (0, 1). However, we do not
 * believe there is a well-known consensus on this choice.
 */
std::vector<double> recall(const std::vector<size_t>& actual,
                           const std::vector<size_t>& predicted,
                           const std::vector<size_t>& cutoffs);
90 
/**
 * \ingroup toolkit_util
 *
 * Compute precision at k.
 * In information retrieval terms, this represents the ratio of relevant,
 * retrieved items to the number of retrieved items.
 *
 * Let \f$p_k\f$ be a vector of the first \f$k\f$ elements of the argument
 * "predicted", and let \f$a\f$ be the set of items in the "actual" argument.
 * The "precision at k" is defined as
 *
 * \f[
 * P(k) = \frac{ | a \cap p_k | }{ |p_k| }
 * \f]
 *
 * The order of the elements in predicted affects the returned score.
 * Only unique predicted values contribute to the score.
 * One of the provided vectors must be nonempty.
 * If actual is empty, returns 0.0. If predicted is empty, returns 1.0.
 *
 * \param actual An unordered vector of observed items
 * \param predicted A vector of predicted items, in ranked order
 * \param cutoffs A vector of positive integers for which precision should be
 *        calculated
 *
 * \return A vector of precision scores corresponding to the values in cutoffs
 *
 * Notes:
 * The corner cases that involve empty lists were chosen to be consistent
 * with the feasible set of precision-recall curves, which start at
 * (precision, recall) = (1, 0) and end at (0, 1). However, we do not
 * believe there is a well-known consensus on this choice.
 */
std::vector<double> precision(const std::vector<size_t>& actual,
                              const std::vector<size_t>& predicted,
                              const std::vector<size_t>& cutoffs);
126 
127 /**
128  * \ingroup toolkit_util
129  *
130  * Compute the average precision at k.
131  * This combines precision values at values up to k, where lower ranks are less important.
132  *
133  * Let \f$p_k\f$ be a vector of the first \f$k\f$ elements of the argument "predicted",
134  * and let \f$a\f$ be the set of items in the "actual" argument.
135  * If \f$P(k)\f$ is the precision at \f$k\f$, then the average precision at \f$k\f$ is defined as
136  *
137  * \f[
138  * AP(k) = \frac{1}{\min(k, |a|)}\sum_{k: p_k \in a} \frac{P(k)}{k}
139  * \f]
140  *
141  * \param actual an unordered set of observed items
142  * \param predicted an vector of predicted items
143  * \param k the maximum number of predicted elements
144  *
145  * \return The average precision at k for the provided lists.
146  */
147 
148 float average_precision(const std::unordered_set<flexible_type> &actual,
149  const std::vector<flexible_type> &predicted,
150  const int k);
151 
152 /**
153  * \ingroup toolkit_util
154  *
155  * Compute mean average precision across all of the elements of the provided vectors.
156  * The two vectors must have the same length.
157  *
158  * actual: a vector of unordered sets of observed items.
159  * predicted: a vector of vectors of observed items.
160  *
161  */
162 
163 float mean_average_precision(const std::vector<std::unordered_set<flexible_type>> &actual,
164  const std::vector<std::vector<flexible_type>> &predicted,
165  const int k);
166 
167 }
168 }
169 #endif
/* Doxygen-extraction artifact: duplicated member signatures for the
 * declarations above, preserved as a comment so the header stays valid.
float average_precision(const std::unordered_set< flexible_type > &actual, const std::vector< flexible_type > &predicted, const int k)
std::vector< std::pair< double, double > > precision_and_recall(std::vector< size_t > actual, std::vector< size_t > predicted, const std::vector< size_t > &cutoffs)
std::vector< double > precision(const std::vector< size_t > &actual, const std::vector< size_t > &predicted, const std::vector< size_t > &cutoffs)
float mean_average_precision(const std::vector< std::unordered_set< flexible_type >> &actual, const std::vector< std::vector< flexible_type >> &predicted, const int k)
std::vector< double > recall(const std::vector< size_t > &actual, const std::vector< size_t > &predicted, const std::vector< size_t > &cutoffs)
*/