6 #ifndef TURI_UNITY_ITEM_SIMILARITY_LOOKUP_AGGREGATORS_H_ 7 #define TURI_UNITY_ITEM_SIMILARITY_LOOKUP_AGGREGATORS_H_ 9 #include <core/data/flexible_type/flexible_type.hpp> 11 namespace turi {
namespace sparse_sim {
/// Integer type used to hold a real value in fixed-point form.
using _fixed_precision_type = int64_t;

/// Number of fixed-point units per 1.0 (2^24).
static constexpr int64_t _fixed_precision_scale_factor = int64_t(1) << 24;

/// Converts a real value to fixed point.
///
/// The value is multiplied by the scale factor and rounded with std::round,
/// so halfway cases are rounded away from zero.
///
/// \param v real value to convert
/// \return the scaled, rounded value as a 64-bit integer
static inline _fixed_precision_type _to_fixed(double v) {
  const double scaled = v * _fixed_precision_scale_factor;
  return static_cast<int64_t>(std::round(scaled));
}

/// Converts a fixed-point value back to a real value.
///
/// \param v fixed-point value
/// \return v divided by the scale factor
static inline double _from_fixed(_fixed_precision_type v) {
  const double widened = static_cast<double>(v);
  return widened / _fixed_precision_scale_factor;
}
/// \return the identifier string of this similarity type, "jaccard".
static std::string name() {
  return std::string("jaccard");
}
119 typedef size_t item_data_type;
120 typedef size_t interaction_data_type;
121 typedef _fixed_precision_type final_interaction_data_type;
/// Compile-time trait: this similarity type reports that item locking is
/// not required.
static constexpr bool require_item_locking() { return false; }
/// Compile-time trait: this similarity type reports that interaction locking
/// is not required.
static constexpr bool require_interaction_locking() { return false; }
/// Compile-time trait: this similarity type reports that missing values are
/// treated as zero.
static constexpr bool missing_values_are_zero() { return true; }
128 void update_item(item_data_type& v,
double target)
const {
129 if(LIKELY(target != 0)) {
134 void update_item_unsafe(item_data_type& v,
double target)
const {
135 if(LIKELY(target != 0)) {
140 void finalize_item(final_item_data_type&, item_data_type&)
const { }
141 void import_final_item_value(final_item_data_type& it,
const flexible_type& src)
const { }
143 void update_interaction(interaction_data_type& e,
144 const item_data_type& v1,
const item_data_type& v2,
145 double new_v1,
double new_v2)
const {
146 if(LIKELY((new_v1 != 0) && (new_v2 != 0)) ) {
151 void update_interaction_unsafe(interaction_data_type& e,
152 const item_data_type& v1,
const item_data_type& v2,
153 double new_v1,
double new_v2)
const {
157 e += ((new_v1 != 0) & (new_v2 != 0));
160 void finalize_interaction(final_interaction_data_type& e_out,
161 const final_item_data_type&,
162 const final_item_data_type&,
163 const interaction_data_type& e,
164 const item_data_type& v1,
165 const item_data_type& v2)
const {
171 double _e_out = ( (v1 == 0) || (v2 == 0) ) ? 0.0 :
double(e) / (v1 + v2 - e);
173 DASSERT_GE(_e_out, -1e-3);
174 DASSERT_LE(_e_out, 1.0 + 1e-3);
176 e_out = _to_fixed(_e_out);
181 bool compare_interaction_values(
const final_interaction_data_type& e1,
182 const final_interaction_data_type& e2,
183 const final_item_data_type& common_item_data,
184 const final_item_data_type& item_data_1,
185 const final_item_data_type& item_data_2)
const {
192 void import_final_interaction_value(
193 final_interaction_data_type& e,
const flexible_type& src)
const {
197 if(v < -1e-3 || v > 1 + 1e-3) {
198 auto error_out = [&]() GL_GCC_ONLY(GL_COLD_NOINLINE) {
199 std::ostringstream ss;
200 ss <<
"Values for jaccard similarity type must be between 0 and 1; " 201 <<
"Encountered " << v <<
". Please choose an appropriate " 202 <<
"similarity type or transform your values." 204 log_and_throw(ss.str().c_str());
213 double export_similarity_score(
const final_interaction_data_type& e)
const {
214 return std::max<double>(0, std::min<double>(1, _from_fixed(e)));
// Accumulator type used while building a prediction.
using prediction_accumulation_type = size_t;
222 void update_prediction(prediction_accumulation_type& p,
223 const final_interaction_data_type& item_interaction_data,
224 const final_item_data_type& prediction_item_item_data,
225 const final_item_data_type& neighbor_item_item_data,
226 double prediction_item_score)
const {
228 if(LIKELY(prediction_item_score != 0)) {
233 void update_prediction_unsafe(prediction_accumulation_type& p,
234 const final_interaction_data_type& item_interaction_data,
235 const final_item_data_type& prediction_item_item_data,
236 const final_item_data_type& neighbor_item_item_data,
237 double prediction_item_score)
const {
239 p += (prediction_item_score != 0) ? item_interaction_data : 0;
242 double finalize_prediction(
const prediction_accumulation_type& p,
243 const final_item_data_type& prediction_item_data,
244 size_t n_user_ratings)
const {
245 return _from_fixed(p) / std::max<size_t>(1, n_user_ratings);
/// \return the identifier string of this similarity type, "cosine".
static std::string name() {
  return std::string("cosine");
}
259 typedef _fixed_precision_type item_data_type;
260 typedef _fixed_precision_type interaction_data_type;
262 typedef _fixed_precision_type final_interaction_data_type;
/// Compile-time trait: this similarity type reports that item locking is
/// not required.
static constexpr bool require_item_locking() { return false; }
/// Compile-time trait: this similarity type reports that interaction locking
/// is not required.
static constexpr bool require_interaction_locking() { return false; }
/// Compile-time trait: this similarity type reports that missing values are
/// treated as zero.
static constexpr bool missing_values_are_zero() { return true; }
269 void update_item(item_data_type& v,
double target)
const {
273 void update_item_unsafe(item_data_type& v,
double target)
const {
274 v += _to_fixed(target * target);
278 void finalize_item(final_item_data_type&, item_data_type&)
const { }
279 void import_final_item_value(final_item_data_type& it,
const flexible_type& src)
const { }
281 void update_interaction(interaction_data_type& e,
282 const item_data_type& v1,
const item_data_type& v2,
283 double new_v1,
double new_v2)
const {
288 void update_interaction_unsafe(interaction_data_type& e,
289 const item_data_type& v1,
const item_data_type& v2,
290 double new_v1,
double new_v2)
const {
291 e += _to_fixed(new_v1 * new_v2);
294 void finalize_interaction(final_interaction_data_type& e_out,
295 const final_item_data_type&,
296 const final_item_data_type&,
297 const interaction_data_type& e,
298 const item_data_type& v1,
299 const item_data_type& v2)
const {
303 double _e_out = ((v1 == 0) || (v2 == 0)) ? 0.0 :
double(e) / std::sqrt(
double(v1) * v2);
305 DASSERT_LT(_e_out, 1.0 + 1e-3);
306 DASSERT_GT(_e_out, -1.0 - 1e-3);
308 e_out = _to_fixed(_e_out);
313 bool compare_interaction_values(
const final_interaction_data_type& e1,
314 const final_interaction_data_type& e2,
315 const final_item_data_type& common_item_data,
316 const final_item_data_type& item_data_1,
317 const final_item_data_type& item_data_2)
const {
322 void import_final_interaction_value(
323 final_interaction_data_type& e,
const flexible_type& src)
const {
327 if(v < (-1 - 1e-3) || v > 1 + 1e-3) {
328 auto error_out = [&]() GL_GCC_ONLY(GL_COLD_NOINLINE) {
329 std::ostringstream ss;
330 ss <<
"Values for cosine similarity type must be between -1 and 1; " 331 <<
"Encountered " << v <<
". Please choose an appropriate " 332 <<
"similarity type or transform your values." 334 log_and_throw(ss.str().c_str());
343 double export_similarity_score(
const final_interaction_data_type& e)
const {
344 return std::max<double>(-1, std::min<double>(1, _from_fixed(e)));
350 typedef _fixed_precision_type prediction_accumulation_type;
352 void update_prediction_unsafe(prediction_accumulation_type& p,
353 const final_interaction_data_type& item_interaction_data,
354 const final_item_data_type& prediction_item_item_data,
355 const final_item_data_type& neighbor_item_item_data,
356 double prediction_item_score)
const {
358 _fixed_precision_type delta_prediction
359 = _fixed_precision_type(item_interaction_data * prediction_item_score);
361 p += delta_prediction;
364 void update_prediction(prediction_accumulation_type& p,
365 const final_interaction_data_type& item_interaction_data,
366 const final_item_data_type& prediction_item_item_data,
367 const final_item_data_type& neighbor_item_item_data,
368 double prediction_item_score)
const {
370 _fixed_precision_type delta_prediction
371 = _fixed_precision_type(item_interaction_data * prediction_item_score);
376 double finalize_prediction(
const prediction_accumulation_type& p,
377 const final_item_data_type& prediction_item_item_data,
378 size_t n_user_ratings)
const {
379 if(n_user_ratings == 0) {
382 return _from_fixed(p) / n_user_ratings;
/// \return the identifier string of this similarity type, "pearson".
static std::string name() {
  return std::string("pearson");
}
397 struct item_data_type {
403 typedef double interaction_data_type;
405 typedef _fixed_precision_type final_interaction_data_type;
406 typedef double final_item_data_type;
/// Compile-time trait: this similarity type reports that item locking is
/// required.
static constexpr bool require_item_locking() { return true; }
/// Compile-time trait: this similarity type reports that interaction locking
/// is required.
static constexpr bool require_interaction_locking() { return true; }
/// Compile-time trait: this similarity type reports that missing values are
/// not treated as zero.
static constexpr bool missing_values_are_zero() { return false; }
412 void update_item(item_data_type& v,
double target)
const {
413 double old_mean = v.mean;
416 v.mean += (target - old_mean) / (v.count + 1);
418 v.var_sum += (target - old_mean) * (target - v.mean);
422 void finalize_item(final_item_data_type& fv, item_data_type& v)
const {
423 v.var_sum *= double(v.count) / std::max<size_t>(1, v.count - 1);
427 void import_final_item_value(final_item_data_type& it,
const flexible_type& src)
const {
431 void update_interaction(interaction_data_type& e,
432 const item_data_type& v1,
433 const item_data_type& v2,
434 double new_v1,
double new_v2)
const {
435 e += (new_v1 - v1.mean) * (new_v2 - v2.mean);
438 void update_interaction_unsafe(
439 interaction_data_type& e,
const item_data_type& v1,
const item_data_type& v2,
440 double new_v1,
double new_v2)
const {
442 e += (new_v1 - v1.mean) * (new_v2 - v2.mean);
446 void finalize_interaction(final_interaction_data_type& e_out,
447 const final_item_data_type&,
448 const final_item_data_type&,
449 const interaction_data_type& e,
450 const item_data_type& v1,
451 const item_data_type& v2)
const {
453 double denominator_2 = v1.var_sum * v2.var_sum;
455 double _e_out = (denominator_2 > 0) ? e / std::sqrt(denominator_2) : 0.0;
457 DASSERT_LT(_e_out, 1.0 + 1e-3);
458 DASSERT_GT(_e_out, -1.0 - 1e-3);
460 e_out = _to_fixed(_e_out);
464 void import_final_interaction_value(
465 final_interaction_data_type& e,
const flexible_type& src)
const {
469 if(v < (-1 - 1e-3) || v > 1 + 1e-3) {
470 auto error_out = [&]() GL_GCC_ONLY(GL_COLD_NOINLINE) {
471 std::ostringstream ss;
472 ss <<
"Values for pearson correlation similarity type must be between -1 and 1; " 473 <<
"Encountered " << v <<
". Please choose an appropriate " 474 <<
"similarity type or transform your values." 476 log_and_throw(ss.str().c_str());
485 double export_similarity_score(
const final_interaction_data_type& e)
const {
486 return std::max<double>(-1, std::min<double>(1, _from_fixed(e)));
492 typedef _fixed_precision_type prediction_accumulation_type;
494 bool compare_interaction_values(
const final_interaction_data_type& e1,
495 const final_interaction_data_type& e2,
496 const final_item_data_type& common_item_data,
497 const final_item_data_type& item_data_1,
498 const final_item_data_type& item_data_2)
const {
502 void update_prediction(prediction_accumulation_type& p,
503 const final_interaction_data_type& item_interaction_data,
504 const final_item_data_type& prediction_item_item_data,
505 const final_item_data_type& neighbor_item_item_data,
506 double prediction_item_score)
const {
508 _fixed_precision_type delta_prediction = _fixed_precision_type(std::round(
509 item_interaction_data * (prediction_item_score - prediction_item_item_data) ) );
514 void update_prediction_unsafe(prediction_accumulation_type& p,
515 const final_interaction_data_type& item_interaction_data,
516 const final_item_data_type& prediction_item_item_data,
517 const final_item_data_type& neighbor_item_item_data,
518 double prediction_item_score)
const {
521 _fixed_precision_type delta_prediction = _fixed_precision_type(std::round(
522 item_interaction_data * (prediction_item_score - prediction_item_item_data) ) );
524 p += delta_prediction;
527 double finalize_prediction(
const prediction_accumulation_type& p,
528 const final_item_data_type& prediction_item_item_data,
529 size_t n_user_ratings)
const {
530 if(n_user_ratings <= 0) {
533 return prediction_item_item_data + _from_fixed(p) / n_user_ratings;
545 template <
typename SimilarityType>
546 static constexpr
bool use_final_item_data() {
548 typename SimilarityType::final_item_data_type>::value;
static T atomic_increment(T &value, const U &increment=1, typename std::enable_if< std::is_integral< T >::value &&std::is_integral< U >::value >::type *=0)
Inheriting from this type will force the serializer to treat the derived type as a POD type...