6 #ifndef TURI_SGD_IMPLICIT_RANKING_SGD_SOLVER_CLASS_H_ 7 #define TURI_SGD_IMPLICIT_RANKING_SGD_SOLVER_CLASS_H_ 11 #include <type_traits> 12 #include <core/util/code_optimization.hpp> 13 #include <toolkits/ml_data_2/ml_data.hpp> 14 #include <toolkits/factorization/ranking_sgd_solver_base.hpp> 16 namespace turi {
namespace factorization {
28 template <
class SGDInterface>
38 const std::shared_ptr<sgd::sgd_interface_base>& main_interface,
39 const v2::ml_data& train_data,
40 const std::map<std::string, flexible_type>&
options)
// Local aliases for the buffer types declared by Base, so the methods of this
// class can name them without the `typename Base::` qualifier.
// NOTE(review): elsewhere in this file each x_buffer element is read as
// `.first` (a std::vector<v2::ml_data_entry>); the exact element type is
// defined in Base -- confirm there.
54 typedef typename Base::x_buffer_row_type x_buffer_row_type;
55 typedef typename Base::x_buffer_type x_buffer_type;
// Performs the pairwise (positive vs. sampled negative) SGD updates for one
// block of the training data and returns the loss accumulated while doing so.
//
// NOTE(review): this listing is missing several source lines (at least one
// parameter -- `iface` is used below but not declared here -- plus call
// arguments, right-hand sides and closing braces).  The comments below only
// describe what is visible.
//
// Parameters (as visible):
//   thread_idx, num_threads  -- thread identification; not used in any of the
//                               visible lines.
//   block_idx, num_blocks    -- select which block of `data` is iterated.
//   data                     -- the ml_data being traversed.
//   error_detected           -- shared flag: read in the loop condition to
//                               stop early, and set to true here when a
//                               non-finite or very large value is seen.
//
// Returns: a pair whose two members are both the accumulated loss_value.
//
// NOTE(review): `volatile` does not give inter-thread synchronization
// guarantees in C++; if this flag really is shared between threads, a
// std::atomic<bool> would be the well-defined choice -- confirm with callers
// before changing the signature.
84 std::pair<double, double> run_sgd_thread(
86 size_t thread_idx,
size_t num_threads,
87 size_t block_idx,
size_t num_blocks,
88 const v2::ml_data& data,
91 volatile bool& error_detected) GL_HOT {
// Running sum of the per-pair loss values returned by the SGD step.
93 double loss_value = 0;
// Size of column 1 of the data; used below as the total item count.
95 size_t n_items = data.metadata()->column_size(1);
// Row buffer, pre-sized to 4096 entries before the block loop starts.
97 x_buffer_type x_buffer;
100 x_buffer.resize(4*1024);
// Scratch storage reused across iterations for the sampled negative example.
102 std::vector<v2::ml_data_entry> negative_example_x;
104 neg_sample_proc_buffer neg_exm_buffer;
// Walk the selected block; bail out as soon as an error has been flagged.
// The iterator is not advanced in the for-header, so it is presumably
// advanced by the buffer-filling call whose line is missing -- TODO confirm.
108 for(
auto it = data.get_block_iterator(block_idx, num_blocks); !it.done() && !error_detected;) {
// n_rows / n_rated_items are filled from a call whose right-hand side is
// missing from this listing (presumably fill_x_buffer_with_users_items --
// confirm against the full source).
114 size_t n_rows, n_rated_items;
116 std::tie(n_rows, n_rated_items) =
121 if(n_rated_items != n_items) {
// One SGD step per buffered row.
129 for(
size_t i = 0; i < n_rows; ++i) {
// The observation vector of buffered row i.
131 const std::vector<v2::ml_data_entry>& x = x_buffer[i].first;
// Sample a negative example into negative_example_x; the call's result is
// kept as that example's score.  Some arguments are not visible here.
133 double negative_example_fx =
134 this->choose_negative_example(
138 negative_example_x, x,
140 n_rows, n_items, n_rated_items,
// A non-finite or huge (> 1e10 in magnitude) score is treated as numerical
// failure and reported through the shared flag.
145 if(!std::isfinite(negative_example_fx) || std::fabs(negative_example_fx) > 1e10) {
146 error_detected =
true;
// Debug-only sanity check: the sampled negative's entry-1 index must differ
// from the entry-1 index of every row currently in the buffer.
153 for(
size_t x_check = 0; x_check < n_rows; ++x_check) {
154 DASSERT_NE(negative_example_x[1].index, x_buffer[x_check].first[1].index);
// Apply the pairwise update on (x, negative_example_x) and collect its loss.
158 double pw_loss_value = iface->apply_pairwise_sgd_step(
160 x, negative_example_x,
163 loss_value += pw_loss_value;
// Flag divergence: the running total became non-finite, or this single
// step's loss exceeded 1e10.
165 if(!std::isfinite(loss_value) || pw_loss_value > 1e10) {
166 error_detected =
true;
// Maps a buffer position to the entry-1 index of that buffered row.  The
// call receiving this lambda is not visible (presumably
// clear_item_observed_buffer -- confirm).
177 [&](
size_t i) {
return x_buffer[i].first[1].index; } );
// Both members of the returned pair carry the same accumulated loss.
180 return {loss_value, loss_value};
// Computes the ranking loss over one thread's share of `data` without
// calling any SGD update step in the visible lines: for each buffered row it
// scores the observed example, samples a negative example, and adds the loss
// of the score difference.
//
// NOTE(review): this listing is missing several source lines (call
// arguments, right-hand sides and closing braces).  The comments below only
// describe what is visible.
//
// Parameters:
//   thread_idx, num_threads  -- select which block of `data` this call
//                               iterates; thread_idx is also forwarded to
//                               iface->calculate_fx.
//   data                     -- the ml_data being traversed.
//   iface                    -- model interface providing calculate_fx and
//                               loss_model.
//
// Returns: a pair whose two members are both the accumulated loss_value, or
// a pair of std::numeric_limits<double>::max() if a sampled negative's score
// is non-finite or larger than 1e10 in magnitude.
204 std::pair<double, double> run_loss_calculation_thread(
205 size_t thread_idx,
size_t num_threads,
206 const v2::ml_data& data,
207 SGDInterface* iface)
const {
// Running sum of the per-row loss.
209 double loss_value = 0;
// Size of column 1 of the data; used below as the total item count.
211 size_t n_items = data.metadata()->column_size(1);
// Row buffer, pre-sized to 4096 entries before the block loop starts.
213 x_buffer_type x_buffer;
215 x_buffer.resize(4*1024);
// Scratch storage reused across iterations for the sampled negative example.
217 std::vector<v2::ml_data_entry> negative_example_x;
219 neg_sample_proc_buffer neg_exm_buffer;
// Here the data is partitioned by thread: block thread_idx of num_threads.
// The iterator is not advanced in the for-header, so it is presumably
// advanced by the buffer-filling call whose line is missing -- TODO confirm.
223 for(
auto it = data.get_block_iterator(thread_idx, num_threads); !it.done();) {
// n_rows / n_rated_items are filled from a call whose right-hand side is
// missing from this listing (presumably fill_x_buffer_with_users_items --
// confirm against the full source).
229 size_t n_rows, n_rated_items;
231 std::tie(n_rows, n_rated_items) =
236 if(n_rated_items != n_items) {
// One loss term per buffered row.
242 for(
size_t i = 0; i < n_rows; ++i) {
// The observation vector of buffered row i.
244 const std::vector<v2::ml_data_entry>& x = x_buffer[i].first;
// Model score of the observed (positive) example.
246 double positive_fx = iface->calculate_fx(thread_idx, x);
// Sample a negative example into negative_example_x; the call's result is
// kept as that example's score.  Some arguments are not visible here.
248 double negative_example_fx =
249 this->choose_negative_example(
253 negative_example_x, x,
255 n_rows, n_items, n_rated_items,
// Numerical failure: abort immediately and report the largest double as the
// loss in both members of the result.
259 if(!std::isfinite(negative_example_fx) || std::fabs(negative_example_fx) > 1e10) {
260 return {std::numeric_limits<double>::max(), std::numeric_limits<double>::max()};
// Loss of the score margin (positive minus negative) against a target of 0.
264 loss_value += iface->loss_model.loss(positive_fx - negative_example_fx, 0);
// Maps a buffer position to the entry-1 index of that buffered row.  The
// call receiving this lambda is not visible (presumably
// clear_item_observed_buffer -- confirm).
273 [&](
size_t i) {
return x_buffer[i].first[1].index; } );
// Both members of the returned pair carry the same accumulated loss.
276 return {loss_value, loss_value};
GL_HOT_INLINE_FLATTEN void clear_item_observed_buffer(dense_bitset &item_observed, size_t n_rows, size_t n_items, const BufferIndexToItemIndexMapper &map_index) const
implicit_ranking_sgd_solver(const std::shared_ptr< sgd::sgd_interface_base > &main_interface, const v2::ml_data &train_data, const std::map< std::string, flexible_type > &options)
void shuffle(std::vector< T > &vec)
std::pair< size_t, size_t > fill_x_buffer_with_users_items(std::vector< std::pair< std::vector< v2::ml_data_entry >, double > > &x_buffer, v2::ml_data_block_iterator &it, size_t n_items, dense_bitset &item_observed) const GL_HOT_INLINE_FLATTEN
const std::map< std::string, flexible_type > options