// Turi Create 4.0 — ranking_sgd_solver_implicit.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SGD_IMPLICIT_RANKING_SGD_SOLVER_CLASS_H_
7 #define TURI_SGD_IMPLICIT_RANKING_SGD_SOLVER_CLASS_H_
8 
9 #include <map>
10 #include <vector>
11 #include <type_traits>
12 #include <core/util/code_optimization.hpp>
13 #include <toolkits/ml_data_2/ml_data.hpp>
14 #include <toolkits/factorization/ranking_sgd_solver_base.hpp>
15 
16 namespace turi { namespace factorization {
17 
18 /** Implicit Ranking
19  *
20  * ============================================================
21  *
22  * When the target is not present, do a gradient descent for all items
23  * such that the negative example is predicted more highly than the
24  * positive example. Loss is defined as the number of out of order
25  * pairs.
26  *
27  */
28 template <class SGDInterface>
30  : public ranking_sgd_solver_base<SGDInterface> {
31 
32  public:
33 
34  /**
35  * Constructor.
36  */
38  const std::shared_ptr<sgd::sgd_interface_base>& main_interface,
39  const v2::ml_data& train_data,
40  const std::map<std::string, flexible_type>& options)
41 
42  : ranking_sgd_solver_base<SGDInterface>(main_interface, train_data, options)
43  {
44  // Doesn't need to do anything here.
45  }
46 
47  private:
48 
49  ////////////////////////////////////////////////////////////////////////////////
50  //
51  // Typedefs -- need to pull these here.
52 
54  typedef typename Base::x_buffer_row_type x_buffer_row_type;
55  typedef typename Base::x_buffer_type x_buffer_type;
57 
58  ////////////////////////////////////////////////////////////////////////////////
59 
  /** The main method to do the implicit ranking SGD pass.
   *
   * For each user in this thread's block, the buffer is filled with
   * that user's observed (positive) items; then, for each positive
   * example, a negative item is sampled and a pairwise SGD step is
   * applied so that the positive item is ranked above the negative.
   *
   * \param[in] iteration The current outer SGD iteration (not used
   * directly in this body).
   *
   * \param[in] thread_idx The thread index determining this block.
   *
   * \param[in] num_threads The number of threads.
   *
   * \param[in] block_idx The index of the data block processed by
   * this call.
   *
   * \param[in] num_blocks The total number of data blocks.
   *
   * \param[in] data The ml_data instance we're working with.
   * Primarily needed for the metadata.
   *
   * \param[in] iface The working SGD interface.
   *
   * \param[in] step_size The current SGD step size, set by the
   * higher level algorithm.
   *
   * \param[in,out] error_detected If set to true, a numerical error
   * is detected.
   *
   * \return (loss, rank_loss) -- loss is the cumulative estimated
   * loss value for this thread on predicting the training data, and
   * rank_loss is the weighted loss on the negative examples.  (For
   * the implicit case both components are the same value.)
   */
  std::pair<double, double> run_sgd_thread(
      size_t iteration,
      size_t thread_idx, size_t num_threads,
      size_t block_idx, size_t num_blocks,
      const v2::ml_data& data,
      SGDInterface* iface,
      double step_size,
      volatile bool& error_detected) GL_HOT {

    double loss_value = 0;

    // Column 1 is the item column; its size is the total item count.
    size_t n_items = data.metadata()->column_size(1);

    x_buffer_type x_buffer;

    // Start out with 4K possible items per user; doubles as needed.
    x_buffer.resize(4*1024);

    std::vector<v2::ml_data_entry> negative_example_x;

    neg_sample_proc_buffer neg_exm_buffer;

    // One bit per item; marks the items the current user has rated.
    dense_bitset item_observed(n_items);

    // The iterator is advanced inside fill_x_buffer_with_users_items,
    // one user block per outer pass.
    for(auto it = data.get_block_iterator(block_idx, num_blocks); !it.done() && !error_detected;) {

      ////////////////////////////////////////////////////////////
      // Step 2.1: Fill up the buffer with positive
      // examples.

      size_t n_rows, n_rated_items;

      std::tie(n_rows, n_rated_items) =
          this->fill_x_buffer_with_users_items(x_buffer, it, n_items, item_observed);

      // If there are no negatives, we ignore this user. (The version
      // with a target column is handled differently).
      if(n_rated_items != n_items) {

        // Shuffle so positive examples are visited in random order.
        turi::random::shuffle(x_buffer.begin(), x_buffer.begin() + n_rows);

        ////////////////////////////////////////////////////////////
        // Step 2.2: Loop through these rows, choosing one positive
        // example and one negative example.

        for(size_t i = 0; i < n_rows; ++i) {

          const std::vector<v2::ml_data_entry>& x = x_buffer[i].first;

          double negative_example_fx =
              this->choose_negative_example(
                  thread_idx,
                  data,
                  iface,
                  negative_example_x, x,
                  item_observed,
                  n_rows, n_items, n_rated_items,
                  neg_exm_buffer);

          // Check to see if there was a numerical error; if so, reset
          // the model!
          if(!std::isfinite(negative_example_fx) || std::fabs(negative_example_fx) > 1e10) {
            error_detected = true;
            break;
          }

          // Check to make sure it's a true negative, not a false one.
          // (Debug builds only: the sampled item must not appear among
          // this user's rated items.)

#ifndef NDEBUG
          for(size_t x_check = 0; x_check < n_rows; ++x_check) {
            DASSERT_NE(negative_example_x[1].index, x_buffer[x_check].first[1].index);
          }
#endif

          double pw_loss_value = iface->apply_pairwise_sgd_step(
              thread_idx,
              x, negative_example_x,
              step_size);

          loss_value += pw_loss_value;

          // Bail out of this pass on divergence as well.
          if(!std::isfinite(loss_value) || pw_loss_value > 1e10) {
            error_detected = true;
            break;
          }

        } // End loop over points in the buffer
      }

      ////////////////////////////////////////////////////////////
      // Step 3. Clear out the points in the buffer.
      // Only the bits set for this user are cleared (cheaper than a
      // full reset when n_rows << n_items).

      this->clear_item_observed_buffer(item_observed, n_rows, n_items,
                                       [&](size_t i) { return x_buffer[i].first[1].index; } );
    }

    return {loss_value, loss_value};
  }
182 
183 
  /** Calculate the loss value for the block of data assigned to a
   * particular thread.
   *
   * Mirrors run_sgd_thread, but only evaluates the pairwise loss on
   * (positive, sampled-negative) pairs -- no model updates are made.
   *
   * \param[in] thread_idx The thread index determining this block.
   *
   * \param[in] num_threads The number of threads.
   *
   * \param[in] data The ml_data instance we're working with.
   * Primarily needed for the metadata.
   *
   * \param[in] iface The working SGD interface.
   *
   * \return (loss, rank_loss) -- loss is the cumulative estimated
   * loss value for this thread on predicting the training data, and
   * rank_loss is the sampled weighted loss on the positive -
   * negative examples.  (For the implicit case both components are
   * the same value.)  On numerical error, both components are
   * std::numeric_limits<double>::max().
   */
  std::pair<double, double> run_loss_calculation_thread(
      size_t thread_idx, size_t num_threads,
      const v2::ml_data& data,
      SGDInterface* iface) const {

    double loss_value = 0;

    // Column 1 is the item column; its size is the total item count.
    size_t n_items = data.metadata()->column_size(1);

    x_buffer_type x_buffer;
    // Start out with 4K possible items per user; doubles as needed.
    x_buffer.resize(4*1024);

    std::vector<v2::ml_data_entry> negative_example_x;

    neg_sample_proc_buffer neg_exm_buffer;

    // One bit per item; marks the items the current user has rated.
    dense_bitset item_observed(n_items);

    // The iterator is advanced inside fill_x_buffer_with_users_items,
    // one user block per outer pass.
    for(auto it = data.get_block_iterator(thread_idx, num_threads); !it.done();) {

      ////////////////////////////////////////////////////////////
      // Step 2.1: Fill up the buffer with potential positive
      // examples.

      size_t n_rows, n_rated_items;

      std::tie(n_rows, n_rated_items) =
          this->fill_x_buffer_with_users_items(x_buffer, it, n_items, item_observed);

      // If there are no negatives, this user contributes nothing to
      // the count of out-of-order pairs.
      if(n_rated_items != n_items) {

        ////////////////////////////////////////////////////////////
        // Step 2.2: Loop through these rows, choosing one positive
        // example and one negative example.

        for(size_t i = 0; i < n_rows; ++i) {

          const std::vector<v2::ml_data_entry>& x = x_buffer[i].first;

          double positive_fx = iface->calculate_fx(thread_idx, x);

          double negative_example_fx =
              this->choose_negative_example(
                  thread_idx,
                  data,
                  iface,
                  negative_example_x, x,
                  item_observed,
                  n_rows, n_items, n_rated_items,
                  neg_exm_buffer);

          // If we've hit numerical errors, report a sentinel loss
          // immediately (the item_observed buffer is abandoned, which
          // is fine since we return from the function).
          if(!std::isfinite(negative_example_fx) || std::fabs(negative_example_fx) > 1e10) {
            return {std::numeric_limits<double>::max(), std::numeric_limits<double>::max()};
          }

          // The loss_grad here applies to the difference
          loss_value += iface->loss_model.loss(positive_fx - negative_example_fx, 0);

        } // End loop over points in the buffer
      }

      ////////////////////////////////////////////////////////////
      // Step 3. Clear out the points in the buffer.
      // Only the bits set for this user are cleared.

      this->clear_item_observed_buffer(item_observed, n_rows, n_items,
                                       [&](size_t i) { return x_buffer[i].first[1].index; } );
    }

    return {loss_value, loss_value};
  }
278 };
279 
280 }}
281 
282 #endif
// NOTE(review): The signatures below were captured from generated
// documentation tooltips during extraction and are not part of this
// header; they are kept here as comments for reference only.
//   GL_HOT_INLINE_FLATTEN void clear_item_observed_buffer(dense_bitset &item_observed, size_t n_rows, size_t n_items, const BufferIndexToItemIndexMapper &map_index) const
//   implicit_ranking_sgd_solver(const std::shared_ptr< sgd::sgd_interface_base > &main_interface, const v2::ml_data &train_data, const std::map< std::string, flexible_type > &options)
//   void shuffle(std::vector< T > &vec)   [random.hpp:536]
//   std::pair< size_t, size_t > fill_x_buffer_with_users_items(std::vector< std::pair< std::vector< v2::ml_data_entry >, double > > &x_buffer, v2::ml_data_block_iterator &it, size_t n_items, dense_bitset &item_observed) const GL_HOT_INLINE_FLATTEN
//   const std::map< std::string, flexible_type > options