Turi Create  4.0
sliced_itemitem_matrix.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SPARSE_SIM_SLICED_MATRIX_UTILITIES_H
7 #define TURI_SPARSE_SIM_SLICED_MATRIX_UTILITIES_H
8 
9 #include <vector>
10 #include <core/logging/assertions.hpp>
11 #include <core/parallel/atomic.hpp>
12 #include <core/parallel/lambda_omp.hpp>
13 
14 namespace turi { namespace sparse_sim {
15 
/** The height of a given slice that excludes the items below the
 * diagonal.
 */
19 size_t get_upper_triangular_slice_height(size_t _w, size_t _s);
20 
21 /** Calculates the number of passes required to go through a symmetric
22  * or triangular matrix with a fixed amount of memory. We return the
23  * slice or slice boundaries needed in order to do a set of passes
24  * through the matrix such that everything fits in memory.
25  */
26 std::vector<size_t> calculate_upper_triangular_slice_structure(
27  size_t num_items, size_t target_item_count_per_pass, size_t max_num_slices);
28 
29 /** A container to hold a slice of rows of an upper triangular dense
30  * matrix. All accesses assume that row_idx < col_idx, and only data
31  * conforming to this is stored. In addition, num_rows <= num_cols.
32  *
33  * This class is a minimal wrapper around a vector in order to mimic
34  * the interface of the sparse item by item array. This has the same
35  * interface as the symmetric_2d_array class.
36  *
37  */
38 template <typename T>
40  private:
41  static constexpr size_t _data_size(size_t n_rows, size_t n_cols) {
42  return ((n_cols - 1) * n_rows) - (n_rows * (n_rows - 1)) / 2;
43  }
44 
45  public:
46 
48 
  /** Construct a slice with _num_rows rows of an upper triangular
   *  matrix that is _num_cols wide; requires _num_rows <= _num_cols
   *  (checked in resize, debug builds only).  Delegates to resize(),
   *  which allocates value-initialized storage and builds the row
   *  index map.
   */
  dense_triangular_itemitem_container(size_t _num_rows, size_t _num_cols) {
    resize(_num_rows, _num_cols);
  }
52 
53  typedef T value_type;
54 
55  /** Clears all the data and the values, and resets the number of
56  * rows and columns to 0.
57  *
58  */
59  void clear() {
60  num_rows = 0;
61  num_cols = 0;
62  data.clear();
63  }
64 
  /** Reserve capacity for n_elements stored values.  Forwards to the
   *  underlying std::vector; does not change the logical shape or any
   *  existing contents.
   */
  void reserve(size_t n_elements) {
    data.reserve(n_elements);
  }
70 
  size_t rows() const { return num_rows; }  // Number of rows held by this slice.
  size_t cols() const { return num_cols; }  // Width of the full matrix.
73 
  /** Resize to _num_rows x _num_cols and clear the data.
   *
   *  Requires _num_rows <= _num_cols (debug-checked).  All elements
   *  are reset to a value-initialized value_type, and the row index
   *  map is rebuilt for the new shape.
   */
  void resize(size_t _num_rows, size_t _num_cols) {
    DASSERT_LE(_num_rows, _num_cols);
    // Members must be set before _data_size / setup_row_index_map read them.
    num_cols = _num_cols;
    num_rows = _num_rows;
    size_t s = _data_size(num_rows, num_cols);
    data.assign(s, value_type());
    setup_row_index_map();
  }
84 
85  /** Indexible access.
86  */
87  value_type& operator()(size_t row_idx, size_t col_idx) {
88  size_t index = data_index(row_idx, col_idx);
89  return data[index];
90  }
91 
92  /** Indexible access. Const overload
93  */
94  const value_type& operator()(size_t i, size_t j) const {
95  size_t index = data_index(i, j);
96  return data[index];
97  }
98 
99  /** Apply a function to a particular element. The apply_f has
100  */
101  template <typename ApplyFunction>
103  void apply(size_t idx_1, size_t idx_2, ApplyFunction&& apply_f) {
104  size_t index = data_index(idx_1, idx_2);
105  apply_f(data[index]);
106  }
107 
  /** Process all the elements currently in this container, in parallel.
   *
   *  process_interaction is invoked as (row_idx, col_idx, value_type&)
   *  for every stored entry, which all satisfy row_idx < col_idx.
   *  Rows are handed out to threads dynamically through a shared atomic
   *  counter, and each row is walked contiguously via a raw pointer
   *  into the flat storage.
   */
  template <typename ProcessValueFunction>
  void apply_all(ProcessValueFunction&& process_interaction) {

    // Shared work counter: each increment claims one row for a thread.
    atomic<size_t> row_idx = 0;

    in_parallel([&](size_t thread_idx, size_t num_threads) {

      while(true) {
        // (++row_idx) - 1 yields a unique 0-based row index per
        // iteration across all threads.
        size_t idx_1 = (++row_idx) - 1;
        if(idx_1 >= num_rows) break;

        // Strictly upper triangular: columns start just right of the
        // diagonal.  A row whose first column would be out of range
        // stores nothing.
        size_t idx_2 = idx_1 + 1;
        if(idx_2 >= num_cols) continue;

        // Row idx_1 is contiguous in `data`, so walk it with a raw
        // pointer instead of recomputing data_index per element.
        value_type * __restrict__ d_ptr = data.data() + data_index(idx_1, idx_2);

        for(; idx_2 != num_cols; ++idx_2, ++d_ptr) {
          process_interaction(idx_1, idx_2, *d_ptr);
        }
      }
    });
  }
132 
133  private:
134  size_t num_cols=0, num_rows=0;
135  std::vector<value_type> data;
136  std::vector<size_t> row_index_map;
137 
  /** Rebuilds row_index_map so data_index() can map (row, col) to a
   *  flat offset with a single addition.
   *
   *  For row r, _data_size(r, num_cols) counts the elements stored in
   *  rows 0 .. r-1; subtracting r pre-compensates for data_index()
   *  adding (col_idx - 1) instead of the true in-row offset
   *  (col_idx - r - 1).
   */
  void setup_row_index_map() GL_HOT_INLINE_FLATTEN {
    row_index_map.resize(num_rows + 1);

    // The storage location is the number of elements before this,
    // minus the shift required to compensate for the col_idx being
    // larger than the row idx.
    for(size_t r = 0; r < num_rows; ++r) {
      row_index_map[r] = _data_size(r, num_cols) - r;
    }

    // Sentinel entry: one past the last valid offset.
    row_index_map[num_rows] = data.size();
  }
150 
  /** Calculates the flat data index of a particular row and column.
   *  Requires row_idx < col_idx (strictly upper triangular) and both
   *  indices within the slice bounds; all checked in debug builds only.
   */
  size_t data_index(size_t row_idx, size_t col_idx) const GL_HOT_INLINE_FLATTEN {
    DASSERT_LT(row_idx, num_rows);
    DASSERT_LT(col_idx, num_cols);
    DASSERT_LT(row_idx, col_idx);

    // row_index_map[row_idx] already folds in the -(row_idx + 1) shift
    // (see setup_row_index_map), so adding (col_idx - 1) lands on the
    // element's offset.
    size_t index = row_index_map[row_idx] + (col_idx - 1);
    DASSERT_LT(index, data.size());

    return index;
  }
163 
164 };
165 
166 }}
167 
#endif /* TURI_SPARSE_SIM_SLICED_MATRIX_UTILITIES_H */
void apply_all(ProcessValueFunction &&process_interaction)
value_type & operator()(size_t row_idx, size_t col_idx)
GL_HOT_INLINE_FLATTEN void apply(size_t idx_1, size_t idx_2, ApplyFunction &&apply_f)
#define GL_HOT_INLINE_FLATTEN
const value_type & operator()(size_t i, size_t j) const
void in_parallel(const std::function< void(size_t thread_id, size_t num_threads)> &fn)
Definition: lambda_omp.hpp:35