Turi Create  4.0
row_slicing_utilities.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef ML_DATA_ROW_SLICING_UTILITIES_H_
7 #define ML_DATA_ROW_SLICING_UTILITIES_H_
8 
9 #include <vector>
10 #include <toolkits/ml_data_2/metadata.hpp>
11 #include <core/data/flexible_type/flexible_type.hpp>
12 #include <Eigen/Core>
13 #include <Eigen/SparseCore>
14 #include <model_server/lib/variant.hpp>
15 #include <model_server/lib/variant_deep_serialize.hpp>
16 
17 namespace turi { namespace v2 {
18 
19 typedef Eigen::Matrix<double, Eigen::Dynamic,1> dense_vector;
20 typedef Eigen::SparseVector<double> sparse_vector;
21 
22 /** A slicer class that allows taking a row and splitting it up by
23  * columns.
24  */
25 class row_slicer {
26 
27  public:
28 
29  row_slicer() {}
30 
31  /** Constructor -- provide ml_metadata class and a subset of column
32  * indices to use in this particular row. the columns_to_pick must
33  * be in sorted order.
34  *
35  * If the chosen columns are from untranslated columns, then they
36  * must be all untranslated columns. In this case, only the
37  * flexible_type slice method below can be used. Otherwise, none
38  * of the columns must be untranslated, and either the sparse or
39  * dense slicing methods must be used.
40  *
41  * Example:
42  *
43  * sframe X = make_integer_testing_sframe( {"C0", "C1", "C2"}, { {1,2,3}, {4,5,6} } );
44  *
45  * v2::ml_data data;
46  *
47  * data.fill(X);
48  *
49  * std::vector<v2::ml_data_entry> x_t;
50  * std::vector<flexible_type> x_u;
51  *
52  * // Select that we want columns 1 and 2, but drop 0.
53  * v2::row_slicer s_c1_c2(data.metadata(), {1, 2} );
54  *
55  * v2::dense_vector vd;
56  * v2::sparse_vector vs;
57  * std::vector<flexible_type> vu;
58  *
59  * ////////////////////////////////////////
60  *
61  * auto it = data.get_iterator();
62  *
63  * it.fill_observation(x_t);
64  * it.fill_untranslated_values(x_u);
65  *
66  * s_c1_c2.slice(vd, x_t, x_u);
67  *
68  * // There are 2 numerical columns included in this test
69  * ASSERT_EQ(vd.size(), 2);
70  * ASSERT_EQ(size_t(vd[0]), 2); // First row, 2nd column, by the slicer
71  * ASSERT_EQ(size_t(vd[1]), 3); // First row, 3nd column, by the slicer
72  *
73  * s_c1_c2.slice(vs, x_t, x_u);
74  *
75  * ASSERT_EQ(vd.nonZeros(), 2);
76  * ASSERT_EQ(size_t(vd.coeffRef(0)), 2); // First row, 2nd column, by the slicer
77  * ASSERT_EQ(size_t(vd.coeffRef(1)), 3); // First row, 2nd column, by the slicer
78  *
79  *
80  * ================================================================================
81  *
82  * Example with untranslated columns:
83  * sframe X = make_integer_testing_sframe( {"C0", "C1", "C2"}, { {1,2,3}, {4,5,6} } );
84  *
85  * v2::ml_data data;
86  *
87  * // Set column C1 and C2 to be untranslated.
88  * data.set_data(X, "", {},
89  * { {"C1", v2::ml_column_mode::UNTRANSLATED},
90  * {"C2", v2::ml_column_mode::UNTRANSLATED} });
91  *
92  * data.fill();
93  *
94  * std::vector<v2::ml_data_entry> x_t;
95  * std::vector<flexible_type> x_u;
96  *
97  * // Take the second and third columns
98  * v2::row_slicer s_c1_c2(data.metadata(), {1, 2} );
99  *
100  * std::vector<flexible_type> vu;
101  *
102  * auto it = data.get_iterator();
103  *
104  * it.fill_observation(x_t);
105  * it.fill_untranslated_values(x_u);
106  *
107  * s_c1_c2.slice(vu, x_t, x_u);
108  *
109  * // There are 2 numerical columns included in this test
110  * ASSERT_EQ(vu.size(), 2);
111  * ASSERT_EQ(size_t(vu[0]), 2); // First row, 2nd column, by the slicer
112  * ASSERT_EQ(size_t(vu[1]), 3); // First row, 3nd column, by the slicer
113  *
114  * ++it;
115  */
116  row_slicer(const std::shared_ptr<ml_metadata>& metadata,
117  const std::vector<size_t>& columns_to_pick);
118 
119  /** Take a row, represented by a pair of translated and
120  * untranslated columns (either of which may be empty), and
121  * use it to fill an eigen sparse vector with the result.
122  */
123  void slice(sparse_vector& dest,
124  const std::vector<ml_data_entry>& x_t, const std::vector<flexible_type>& x_u) const;
125 
126  /** Take a row, represented by a pair of translated and
127  * untranslated columns (either of which may be empty), and
128  * use it to fill an eigen dense vector with the result.
129  */
130  void slice(dense_vector& dest,
131  const std::vector<ml_data_entry>& x_t, const std::vector<flexible_type>& x_u) const;
132 
133  /** Take a row, represented by a pair of translated and
134  * untranslated columns (either of which may be empty), and
135  * use it to fill an untranslated row with the result.
136  */
137  void slice(std::vector<flexible_type>& dest,
138  const std::vector<ml_data_entry>& x_t, const std::vector<flexible_type>& x_u) const;
139 
140  /** For translated row types, this returns the number of dimensions
141  * present. The eigen dense vectors will have this size after a
142  * call to slice above.
143  */
144  size_t num_dimensions() { return _num_dimensions; }
145 
146  /** Serialization -- save.
147  */
148  void save(turi::oarchive& oarc) const;
149 
150  /** Serialization -- load.
151  */
152  void load(turi::iarchive& iarc);
153 
154 
155  private:
156 
157  bool pick_from_flexible_type = false;
158 
159  std::vector<size_t> flex_type_columns_to_pick;
160 
161  std::vector<int> column_pick_mask;
162 
163  std::vector<size_t> index_offsets;
164  std::vector<size_t> index_sizes;
165 
166  size_t _num_dimensions = 0;
167 };
168 
169 }}
170 
171 #endif
void slice(sparse_vector &dest, const std::vector< ml_data_entry > &x_t, const std::vector< flexible_type > &x_u) const
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
void save(turi::oarchive &oarc) const
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80
void load(turi::iarchive &iarc)