Turi Create  4.0
composite_row_type.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_ML_DATA_ROW_TYPES_H_
7 #define TURI_ML_DATA_ROW_TYPES_H_
8 
9 #include <vector>
10 #include <toolkits/ml_data_2/ml_data_entry.hpp>
11 #include <toolkits/ml_data_2/metadata.hpp>
12 #include <Eigen/Core>
13 #include <Eigen/SparseCore>
14 #include <core/data/flexible_type/flexible_type.hpp>
15 
16 namespace turi { namespace v2 {
17 
18 typedef Eigen::Matrix<double, Eigen::Dynamic,1> dense_vector;  // Single-column matrix of doubles; the row count is set at run time.
19 typedef Eigen::SparseVector<double> sparse_vector;  // Sparse vector of doubles.
20 
21 class composite_row_specification;  // Forward declaration; composite_row_container stores a shared_ptr to this type.
22 
23 /** A collection of subrows to put into a composite row container.
24  * Requires a composite_row_specification to first be defined; this
25  * specification determines how the container is going to be filled
26  * by the iterator.
27  *
28  * The following code segment illustrates how this is to be used:
29  *
30  * // Create a simple sframe
31  * sframe X = make_integer_testing_sframe( {"C0", "C1", "C2"}, { {1,2,3}, {4,5,6} } );
32  *
33  * v2::ml_data data;
34  *
35  * // Set column "C0" to be untranslated.
36  * data.set_data(X, "", {}, { {"C0", v2::ml_column_mode::UNTRANSLATED} });
37  *
38  * data.fill();
39  *
40  * auto row_spec = std::make_shared<v2::composite_row_specification>(data.metadata());
41  *
42  * // Add one dense subrow formed from columns 1 and 2
43  * size_t dense_row_index_1 = row_spec->add_dense_subrow( {1, 2} );
44  *
45  * // Add a sparse subrow formed from column 2
46  * size_t sparse_row_index = row_spec->add_sparse_subrow( {2} );
47  *
48  * // Add an untranslated row formed from column 0
49  * size_t flex_row_index = row_spec->add_flex_type_subrow( {0} );
50  *
51  * // Add another dense subrow formed from column 1
52  * size_t dense_row_index_2 = row_spec->add_dense_subrow( {1} );
53  *
54  * v2::composite_row_container crc(row_spec);
55  *
56  * ////////////////////////////////////////
57  *
58  * auto it = data.get_iterator();
59  *
60  * {
61  * it.fill_observation(crc);
62  *
63  * // The 1st dense component; two numerical columns.
64  * const auto& vd = crc.dense_subrows[dense_row_index_1];
65  *
66  * ASSERT_EQ(vd.size(), 2);
67  * ASSERT_EQ(size_t(vd[0]), 2); // First row, 2nd column
68  * ASSERT_EQ(size_t(vd[1]), 3); // First row, 3rd column
69  *
70  * // The 2nd dense component; one numerical column.
71  * const auto& vd2 = crc.dense_subrows[dense_row_index_2];
72  *
73  * ASSERT_EQ(vd2.size(), 1);
74  * ASSERT_EQ(size_t(vd2[0]), 2); // First row, 2nd column
75  *
76  * // The sparse component: one numerical column
77  * const auto& vs = crc.sparse_subrows[sparse_row_index];
78  *
79  * ASSERT_EQ(vs.size(), 1);
80  * ASSERT_EQ(size_t(vs.coeff(0)), 3); // First row, 3rd column
81  *
82  * // The untranslated column.
83  * const auto& vf = crc.flex_subrows[flex_row_index];
84  *
85  * ASSERT_EQ(vf.size(), 1);
86  * ASSERT_TRUE(vf[0] == 1); // First row, 1st column
87  * }
88  *
89  */
91 
92  composite_row_container(const std::shared_ptr<composite_row_specification>& _subrow_specs)
93  : subrow_spec(_subrow_specs)  // Keep a shared reference to the specification; the subrow vectors start out empty.
94  {}
95 
96  std::vector<dense_vector> dense_subrows;  // Dense subrows; add_dense_subrow() returns the index into this vector.
97  std::vector<sparse_vector> sparse_subrows;  // Sparse subrows; add_sparse_subrow() returns the index into this vector.
98  std::vector<std::vector<flexible_type> > flex_subrows;  // Flexible-type subrows; add_flex_type_subrow() returns the index into this vector.
99 
100  private:
101 
102  friend class ml_data_iterator_base;  // Friends may access subrow_spec and the buffers below.
103  friend class composite_row_specification;
104 
105  // Specification that determines how the subrows above are filled.
106  std::shared_ptr<composite_row_specification> subrow_spec;
107 
108  // Buffers used by the composite_row_specification filling function.
109  std::vector<size_t> buffer;
110  std::vector<flexible_type> flextype_buffer;
111 };
112 
113 
114 /** The specification for a composite row container. See
115  * documentation on composite_row_container for use.
116  */
118  public:
119 
120  /** Constructor; requires a std::shared_ptr to an ml_metadata object (for example, data.metadata() on a v2::ml_data).
121  */
122  composite_row_specification(const std::shared_ptr<ml_metadata>& _metadata);
123 
124  /** Add in a sparse subrow formed from the given column indices. Returns
125  * the index in the sparse_subrows attribute of the composite_row_container
126  * where this particular row will go upon filling from the iterator.
127  */
128  size_t add_sparse_subrow(const std::vector<size_t>& column_indices);
129 
130  /** Add in a dense subrow formed from the given column indices. Returns
131  * the index in the dense_subrows attribute of the composite_row_container
132  * where this particular row will go upon filling from the iterator.
133  */
134  size_t add_dense_subrow(const std::vector<size_t>& column_indices);
135 
136  /** Add in a flexible type subrow formed from the given column indices.
137  * Returns the index in the flex_subrows attribute of the
138  * composite_row_container where this row will go upon filling from the iterator.
139  */
140  size_t add_flex_type_subrow(const std::vector<size_t>& column_indices);
141 
142  private:
143 
144  friend class ml_data_iterator_base;  // Gives the iterator base access to the private members, including fill().
145 
146  std::shared_ptr<ml_metadata> metadata;  // Presumably the metadata handed to the constructor; its definition is not in this header.
147 
148  size_t n_dense_subrows = 0;  // Per-kind subrow counters, all starting at zero; presumably advanced by the add_*_subrow calls (TODO confirm).
149  size_t n_sparse_subrows = 0;
150  size_t n_flex_subrows = 0;
151 
152  // These are indexed by column; each entry lists the indices of the
153  // subrows that use that particular column.
154  std::vector<std::vector<size_t> > dense_spec;
155  std::vector<std::vector<size_t> > sparse_spec;
156 
157  // This is indexed by subrow; each entry lists the column indices
158  // used by that particular subrow.
159  std::vector<std::vector<size_t> > flex_subrow_spec_by_subrow;
160 
161  // Sizes for the dense and sparse rows; presumably one entry per subrow (TODO confirm).
162  std::vector<size_t> dense_spec_sizes;
163  std::vector<size_t> sparse_spec_sizes;
164 
165  /** Fill the composite container crc; called by the iterator.
     *
     * NOTE(review): this listing jumps from line 167 to line 170, so part of
     * the parameter list appears to be missing here. Confirm the full
     * signature against the original header before relying on it.
166  */
167  void fill(composite_row_container& crc,
170  std::vector<flexible_type> flexible_type_row) GL_HOT_FLATTEN;
171 
172 };
173 
174 
175 
176 }}
177 
178 #endif
// With this definition GL_HOT_FLATTEN expands to nothing.
// NOTE(review): the define sits after the #endif that closes the include guard
// and after the macro's use on the fill() declaration; confirm the intended placement.
#define GL_HOT_FLATTEN