6 #ifndef TURI_FACTORIZATION_FACTORS_TO_SFRAME_H_ 7 #define TURI_FACTORIZATION_FACTORS_TO_SFRAME_H_ 9 #include <toolkits/ml_data_2/ml_data.hpp> 10 #include <core/storage/sframe_interface/unity_sframe.hpp> 11 #include <core/storage/sframe_data/sframe.hpp> 14 namespace turi {
namespace factorization {
// Builds an sframe describing the model terms associated with one column
// (index c_idx) of the given ml_metadata. The output has one row per index of
// that column and up to three columns:
//   - the original column value for the index (named after the source column),
//   - the linear term taken from w (column named w_name),
//   - the factor row taken from V (column named V_name), when include_V_terms
//     is set.
//
// Visible parameters:
//   metadata  Shared metadata object queried for column mode, size, name and
//             indexer of column c_idx.
//   w_name    Name given to the output column holding the w values.
//   V_name    Name given to the output column holding the factor vectors.
//   V         Matrix indexed as V(row, col); its column count is used as the
//             number of factors.
//
// NOTE(review): this excerpt is a line-numbered listing with gaps (the
// embedded numbers skip), so several parameters and locals used below
// (c_idx, w, w_idx_offset, V_idx_offset, include_V_terms, n, out, factors,
// num_segments, ...) are declared on lines not shown here. VectorType is
// presumably the type of w -- confirm against the full signature.
19 template <
typename VectorType,
typename EigenMatrixType>
20 sframe fill_linear_model_sframe_from_eigen_data(
21 const std::shared_ptr<v2::ml_metadata>& metadata,
29 const std::string& w_name,
34 const std::string& V_name,
35 const EigenMatrixType& V) {
// Classify the column by its mode: CATEGORICAL, CATEGORICAL_VECTOR and
// DICTIONARY are treated as categorical; NUMERIC and NUMERIC_VECTOR are not.
40 switch(metadata->column_mode(c_idx)) {
41 case turi::v2::ml_column_mode::CATEGORICAL:
42 case turi::v2::ml_column_mode::CATEGORICAL_VECTOR:
43 case turi::v2::ml_column_mode::DICTIONARY:
44 is_categorical =
true;
48 case turi::v2::ml_column_mode::NUMERIC:
49 is_categorical =
false;
53 case turi::v2::ml_column_mode::NUMERIC_VECTOR:
54 is_categorical =
false;
// For a numeric vector column, the number of output rows is the column size
// reported by the metadata.
55 n_rows = metadata->column_size(c_idx);
// Names and types of the output sframe columns, built up in order.
61 std::vector<std::string> names;
62 std::vector<flex_type_enum> types;
// The first output column carries the name of the source column.
64 names.push_back(metadata->column_name(c_idx));
// Set of key types reported by the column's indexer; used to pick the type of
// the first output column.
68 std::set<flex_type_enum> value_types_present = metadata->indexer(c_idx)->extract_key_types();
// No key types present; the handling of this case is not visible in this
// excerpt.
76 if(value_types_present.size() == 0)
// Exactly one key type present: use it directly as the output type.
81 if(value_types_present.size() == 1) {
82 out_type = *value_types_present.begin();
// Tail of a logged message about a dictionary column; per its text, all values
// are promoted to string type. NOTE(review): presumably this is the
// multiple-key-types branch -- confirm against the full source.
85 <<
"dictionary on column " << metadata->column_name(c_idx)
86 <<
"; promoting all to string type." << std::endl;
90 types.push_back(out_type);
// Positions of the w and V columns within an output row; assigned below as the
// corresponding column names are appended.
93 size_t w_col_idx = 0, V_col_idx = 0;
// Sanity check: w_idx_offset + n must not exceed the size of w
// (assuming DASSERT_LE asserts "less than or equal").
96 DASSERT_LE(w_idx_offset + n, w.size());
// The w column goes at the current end of the column list.
98 w_col_idx = names.size();
99 names.push_back(w_name);
103 if(include_V_terms) {
// Same bounds check for V: V_idx_offset + n must not exceed V's row count.
104 DASSERT_LE(V_idx_offset + n, V.rows());
// The V column goes at the current end of the column list.
106 V_col_idx = names.size();
107 names.push_back(V_name);
111 size_t num_columns = names.size();
// Open the output for writing with the collected column names and types and
// num_segments segments.
117 out.open_for_write(names, types,
"", num_segments);
// Each factor vector has one entry per column of V.
119 size_t num_factors = V.cols();
// Fill the output in parallel; the callback receives this thread's index and
// the total number of threads.
121 in_parallel([&](
size_t thread_idx,
size_t num_threads) {
// Split [0, n_rows) into contiguous per-thread ranges [start_idx, end_idx).
123 size_t start_idx = (thread_idx * n_rows) / num_threads;
124 size_t end_idx = ( (thread_idx + 1) * n_rows) / num_threads;
// Output iterator selected by this thread's index (presumably one output
// segment per thread -- TODO confirm num_segments matches the thread count).
126 auto it_out = out.get_output_iterator(thread_idx);
// Row buffer with one slot per output column, reused for every row this
// thread produces.
128 std::vector<flexible_type> x(num_columns);
// Emit one row per index i in this thread's range, advancing the output
// iterator together with i.
132 for(
size_t i = start_idx; i < end_idx; ++i, ++it_out) {
// Column 0: the original value that index i maps back to.
134 x[0] = metadata->indexer(c_idx)->map_index_to_value(i);
// w column: the entry of w for index i, shifted by w_idx_offset.
140 x[w_col_idx] = w[i + w_idx_offset];
142 if(include_V_terms) {
// Copy row (i + V_idx_offset) of V into the factors buffer, one entry per
// factor, then store it in the V column.
144 for(
size_t j = 0; j < num_factors; ++j)
145 factors[j] = V(i + V_idx_offset, j);
147 x[V_col_idx] = factors;
#define ASSERT_FALSE(cond)
std::vector< double > flex_vec
static size_t cpu_count()
#define logprogress_stream
void in_parallel(const std::function< void(size_t thread_id, size_t num_threads)> &fn)