Turi Create  4.0
ml_data_column_modes.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_ML2_DATA_COLUMN_MODES_H_
7 #define TURI_ML2_DATA_COLUMN_MODES_H_
8 
9 #include <string>
10 #include <map>
11 #include <core/data/flexible_type/flexible_type.hpp>
12 
13 namespace turi {
14 
15 class sframe;
16 
17 namespace v2 {
18 
/** The primary mode of a column's entries.  The mode decides how a
 *  value is stored, how it is translated, and which functionality it
 *  can be used with.
 *
 *  The enumerators are numbered consecutively from zero in
 *  declaration order; the explicit values below are the same ones the
 *  compiler would assign implicitly.
 */
enum class ml_column_mode : int {
  NUMERIC            = 0,
  CATEGORICAL        = 1,
  NUMERIC_VECTOR     = 2,
  CATEGORICAL_VECTOR = 3,
  DICTIONARY         = 4,
  UNTRANSLATED       = 5
};
29 
30 
31 /** Returns true if the underlying type is treated as a categorical
32  * variable, and false otherwise.
33  */
35 static inline bool mode_is_categorical(ml_column_mode mode) {
36 
37  switch(mode) {
38  case ml_column_mode::NUMERIC: return false;
39  case ml_column_mode::CATEGORICAL: return true;
40  case ml_column_mode::NUMERIC_VECTOR: return false;
41  case ml_column_mode::CATEGORICAL_VECTOR: return true;
42  case ml_column_mode::DICTIONARY: return false;
43  case ml_column_mode::UNTRANSLATED: return false;
44  }
45  return false;
46 }
47 
48 /** Returns true if the underlying type always results in constant
49  * size pattern, and false otherwise.
50  */
52 static inline bool mode_has_fixed_size(ml_column_mode mode) {
53 
54  switch(mode) {
55  case ml_column_mode::NUMERIC: return true;
56  case ml_column_mode::CATEGORICAL: return true;
57  case ml_column_mode::NUMERIC_VECTOR: return true;
58  case ml_column_mode::CATEGORICAL_VECTOR: return false;
59  case ml_column_mode::DICTIONARY: return false;
60  case ml_column_mode::UNTRANSLATED: return true;
61  default: ASSERT_TRUE(false); return false;
62  }
63 }
64 
65 /** Returns true if the underlying type is indexed, and false
66  * otherwise. This differs form the is_categorical in that
67  * dictionaries are not treated as pure categorical variables, as
68  * they have values associated with them, but they are indexed.
69  */
71 static inline bool mode_is_indexed(ml_column_mode mode) {
72 
73  switch(mode) {
74  case ml_column_mode::NUMERIC: return false;
75  case ml_column_mode::CATEGORICAL: return true;
76  case ml_column_mode::NUMERIC_VECTOR: return false;
77  case ml_column_mode::CATEGORICAL_VECTOR: return true;
78  case ml_column_mode::DICTIONARY: return true;
79  case ml_column_mode::UNTRANSLATED: return false;
80  }
81  return false;
82 }
83 
84 
85 /** For error reporting, returns a name of the mode based on the
86  * column mode value.
87  */
89 
90 
91 namespace ml_data_internal {
92 
93 /**
94  * Checks to make sure that the column type provided actually matches
95  * up with the mode used. This is just done for error checking.
96  * Throws an error message if they are not consistent.
97  */
98 void check_type_consistent_with_mode(const std::string& column_name,
99  flex_type_enum column_type, ml_column_mode mode);
100 
101 
102 /** This function handles the translation of column types to the
103  * column modes, which determines how they behave.
104  *
105  * The options that affect this are given as follows:
106  *
107  * integers_are_categorical : If true, then integers are translated
108  * to categorical values.
109  *
110  */
111 ml_column_mode choose_column_mode(
112  const std::string& column_name,
113  flex_type_enum column_type,
114  const std::map<std::string, flexible_type>& options,
115  const std::map<std::string, ml_column_mode>& mode_overrides);
116 
117 }}}
118 
119 
120 
121 #endif /* TURI_ML2_DATA_COLUMN_MODES_H_ */
static GL_HOT_INLINE_FLATTEN bool mode_has_fixed_size(ml_column_mode mode)
static GL_HOT_INLINE_FLATTEN bool mode_is_categorical(ml_column_mode mode)
#define GL_HOT_INLINE_FLATTEN
#define ASSERT_TRUE(cond)
Definition: assertions.hpp:309
const char * column_mode_enum_to_name(ml_column_mode mode)
static GL_HOT_INLINE_FLATTEN bool mode_is_indexed(ml_column_mode mode)