Turi Create  4.0
ml_data_column_modes.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_DML_DATA_COLUMN_MODES_H_
7 #define TURI_DML_DATA_COLUMN_MODES_H_
8 
9 #include <string>
10 #include <map>
11 #include <core/data/flexible_type/flexible_type.hpp>
12 
13 #ifdef ERROR
14 #undef ERROR
15 #endif
16 
17 namespace turi {
18 
19 class sframe;
20 /**
21  * \ingroup mldata
22  * \{
23  */
/**
 * Selects the action taken for missing values.
 *
 * The enumerators keep their declaration-order values (0, 1, 2),
 * written out explicitly here.
 */
enum class ml_missing_value_action : int {
  IMPUTE = 0,
  ERROR = 1,
  USE_NAN = 2
};
28 
/** The primary mode of each entry value.  The mode determines how a
 *  value is stored, how it is translated, and which functionality it
 *  can be used with.
 *
 *  Every enumerator is pinned to an explicit integer value.
 *  NOTE(review): presumably these values are relied on outside this
 *  header (e.g. stored or used as indices) -- confirm before
 *  renumbering or reordering.
 */
enum class ml_column_mode : int {
  NUMERIC            = 0,
  CATEGORICAL        = 1,
  NUMERIC_VECTOR     = 2,
  CATEGORICAL_VECTOR = 3,
  DICTIONARY         = 4,
  UNTRANSLATED       = 5,
  CATEGORICAL_SORTED = 6,
  NUMERIC_ND_VECTOR  = 7
};
41 
42 
43 /** Returns true if the underlying type is treated as a categorical
44  * variable, and false otherwise.
45  */
47 static inline bool mode_is_categorical(ml_column_mode mode) {
48 
49  switch(mode) {
50  case ml_column_mode::NUMERIC: return false;
51  case ml_column_mode::CATEGORICAL: return true;
52  case ml_column_mode::NUMERIC_VECTOR: return false;
53  case ml_column_mode::CATEGORICAL_VECTOR: return true;
54  case ml_column_mode::DICTIONARY: return false;
55  case ml_column_mode::UNTRANSLATED: return false;
56  case ml_column_mode::CATEGORICAL_SORTED: return true;
57  case ml_column_mode::NUMERIC_ND_VECTOR: return false;
58  }
59  return false;
60 }
61 
62 /** Returns true if the underlying type always results in constant
63  * size pattern, and false otherwise.
64  */
66 static inline bool mode_has_fixed_size(ml_column_mode mode) {
67 
68  switch(mode) {
69  case ml_column_mode::NUMERIC: return true;
70  case ml_column_mode::CATEGORICAL: return true;
71  case ml_column_mode::NUMERIC_VECTOR: return true;
72  case ml_column_mode::CATEGORICAL_VECTOR: return false;
73  case ml_column_mode::DICTIONARY: return false;
74  case ml_column_mode::UNTRANSLATED: return true;
75  case ml_column_mode::CATEGORICAL_SORTED: return true;
76  case ml_column_mode::NUMERIC_ND_VECTOR: return true;
77  default: ASSERT_TRUE(false); return false;
78  }
79 }
80 
81 /** Returns true if the underlying type is indexed, and false
82  * otherwise. This differs form the is_categorical in that
83  * dictionaries are not treated as pure categorical variables, as
84  * they have values associated with them, but they are indexed.
85  */
87 static inline bool mode_is_indexed(ml_column_mode mode) {
88 
89  switch(mode) {
90  case ml_column_mode::NUMERIC: return false;
91  case ml_column_mode::CATEGORICAL: return true;
92  case ml_column_mode::NUMERIC_VECTOR: return false;
93  case ml_column_mode::CATEGORICAL_VECTOR: return true;
94  case ml_column_mode::DICTIONARY: return true;
95  case ml_column_mode::UNTRANSLATED: return false;
96  case ml_column_mode::CATEGORICAL_SORTED: return true;
97  case ml_column_mode::NUMERIC_ND_VECTOR: return false;
98  }
99  return false;
100 }
101 
102 
103 /** For error reporting, returns a name of the mode based on the
104  * column mode value.
105  */
106 const char* column_mode_enum_to_name(ml_column_mode mode);
107 
108 
109 namespace ml_data_internal {
110 
111 /**
112  * Checks to make sure that the column type provided actually matches
113  * up with the mode used. This is just done for error checking.
114  * Throws an error message if they are not consistent.
115  */
116 void check_type_consistent_with_mode(const std::string& column_name,
117  flex_type_enum column_type, ml_column_mode mode);
118 
119 
120 /** This function handles the translation of column types to the
121  * column modes, which determine how the columns behave.
122  *
123  * NOTE(review): the option integers_are_categorical (if true, then
124  * integers are translated to categorical values) is not a parameter
125  * of this declaration. Presumably mode_overrides, keyed by column
126  * name, forces the mode of specific columns instead -- confirm
127  * against the implementation.
128  */
129 ml_column_mode choose_column_mode(
130  const std::string& column_name,
131  flex_type_enum column_type,
132  const std::map<std::string, ml_column_mode>& mode_overrides);
133 
134 
135 /// \}
136 }}
137 
138 
139 
140 #endif /* TURI_DML_DATA_COLUMN_MODES_H_ */
static GL_HOT_INLINE_FLATTEN bool mode_has_fixed_size(ml_column_mode mode)
static GL_HOT_INLINE_FLATTEN bool mode_is_categorical(ml_column_mode mode)
#define GL_HOT_INLINE_FLATTEN
#define ASSERT_TRUE(cond)
Definition: assertions.hpp:309
const char * column_mode_enum_to_name(ml_column_mode mode)
static GL_HOT_INLINE_FLATTEN bool mode_is_indexed(ml_column_mode mode)