Turi Create  4.0
one_hot_encoder.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef _ONE_HOT_ENCODER_INDEXER_H_
7 #define _ONE_HOT_ENCODER_INDEXER_H_
8 #include <string>
9 #include <model_server/lib/toolkit_class_macros.hpp>
10 #include <toolkits/feature_engineering/transformer_base.hpp>
11 #include <toolkits/feature_engineering/topk_indexer.hpp>
12 #include <core/export.hpp>
13 
14 namespace turi {
15 namespace sdk_model {
16 namespace feature_engineering {
17 
18 class EXPORT one_hot_encoder : public transformer_base {
19 // Transformer derived from transformer_base; registered below under the Python-facing class name "_OneHotEncoder".
20  static constexpr size_t ONE_HOT_ENCODER_VERSION = 0; // Version constant; presumably the value reported by get_version() -- confirm in the implementation file.
21  std::map<std::string, std::shared_ptr<topk_indexer>> index_map; // One shared topk_indexer per string key (presumably the feature/column name -- confirm).
22  bool exclude = false; // Defaults to false; presumably selects "all columns except feature_columns" -- confirm.
23  std::map<std::string, size_t> start_index_map; // A size_t per string key (presumably each feature's starting offset in the encoded output -- confirm).
24  std::map<std::string, flex_type_enum> feature_types; // flex_type_enum recorded per string key (presumably the column type seen at fit time -- confirm).
25  flexible_type feature_columns; // Input provided by the user.
26 
27  public:
28 
29  /**
30  * Methods that must be implemented in a new transformer model.
31  * -------------------------------------------------------------------------
32  */
33 
34  virtual inline ~one_hot_encoder() {}
35 
36  /**
37  * Set the options in the model. Use the option manager to set
38  * these options. If an option does not satisfy the conditions that the
39  * option manager has imposed on it, errors will be thrown.
40  *
41  * \param[in] _options Options to set
42  */
43  void init_options(const std::map<std::string, flexible_type>&_options) override;
44 
45  /**
46  * Get a version for the object.
47  */
48  size_t get_version() const override;
49 
50  /**
51  * Save the object using Turi's oarc.
52  */
53  void save_impl(turi::oarchive& oarc) const override;
54 
55  /**
56  * Load the object using Turi's iarc.
57  */
58  void load_version(turi::iarchive& iarc, size_t version) override;
59 
60 
61  /**
62  * Initialize the transformer.
63  */
64  void init_transformer(const std::map<std::string,
65  flexible_type>& _options) override;
66 
67  /**
68  * Fit the transformer to the given data (NOTE(review): exact semantics live in the implementation file -- confirm).
69  *
70  * \param[in] data (SFrame of data)
71  */
72  void fit(gl_sframe data) override;
73 
74  /**
75  * Transform the given data.
76  *
77  * \param[in] data (SFrame of data)
78  *
79  * Python side interface
80  * ------------------------
81  * This function directly interfaces with "transform" in Python.
82  *
83  */
84  gl_sframe transform(gl_sframe data) override;
85 
86 
87  /**
88  * Fit and transform the given data. Intended as an optimization because
89  * fit and transform are usually called together. This implementation
90  * materializes the data, then calls fit followed by transform.
91  *
92  * \param[in] data (SFrame of data)
93  */
94  gl_sframe fit_transform(gl_sframe data) {
95  data.materialize();
96  fit(data);
97  return transform(data);
98  }
99 
100 
101  // Functions that all transformers need to register. Can be copied verbatim
102  // for other classes.
103  // --------------------------------------------------------------------------
104  BEGIN_CLASS_MEMBER_REGISTRATION("_OneHotEncoder")
105  REGISTER_CLASS_MEMBER_FUNCTION(one_hot_encoder::init_transformer, "_options");
106  REGISTER_CLASS_MEMBER_FUNCTION(one_hot_encoder::fit, "data");
107  REGISTER_CLASS_MEMBER_FUNCTION(one_hot_encoder::fit_transform, "data");
108  REGISTER_CLASS_MEMBER_FUNCTION(one_hot_encoder::transform, "data");
109  REGISTER_CLASS_MEMBER_FUNCTION(one_hot_encoder::get_current_options);
110  REGISTER_CLASS_MEMBER_FUNCTION(one_hot_encoder::list_fields);
111  REGISTER_NAMED_CLASS_MEMBER_FUNCTION("_get_default_options",
112  one_hot_encoder::get_default_options); // NOTE(review): listing line 113 is absent from this extract; presumably it opens the registration call whose arguments follow -- confirm against the original header.
114  one_hot_encoder::get_value_from_state,
115  "key"); // NOTE(review): listing line 116 is absent from this extract; presumably END_CLASS_MEMBER_REGISTRATION -- confirm against the original header.
117 
118 };
119 
120 
121 } // feature_engineering
122 } // sdk_model
123 } // turi
124 #endif
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
#define END_CLASS_MEMBER_REGISTRATION
#define REGISTER_NAMED_CLASS_MEMBER_FUNCTION(name, function,...)
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80
void transform(S &&input, T &&output, TransformFn transformfn, std::set< size_t > constraint_segments=std::set< size_t >())
Definition: algorithm.hpp:64