Turi Create  4.0
ngram_counter.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_FE_ngram_counter_H_
7 #define TURI_FE_ngram_counter_H_
8 #include <model_server/lib/toolkit_class_macros.hpp>
9 #include <toolkits/feature_engineering/transformer_base.hpp>
10 #include <toolkits/feature_engineering/transform_utils.hpp>
11 #include <core/export.hpp>
12 
13 namespace turi {
14 namespace sdk_model {
15 namespace feature_engineering {
16 
17 class EXPORT ngram_counter: public transformer_base {
18  // Transformer built on transformer_base and registered below under the name "_NGramCounter". Presumably counts n-grams in the configured feature columns; the implementation is not in this header - confirm in the .cpp.
19  static constexpr size_t NGRAM_COUNTER_VERSION = 0; // Version constant; presumably what get_version() reports - confirm in the .cpp.
20  size_t n = 2; // Defaults to 2; presumably the n-gram length - confirm.
21  bool fitted = false; // Starts false; presumably set once fit() has run - confirm.
22  bool to_lower = true; // Option flag, defaults to true.
23  bool ignore_punct = true; // Option flag, defaults to true.
24  bool ignore_space = true; // Option flag, defaults to true.
25  bool exclude = false; // Option flag, defaults to false.
26  std::map<std::string, flex_type_enum> feature_types; // Maps a string key (presumably a column name) to a flex_type_enum.
27  std::vector<std::string> feature_columns; // Names kept as strings; presumably the resolved feature columns - confirm.
28  flexible_type unprocessed_features; // List of feature columns provided by the user.
29  flexible_type delimiters; // List of delimiters provided by the user.
30  flex_string ngram_type; // String-valued option; accepted values are not visible in this header.
31 
32  private:
33  transform_utils::string_filter_list string_filters; // Presumably populated by set_string_filters() - confirm.
34 
35  void set_string_filters();
36 
37  public:
38  /**
39  * Methods that must be implemented in a new transformer model.
40  * -------------------------------------------------------------------------
41  */
42 
43  virtual inline ~ngram_counter() {}
44 
45  /**
46  * Set the options in the model. Use the option manager to set
47  * these options. If an option does not satisfy the conditions that the
48  * option manager has imposed on it, an error will be thrown.
49  *
50  * \param[in] _options Options to set
51  */
52  void init_options(const std::map<std::string, flexible_type>&_options) override;
53  /**
54  * Get the version number of this transformer.
55  *
56  * \return Version as a size_t (presumably NGRAM_COUNTER_VERSION - confirm in the .cpp)
57  */
58  size_t get_version() const override;
59 
60  /**
61  * Save the object using Turi's oarc.
62  */
63  void save_impl(turi::oarchive& oarc) const override;
64 
65  /**
66  * Load the object using Turi's iarc.
67  */
68  void load_version(turi::iarchive& iarc, size_t version) override;
69 
70 
71  /**
72  * Initialize the transformer from the given options map.
73  */
74  void init_transformer(const std::map<std::string,
75  flexible_type>& _options) override;
76 
77  /**
78  * Fit the transformer to the given data.
79  *
80  * \param[in] data (SFrame of data)
81  */
82  void fit(gl_sframe data) override;
83 
84  /**
85  * Transform the given data.
86  *
87  * \param[in] data (SFrame of data)
88  *
89  * Python side interface
90  * ------------------------
91  * This function directly interfaces with "transform" in python.
92  *
93  */
94  gl_sframe transform(gl_sframe data) override;
95 
96  /**
97  * Fit and transform the given data. Intended as an optimization because
98  * fit and transform are usually called together. This implementation
99  * materializes the data, calls fit, and then returns transform(data).
100  *
101  * \param[in] data (SFrame of data)
102  */
103  gl_sframe fit_transform(gl_sframe data) {
104  data.materialize(); // Presumably so fit and transform do not each re-evaluate a lazy SFrame - confirm.
105  fit(data);
106  return transform(data);
107  }
108 
109  // Functions that all transformers need to register. Can be copied verbatim
110  // for other classes.
111  // --------------------------------------------------------------------------
112  BEGIN_CLASS_MEMBER_REGISTRATION("_NGramCounter")
113  REGISTER_CLASS_MEMBER_FUNCTION(ngram_counter::init_transformer, "_options");
114  REGISTER_CLASS_MEMBER_FUNCTION(ngram_counter::fit, "data");
115  REGISTER_CLASS_MEMBER_FUNCTION(ngram_counter::fit_transform, "data");
116  REGISTER_CLASS_MEMBER_FUNCTION(ngram_counter::transform, "data");
117  REGISTER_CLASS_MEMBER_FUNCTION(ngram_counter::get_current_options);
118  REGISTER_CLASS_MEMBER_FUNCTION(ngram_counter::list_fields);
119  REGISTER_NAMED_CLASS_MEMBER_FUNCTION("_get_default_options",
120  ngram_counter::get_default_options);
122  ngram_counter::get_value_from_state,
123  "key");
125  // NOTE(review): this listing skips original lines 121 and 124 - the line opening the get_value_from_state registration and, presumably, END_CLASS_MEMBER_REGISTRATION. Confirm against the original header.
126 };
127 
128 } // feature_engineering
129 } // sdk_model
130 } // turi
131 
132 #endif
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
#define END_CLASS_MEMBER_REGISTRATION
#define REGISTER_NAMED_CLASS_MEMBER_FUNCTION(name, function,...)
std::string flex_string
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80
void transform(S &&input, T &&output, TransformFn transformfn, std::set< size_t > constraint_segments=std::set< size_t >())
Definition: algorithm.hpp:64