Turi Create  4.0
bm25.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef _BM25_H_
7 #define _BM25_H_
8 #include <string>
9 #include <model_server/lib/toolkit_class_macros.hpp>
10 #include <toolkits/feature_engineering/transformer_base.hpp>
11 #include <toolkits/feature_engineering/topk_indexer.hpp>
12 #include <core/export.hpp>
13 
14 namespace turi {
15 namespace sdk_model {
16 namespace feature_engineering {
17 
18 class EXPORT bm25 : public transformer_base {
19 
20  static constexpr size_t BM25_VERSION = 0;
21  std::map<std::string, std::shared_ptr<topk_indexer>> index_map;
22  bool exclude = false;
23  std::map<std::string, flex_type_enum> feature_types;
24  flexible_type feature_columns;
25 
26  public:
27 
28  /**
29  * Methods that must be implemented in a new transformer model.
30  * -------------------------------------------------------------------------
31  */
32 
33  virtual inline ~bm25() {}
34 
35  /**
36  * Set one of the options in the model. Use the option manager to set
37  * these options. If the option does not satisfy the conditions that the
38  * option manager has imposed on it, an error is thrown.
39  *
40  * \param[in] _options Options to set
41  */
42  void init_options(const std::map<std::string, flexible_type>&_options) override;
43 
44  /**
45  * Get a version for the object.
46  */
47  size_t get_version() const override;
48 
49  /**
50  * Save the object using Turi's oarc.
51  */
52  void save_impl(turi::oarchive& oarc) const override;
53 
54  /**
55  * Load the object using Turi's iarc.
56  */
57  void load_version(turi::iarchive& iarc, size_t version) override;
58 
59 
60  /**
61  * Initialize the transformer.
62  */
63  void init_transformer(const std::map<std::string,
64  flexible_type>& _options) override;
65 
66  /**
67  * Fit the transformer to the given data.
68  *
69  * \param[in] data (SFrame of data)
70  */
71  void fit(gl_sframe data) override;
72 
73  /**
74  * Transform the given data.
75  *
76  * \param[in] data (SFrame of data)
77  *
78  * Python side interface
79  * ------------------------
80  * This function directly interfaces with "transform" in Python.
81  *
82  */
83  gl_sframe transform(gl_sframe data) override;
84 
85  /**
86  * Fit and transform the given data. Intended as an optimization because
87  * fit and transform are usually called together. The default
88  * implementation calls fit and then transform.
89  *
90  * \param[in] data (SFrame of data)
91  */
92  gl_sframe fit_transform(gl_sframe data) {
93  data.materialize();
94  fit(data);
95  return transform(data);
96  }
97 
98  // Functions that all transformers need to register. Can be copied verbatim
99  // for other classes.
100  // --------------------------------------------------------------------------
102  REGISTER_CLASS_MEMBER_FUNCTION(bm25::init_transformer, "_options");
103  REGISTER_CLASS_MEMBER_FUNCTION(bm25::fit, "data");
104  REGISTER_CLASS_MEMBER_FUNCTION(bm25::fit_transform, "data");
106  REGISTER_CLASS_MEMBER_FUNCTION(bm25::get_current_options);
107  REGISTER_CLASS_MEMBER_FUNCTION(bm25::list_fields);
108  REGISTER_NAMED_CLASS_MEMBER_FUNCTION("_get_default_options",
109  bm25::get_default_options);
111  bm25::get_value_from_state,
112  "key");
114 
115 };
116 
117 
118 } // feature_engineering
119 } // sdk_model
120 } // turi
121 #endif
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
#define END_CLASS_MEMBER_REGISTRATION
#define REGISTER_NAMED_CLASS_MEMBER_FUNCTION(name, function,...)
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80
void transform(S &&input, T &&output, TransformFn transformfn, std::set< size_t > constraint_segments=std::set< size_t >())
Definition: algorithm.hpp:64