docs/cpp/sparse__nn_8hpp_source.html

 /* Copyright © 2018 Apple Inc. All rights reserved.
  *
  * Use of this source code is governed by a BSD-3-clause license that can
  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
  */
 #ifndef SPARSE_NN_HPP
 #define SPARSE_NN_HPP

 #include <core/export.hpp>
 #include <model_server/lib/extensions/ml_model.hpp>
 #include <core/data/sframe/gl_sframe.hpp>
 #include <model_server/lib/toolkit_class_macros.hpp>

 namespace turi {
 namespace prototype {

 class EXPORT sparse_nn : public ml_model_base {
  public:
   // Version number associated with this model class.
   static constexpr size_t SPARSE_NN_VERSION = 0;

   // 128-bit hash type used as the key for a (column, feature) pair; see
   // feature_hash() below.
   using hash_type = uint128_t;

   // Sets up the training data.  Every column must be a string or an integer
   // column; each one is treated as a categorical indicator.
   // NOTE(review): id_column presumably names the column whose values end up
   // in m_ids -- confirm against the implementation.
   void train(const gl_sframe& data, const std::string& id_column);

   // Looks up the top k values for x and returns them as a dictionary that
   // maps id to distance.
   flex_dict query(const flex_dict& x, size_t k) const;

   // Serialization hooks overridden from the base class.
   void save_impl(oarchive& oarc) const override;
   void load_version(iarchive& iarc, size_t version) override;

   BEGIN_CLASS_MEMBER_REGISTRATION("_sparse_nn")
   IMPORT_BASE_CLASS_REGISTRATION(ml_model_base);
   REGISTER_CLASS_MEMBER_FUNCTION(sparse_nn::train, "data", "id_column");
   REGISTER_CLASS_MEMBER_FUNCTION(sparse_nn::query, "x", "k");
   END_CLASS_MEMBER_REGISTRATION

  private:
   // Model metadata.
   size_t m_num_columns = 0;
   std::vector<flexible_type> m_ids;

   // The three containers below form a lookup structure tuned for extremely
   // efficient queries on small, in-memory datasets.
   //
   // How a query uses them:
   //
   //   1. For every feature, bisect m_hashes to find the index that matches
   //      the given column/feature combination.
   //
   //   2. The entry of m_access_bounds at that index gives a pair of bounds
   //      into m_hit_indices; the values of m_hit_indices denoted by those
   //      bounds are the ones whose counts get incremented.
   std::vector<hash_type> m_hashes;

   std::vector<std::pair<uint32_t, uint32_t>> m_access_bounds;
   std::vector<uint32_t> m_hit_indices;

   // Builds the lookup key for one column/feature combination by combining
   // the 128-bit hash of the column name with the 128-bit hash of the feature
   // value.
   hash_type feature_hash(const std::string& column,
                          const flexible_type& feature) const {
     const hash_type column_hash = hash128(column);
     const hash_type value_hash = feature.hash128();
     return hash128_combine(column_hash, value_hash);
   }
 };

 }  // namespace prototype
 }  // namespace turi

 #endif
BEGIN_CLASS_MEMBER_REGISTRATION
#define BEGIN_CLASS_MEMBER_REGISTRATION(python_facing_classname)
Definition: toolkit_class_macros.hpp:68

REGISTER_CLASS_MEMBER_FUNCTION
#define REGISTER_CLASS_MEMBER_FUNCTION(function,...)
Definition: toolkit_class_macros.hpp:113

turi::hash128
static uint128_t hash128(const char *s, size_t len)
Definition: cityhash_tc.hpp:833

std
STL namespace.

IMPORT_BASE_CLASS_REGISTRATION
#define IMPORT_BASE_CLASS_REGISTRATION(base_class)
Definition: toolkit_class_macros.hpp:171

turi::hash128_combine
static uint128_t hash128_combine(uint128_t h1, uint128_t h2)
Definition: cityhash_tc.hpp:1015

END_CLASS_MEMBER_REGISTRATION
#define END_CLASS_MEMBER_REGISTRATION
Definition: toolkit_class_macros.hpp:427

turi
SKD.
Definition: capi_initialization.hpp:11

turi::flex_dict
std::vector< std::pair< flexible_type, flexible_type > > flex_dict
Definition: flexible_type_base_types.hpp:57