Turi Create  4.0
turi::recsys::recsys_model_base Class Reference (abstract)

#include <toolkits/recsys/recsys_model_base.hpp>

Public Member Functions

 recsys_model_base ()
 Default constructor.
 
virtual sframe predict (const v2::ml_data &test_data) const =0
 
virtual sframe get_similar_users (std::shared_ptr< sarray< flexible_type > > users, size_t k) const =0
 
virtual sframe get_similar_items (std::shared_ptr< sarray< flexible_type > > items, size_t k) const =0
 
virtual void get_item_similarity_scores (size_t item, std::vector< std::pair< size_t, double > > &sim_scores) const
 
virtual sframe get_item_intersection_info (const sframe &unindexed_item_pairs) const
 
virtual void score_all_items (std::vector< std::pair< size_t, double > > &scores, const std::vector< v2::ml_data_entry > &query_row, size_t top_k, const std::vector< std::pair< size_t, double > > &user_item_list, const std::vector< std::pair< size_t, double > > &new_user_item_data, const std::vector< v2::ml_data_row_reference > &new_observation_data, const std::shared_ptr< v2::ml_data_side_features > &known_side_features) const =0
 
v2::ml_data create_ml_data (const sframe &data, const sframe &new_user_side_data=sframe(), const sframe &new_item_side_data=sframe()) const
 
flex_type_enum user_type () const
 
flex_type_enum item_type () const
 
void setup_and_train (const sframe &observation_data, const sframe &user_side_data=sframe(), const sframe &item_side_data=sframe(), const std::map< std::string, variant_type > &other_data=(std::map< std::string, variant_type >()))
 
void import_all_from_other_model (const recsys_model_base *other)
 
std::shared_ptr< recsys_model_base > get_popularity_baseline () const
 
sframe recommend (const sframe &reference_data, size_t top_k, const sframe &restriction_data=sframe(), const sframe &exclusion_data=sframe(), const sframe &new_observation_data=sframe(), const sframe &new_user_data=sframe(), const sframe &new_item_data=sframe(), bool exclude_training_interactions=true, double diversity_factor=0, size_t random_seed=0) const
 
sframe precision_recall_stats (const sframe &indexed_validation_data, const sframe &recommend_output, const std::vector< size_t > &cutoffs) const
 
sframe get_num_items_per_user () const
 
sframe get_num_users_per_item () const
 
size_t get_version () const override
 
virtual void save_impl (turi::oarchive &oarc) const override
 Serialization – save.
 
void load_version (turi::iarchive &iarc, size_t version) override
 Serialization – load.
 
std::map< std::string, flexible_type > get_train_stats ()
 Get stats about algorithm runtime.
 
virtual void init_options (const std::map< std::string, flexible_type > &_options)
 
std::vector< std::string > list_fields ()
 
const variant_type & get_value_from_state (std::string key)
 
const std::map< std::string, flexible_type > & get_current_options () const
 
std::map< std::string, flexible_type > get_default_options () const
 
const flexible_type & get_option_value (const std::string &name) const
 
const std::map< std::string, variant_type > & get_state () const
 
bool is_trained () const
 
void set_options (const std::map< std::string, flexible_type > &_options)
 
void add_or_update_state (const std::map< std::string, variant_type > &dict)
 
const std::vector< option_handling::option_info > & get_option_info () const
 
virtual const char * name ()=0
 
virtual const std::string & uid ()=0
 
void save_to_url (const std::string &url, const variant_map_type &side_data={})
 
void save_model_to_data (std::ostream &out)
 
const std::map< std::string, std::vector< std::string > > & list_functions ()
 
const std::vector< std::string > & list_get_properties ()
 
const std::vector< std::string > & list_set_properties ()
 
variant_type call_function (const std::string &function, variant_map_type argument)
 
variant_type get_property (const std::string &property)
 
variant_type set_property (const std::string &property, variant_map_type argument)
 
const std::string & get_docstring (const std::string &symbol)
 
virtual void perform_registration ()
 

Static Public Attributes

static constexpr size_t USER_COLUMN_INDEX = 0
 The metadata needed for translating the data back and forth.
 

Protected Member Functions

virtual std::map< std::string, flexible_type > train (const v2::ml_data &training_data)=0
 
virtual std::map< std::string, flexible_type > train (const v2::ml_data &training_data_by_user, const v2::ml_data &training_data_by_item)
 
template<typename GetSimilarFunction >
sframe _create_similar_sframe (size_t column_index, std::shared_ptr< sarray< flexible_type > > items, size_t k, GetSimilarFunction &&similar) const
 
virtual void internal_save (turi::oarchive &oarc) const =0
 
virtual void internal_load (turi::iarchive &iarc, size_t version)=0
 
void register_function (std::string fnname, const std::vector< std::string > &arguments, impl_fn fn)
 
void register_defaults (const std::string &fnname, const variant_map_type &arguments)
 
void register_setter (const std::string &propname, impl_fn setfn)
 
void register_getter (const std::string &propname, impl_fn getfn)
 
void register_docstring (const std::pair< std::string, std::string > &fnname_docstring)
 

Protected Attributes

std::map< std::string, variant_type > state
 

Detailed Description

The base class for recsys model classes. Individual models are expected to implement all of the pure virtual functions below, along with (optionally) overriding any of the other virtual methods.

Definition at line 46 of file recsys_model_base.hpp.

Member Function Documentation

◆ _create_similar_sframe()

template<typename GetSimilarFunction >
sframe turi::recsys::recsys_model_base::_create_similar_sframe ( size_t  column_index,
std::shared_ptr< sarray< flexible_type > >  items,
size_t  k,
GetSimilarFunction &&  similar 
) const
protected

Utility function to aid in the retrieval of similar items.

GetSimilarFunction is a function called as f(size_t idx, std::vector<std::pair<size_t, double> >& idx_dist_dest);

Definition at line 476 of file recsys_model_base.hpp.

◆ add_or_update_state()

void turi::ml_model_base::add_or_update_state ( const std::map< std::string, variant_type > &  dict)
inherited

Append the key value store of the model.

Parameters
[in] dict: Options (Key-Value pairs) to set

◆ call_function()

variant_type turi::model_base::call_function ( const std::string &  function,
variant_map_type  argument 
)
inherited

Calls a user defined function.

◆ create_ml_data()

v2::ml_data turi::recsys::recsys_model_base::create_ml_data ( const sframe &  data,
const sframe &  new_user_side_data = sframe(),
const sframe &  new_item_side_data = sframe() 
) const

Creates an ml_data object according to the given schema. No target column.

◆ get_current_options()

const std::map<std::string, flexible_type>& turi::ml_model_base::get_current_options ( ) const
inherited

Get current options.

Returns
Dictionary containing current options.

Python side interface

Interfaces with the get_current_options function in the Python side.

◆ get_default_options()

std::map<std::string, flexible_type> turi::ml_model_base::get_default_options ( ) const
inherited

Get default options.

Returns
Dictionary with default options.

Python side interface

Interfaces with the get_default_options function in the Python side.

◆ get_docstring()

const std::string& turi::model_base::get_docstring ( const std::string &  symbol)
inherited

Returns the toolkit documentation for a function or property.

◆ get_item_intersection_info()

virtual sframe turi::recsys::recsys_model_base::get_item_intersection_info ( const sframe &  unindexed_item_pairs) const
virtual

Returns information about all the users in the overlap of the item pairs listed in two columns in unindexed_item_pairs. All these items must be present in the training data.

Returns an sframe with information about this intersection. Columns are item_1, item_2, num_users_1, num_users_2, and item_intersection (a dict keyed by user).

◆ get_item_similarity_scores()

virtual void turi::recsys::recsys_model_base::get_item_similarity_scores ( size_t  item,
std::vector< std::pair< size_t, double > > &  sim_scores 
) const
inline virtual

For each of the items in sim_scores (first part of tuple), sets a similarity score (second part of tuple) that is higher for items similar to item.

Reimplemented in turi::recsys::recsys_factorization_model_base.

Definition at line 107 of file recsys_model_base.hpp.

◆ get_num_items_per_user()

sframe turi::recsys::recsys_model_base::get_num_items_per_user ( ) const

Return an SFrame containing each user id and the number of observations with that user in the training set.

◆ get_num_users_per_item()

sframe turi::recsys::recsys_model_base::get_num_users_per_item ( ) const

Return an SFrame containing each item and the number of observations with that item in the training set.

◆ get_option_info()

const std::vector<option_handling::option_info>& turi::ml_model_base::get_option_info ( ) const
inherited

Returns the option information struct for each of the set parameters.

◆ get_option_value()

const flexible_type& turi::ml_model_base::get_option_value ( const std::string &  name) const
inherited

Returns the value of an option. Throws an error if the option does not exist.

Parameters
[in] name: Name of the option to get.

◆ get_popularity_baseline()

std::shared_ptr<recsys_model_base> turi::recsys::recsys_model_base::get_popularity_baseline ( ) const

Creates and returns a popularity baseline

◆ get_property()

variant_type turi::model_base::get_property ( const std::string &  property)
inherited

Reads a property.

◆ get_similar_items()

virtual sframe turi::recsys::recsys_model_base::get_similar_items ( std::shared_ptr< sarray< flexible_type > >  items,
size_t  k 
) const
pure virtual

Get the nearest k items for each of the provided items. If no items are provided, then similar items are retrieved for all items observed during training.

Implemented in turi::recsys::recsys_itemcf.

◆ get_similar_users()

virtual sframe turi::recsys::recsys_model_base::get_similar_users ( std::shared_ptr< sarray< flexible_type > >  users,
size_t  k 
) const
pure virtual

Get the nearest k users for each of the provided users. If no users are provided, then similar users are retrieved for all users observed during training.

Implemented in turi::recsys::recsys_itemcf.

◆ get_state()

const std::map<std::string, variant_type>& turi::ml_model_base::get_state ( ) const
inherited

Get model.

Returns
Model map.

◆ get_value_from_state()

const variant_type& turi::ml_model_base::get_value_from_state ( std::string  key)
inherited

Returns the value of a particular key from the state.

Returns
Value of a key. See model_base for details.

Python side interface

From the python side, this is interfaced with the get() function or the [] operator in python.

◆ get_version()

size_t turi::recsys::recsys_model_base::get_version ( ) const
inline override virtual

Returns the current version of the toolkit class for this instance, for serialization purposes.

Reimplemented from turi::model_base.

Definition at line 380 of file recsys_model_base.hpp.

◆ import_all_from_other_model()

void turi::recsys::recsys_model_base::import_all_from_other_model ( const recsys_model_base *  other)

Some of the models, such as popularity, can be built entirely from data already contained in the model. This method allows us to create a new model while bypassing the typical setup_and_train method. This simply imports all the relevant variables over; the final training is left up to the model.

◆ init_options()

◆ internal_load()

virtual void turi::recsys::recsys_model_base::internal_load ( turi::iarchive &  iarc,
size_t  version 
)
protected pure virtual

Implement serialization (load). The model subclass should reimplement this particular function. The syntax follows the standard turicreate load() method.

When this method is called, all the model options have been set up in the base class and are readily accessible. Furthermore, once this function is called, the model is treated as trained and ready to be used for prediction and ranking. Thus loading a model can effectively replace the training stage.

Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.

◆ internal_save()

virtual void turi::recsys::recsys_model_base::internal_save ( turi::oarchive &  oarc) const
protected pure virtual

Implement serialization (save). The model subclass should reimplement this particular function. The syntax follows the standard turicreate save() method.

Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.

◆ is_trained()

bool turi::ml_model_base::is_trained ( ) const
inherited

Is this model trained.

Returns
True if already trained.

◆ item_type()

flex_type_enum turi::recsys::recsys_model_base::item_type ( ) const
inline

Returns the flexible data type of the item column; The model must be trained at this point.

Definition at line 216 of file recsys_model_base.hpp.

◆ list_fields()

std::vector<std::string> turi::ml_model_base::list_fields ( )
inherited

Methods with already meaningful default implementations.

Lists all the keys accessible in the "model" map.

Returns
List of keys in the model map. See model_base for details.

Python side interface

This is the function that the list_fields should call in python.

◆ list_functions()

const std::map<std::string, std::vector<std::string> >& turi::model_base::list_functions ( )
inherited

Lists all the registered functions. Returns a map of function name to array of argument names for the function.

◆ list_get_properties()

const std::vector<std::string>& turi::model_base::list_get_properties ( )
inherited

Lists all the get-table properties of the class.

◆ list_set_properties()

const std::vector<std::string>& turi::model_base::list_set_properties ( )
inherited

Lists all the set-table properties of the class.

◆ name()

virtual const char* turi::model_base::name ( )
pure virtual inherited

Returns the name of the toolkit class, as exposed to client code. For example, the Python proxy for this instance will have a type with this name.

Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.

◆ perform_registration()

virtual void turi::model_base::perform_registration ( )
virtual inherited

Declare the base registration function. This class has to be handled specially; the macros don't work here due to the override declarations.

Reimplemented in turi::model_proxy.

◆ precision_recall_stats()

sframe turi::recsys::recsys_model_base::precision_recall_stats ( const sframe &  indexed_validation_data,
const sframe &  recommend_output,
const std::vector< size_t > &  cutoffs 
) const

Compute the precision and recall for a (potentially held out) set of observations.

Parameters
indexed_validation_data: The indexed validation set on which the precision and recall should be calculated.
recommend_output: The output of the recommend method. Note that recommend should be called with top_k larger than the max value in cutoffs.
cutoffs: A vector of cutoffs for computing e.g. the top [5,10,50] rankings.
Returns
An sframe with 5 columns – user, cutoff, precision, recall, and item counts.

◆ predict()

virtual sframe turi::recsys::recsys_model_base::predict ( const v2::ml_data &  test_data) const
pure virtual

Run predictions on each element in the test data set. Returns a vector corresponding to the response prediction of each observation in the test_data set. Also takes a ml_data in the same format containing observations that are considered "available" during prediction time.

Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.

◆ recommend()

sframe turi::recsys::recsys_model_base::recommend ( const sframe &  reference_data,
size_t  top_k,
const sframe &  restriction_data = sframe(),
const sframe &  exclusion_data = sframe(),
const sframe &  new_observation_data = sframe(),
const sframe &  new_user_data = sframe(),
const sframe &  new_item_data = sframe(),
bool  exclude_training_interactions = true,
double  diversity_factor = 0,
size_t  random_seed = 0 
) const

Return the top_k ranks for this model based on sorted predictions.

Here, for each user in users, the top_k ranks are returned in the same format as the previous function.

If exclude_observations is given, these observations are excluded from the returned values.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ register_defaults()

void turi::model_base::register_defaults ( const std::string &  fnname,
const variant_map_type &  arguments 
)
protected inherited

Registers default argument values

◆ register_docstring()

void turi::model_base::register_docstring ( const std::pair< std::string, std::string > &  fnname_docstring)
protected inherited

Adds a docstring for the specified function or property name.

◆ register_function()

void turi::model_base::register_function ( std::string  fnname,
const std::vector< std::string > &  arguments,
impl_fn  fn 
)
protected inherited

Adds a function with the specified name, and argument list.

◆ register_getter()

void turi::model_base::register_getter ( const std::string &  propname,
impl_fn  getfn 
)
protected inherited

Adds a property getter with the specified name.

◆ register_setter()

void turi::model_base::register_setter ( const std::string &  propname,
impl_fn  setfn 
)
protected inherited

Adds a property setter with the specified name.

◆ save_model_to_data()

void turi::model_base::save_model_to_data ( std::ostream &  out)
inherited

Save a toolkit class to a data stream.

◆ save_to_url()

void turi::model_base::save_to_url ( const std::string &  url,
const variant_map_type &  side_data = {} 
)
inherited

Save a toolkit class to disk.

Parameters
url: The destination url to store the class.
side_data: Any additional side information.

◆ score_all_items()

virtual void turi::recsys::recsys_model_base::score_all_items ( std::vector< std::pair< size_t, double > > &  scores,
const std::vector< v2::ml_data_entry > &  query_row,
size_t  top_k,
const std::vector< std::pair< size_t, double > > &  user_item_list,
const std::vector< std::pair< size_t, double > > &  new_user_item_data,
const std::vector< v2::ml_data_row_reference > &  new_observation_data,
const std::shared_ptr< v2::ml_data_side_features > &  known_side_features 
) const
pure virtual

For a given base observation, predict the score for all the items with all non-item columns replaced by the values in the base observation.

The base_observation vector is used to generate all the observations predicted. New observations are generated by repeatedly copying template_observation, then replacing the values in item_column_index by each possible item value.

Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.

◆ set_options()

void turi::ml_model_base::set_options ( const std::map< std::string, flexible_type > &  _options)
inherited

Set one of the options in the algorithm.

The values are checked against the requirements given by the option instance.

Parameters
[in] name: Name of the option.
[in] value: Value for the option.

◆ set_property()

variant_type turi::model_base::set_property ( const std::string &  property,
variant_map_type  argument 
)
inherited

Sets a property. The new value of the property should appear in the argument map under the key "value".

◆ setup_and_train()

void turi::recsys::recsys_model_base::setup_and_train ( const sframe &  observation_data,
const sframe &  user_side_data = sframe(),
const sframe &  item_side_data = sframe(),
const std::map< std::string, variant_type > &  other_data = (std::map< std::string, variant_type >()) 
)

Train the model using an sframe as the primary observations. This method constructs the internal ml_data objects from the current options.

Parameters
observation_data: An SFrame containing at least a column containing user ids and a column containing item ids.
user_side_data: An SFrame containing side information about users, where one column matches with the user column of observation data.
item_side_data: An SFrame containing side information about items, where one column matches with the item column of observation data.
other_data: When provided, each model can implement a method set_extra_data in order to use this argument during training.
Returns
Statistics about the training.

◆ train() [1/2]

virtual std::map<std::string, flexible_type> turi::recsys::recsys_model_base::train ( const v2::ml_data &  training_data)
protected pure virtual

Train the algorithm. Takes a training/validation split. Returns a map of information about the run.

Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.

◆ train() [2/2]

virtual std::map<std::string, flexible_type> turi::recsys::recsys_model_base::train ( const v2::ml_data &  training_data_by_user,
const v2::ml_data &  training_data_by_item 
)
inline protected virtual

Takes two datasets for training.

Parameters
[in] training_data_by_user: ML-Data sorted by user
[in] training_data_by_item: ML-Data sorted by item

Reimplemented in turi::recsys::recsys_factorization_model_base.

Definition at line 68 of file recsys_model_base.hpp.

◆ uid()

virtual const std::string& turi::model_base::uid ( )
pure virtual inherited

Returns a unique identifier for the toolkit class. It can be any unique ID. The UID is only used at runtime (to determine the concrete type of an arbitrary model_base instance) and is never stored.

Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.

Implemented in turi::model_proxy.

◆ user_type()

flex_type_enum turi::recsys::recsys_model_base::user_type ( ) const
inline

Returns the flexible data type of the user column; The model must be trained at this point.

Definition at line 209 of file recsys_model_base.hpp.

Member Data Documentation

◆ state

std::map<std::string, variant_type> turi::ml_model_base::state
protected inherited

All things python

Definition at line 206 of file ml_model.hpp.


The documentation for this class was generated from the following file: