Turi Create  4.0
turi::recsys::recsys_factorization_model_base Class Reference (abstract)

#include <toolkits/recsys/models/factorization_models.hpp>

Public Member Functions

void init_options (const std::map< std::string, flexible_type > &_options)
 
std::map< std::string, flexible_type > train (const v2::ml_data &training_data)
 
sframe predict (const v2::ml_data &test_data) const
 
void get_item_similarity_scores (size_t item, std::vector< std::pair< size_t, double > > &sim_scores) const
 
void score_all_items (std::vector< std::pair< size_t, double > > &scores, const std::vector< v2::ml_data_entry > &query_row, size_t top_k, const std::vector< std::pair< size_t, double > > &user_item_list, const std::vector< std::pair< size_t, double > > &new_user_item_data, const std::vector< v2::ml_data_row_reference > &new_observation_data, const std::shared_ptr< v2::ml_data_side_features > &known_side_features) const
 
void internal_save (turi::oarchive &oarc) const
 
void internal_load (turi::iarchive &iarc, size_t version)
 
virtual sframe get_similar_users (std::shared_ptr< sarray< flexible_type > > users, size_t k) const =0
 
virtual sframe get_similar_items (std::shared_ptr< sarray< flexible_type > > items, size_t k) const =0
 
virtual sframe get_item_intersection_info (const sframe &unindexed_item_pairs) const
 
v2::ml_data create_ml_data (const sframe &data, const sframe &new_user_side_data=sframe(), const sframe &new_item_side_data=sframe()) const
 
flex_type_enum user_type () const
 
flex_type_enum item_type () const
 
void setup_and_train (const sframe &observation_data, const sframe &user_side_data=sframe(), const sframe &item_side_data=sframe(), const std::map< std::string, variant_type > &other_data=(std::map< std::string, variant_type >()))
 
void import_all_from_other_model (const recsys_model_base *other)
 
std::shared_ptr< recsys_model_base > get_popularity_baseline () const
 
sframe recommend (const sframe &reference_data, size_t top_k, const sframe &restriction_data=sframe(), const sframe &exclusion_data=sframe(), const sframe &new_observation_data=sframe(), const sframe &new_user_data=sframe(), const sframe &new_item_data=sframe(), bool exclude_training_interactions=true, double diversity_factor=0, size_t random_seed=0) const
 
sframe precision_recall_stats (const sframe &indexed_validation_data, const sframe &recommend_output, const std::vector< size_t > &cutoffs) const
 
sframe get_num_items_per_user () const
 
sframe get_num_users_per_item () const
 
size_t get_version () const override
 
virtual void save_impl (turi::oarchive &oarc) const override
 Serialization – save.
 
void load_version (turi::iarchive &iarc, size_t version) override
 Serialization – load.
 
std::map< std::string, flexible_type > get_train_stats ()
 Get stats about algorithm runtime.
 
std::vector< std::string > list_fields ()
 
const variant_type & get_value_from_state (std::string key)
 
const std::map< std::string, flexible_type > & get_current_options () const
 
std::map< std::string, flexible_type > get_default_options () const
 
const flexible_type & get_option_value (const std::string &name) const
 
const std::map< std::string, variant_type > & get_state () const
 
bool is_trained () const
 
void set_options (const std::map< std::string, flexible_type > &_options)
 
void add_or_update_state (const std::map< std::string, variant_type > &dict)
 
const std::vector< option_handling::option_info > & get_option_info () const
 
virtual const char * name ()=0
 
virtual const std::string & uid ()=0
 
void save_to_url (const std::string &url, const variant_map_type &side_data={})
 
void save_model_to_data (std::ostream &out)
 
const std::map< std::string, std::vector< std::string > > & list_functions ()
 
const std::vector< std::string > & list_get_properties ()
 
const std::vector< std::string > & list_set_properties ()
 
variant_type call_function (const std::string &function, variant_map_type argument)
 
variant_type get_property (const std::string &property)
 
variant_type set_property (const std::string &property, variant_map_type argument)
 
const std::string & get_docstring (const std::string &symbol)
 
virtual void perform_registration ()
 

Static Public Attributes

static constexpr size_t USER_COLUMN_INDEX = 0
 The metadata needed for translating the data back and forth.
 

Protected Member Functions

virtual bool include_ranking_options () const =0
 
std::map< std::string, flexible_type > train (const v2::ml_data &training_data_by_user, const v2::ml_data &training_data_by_item)
 
template<typename GetSimilarFunction >
sframe _create_similar_sframe (size_t column_index, std::shared_ptr< sarray< flexible_type > > items, size_t k, GetSimilarFunction &&similar) const
 
void register_defaults (const std::string &fnname, const variant_map_type &arguments)
 
void register_function (std::string fnname, const std::vector< std::string > &arguments, impl_fn fn)
 
void register_setter (const std::string &propname, impl_fn setfn)
 
void register_getter (const std::string &propname, impl_fn getfn)
 
void register_docstring (const std::pair< std::string, std::string > &fnname_docstring)
 

Protected Attributes

std::map< std::string, variant_type > state
 

Detailed Description

Implements all the factorization stuff – a thin wrapper to the factorization models.

Definition at line 27 of file factorization_models.hpp.

Member Function Documentation

◆ _create_similar_sframe()

template<typename GetSimilarFunction >
sframe turi::recsys::recsys_model_base::_create_similar_sframe ( size_t  column_index,
std::shared_ptr< sarray< flexible_type > >  items,
size_t  k,
GetSimilarFunction &&  similar 
) const
protected, inherited

Utility function to aid in the retrieval of similar items.

GetSimilarFunction is a function called as f(size_t idx, std::vector<std::pair<size_t, double> >& idx_dist_dest);

Definition at line 476 of file recsys_model_base.hpp.

◆ add_or_update_state()

void turi::ml_model_base::add_or_update_state ( const std::map< std::string, variant_type > &  dict)
inherited

Append the key value store of the model.

Parameters
[in] dict: Options (key-value pairs) to set

◆ call_function()

variant_type turi::model_base::call_function ( const std::string &  function,
variant_map_type  argument 
)
inherited

Calls a user defined function.

◆ create_ml_data()

v2::ml_data turi::recsys::recsys_model_base::create_ml_data ( const sframe data,
const sframe new_user_side_data = sframe(),
const sframe new_item_side_data = sframe() 
) const
inherited

Creates an ml_data object according to the given schema. No target column.

◆ get_current_options()

const std::map<std::string, flexible_type>& turi::ml_model_base::get_current_options ( ) const
inherited

Get current options.

Returns
Dictionary containing current options.

Python side interface

Interfaces with the get_current_options function in the Python side.

◆ get_default_options()

std::map<std::string, flexible_type> turi::ml_model_base::get_default_options ( ) const
inherited

Get default options.

Returns
Dictionary with default options.

Python side interface

Interfaces with the get_default_options function in the Python side.

◆ get_docstring()

const std::string& turi::model_base::get_docstring ( const std::string &  symbol)
inherited

Returns the toolkit documentation for a function or property.

◆ get_item_intersection_info()

virtual sframe turi::recsys::recsys_model_base::get_item_intersection_info ( const sframe unindexed_item_pairs) const
virtual, inherited

Returns information about all the users in the overlap of the item pairs listed in two columns in unindexed_item_pairs. All these items must be present in the training data.

Returns an sframe with information about this intersection. Columns are item_1, item_2, num_users_1, num_users_2, item_intersection (dict, user ->

◆ get_item_similarity_scores()

void turi::recsys::recsys_factorization_model_base::get_item_similarity_scores ( size_t  item,
std::vector< std::pair< size_t, double > > &  sim_scores 
) const
virtual

For each of the items in sim_scores (first part of tuple), sets a similarity score (second part of tuple) that is higher for items similar to item.

Reimplemented from turi::recsys::recsys_model_base.

◆ get_num_items_per_user()

sframe turi::recsys::recsys_model_base::get_num_items_per_user ( ) const
inherited

Return an SFrame containing each user id and the number of observations with that user in the training set.

◆ get_num_users_per_item()

sframe turi::recsys::recsys_model_base::get_num_users_per_item ( ) const
inherited

Return an SFrame containing each item and the number of observations with that item in the training set.

◆ get_option_info()

const std::vector<option_handling::option_info>& turi::ml_model_base::get_option_info ( ) const
inherited

Returns the option information struct for each of the set parameters.

◆ get_option_value()

const flexible_type& turi::ml_model_base::get_option_value ( const std::string &  name) const
inherited

Returns the value of an option. Throws an error if the option does not exist.

Parameters
[in] name: Name of the option to get.

◆ get_popularity_baseline()

std::shared_ptr<recsys_model_base> turi::recsys::recsys_model_base::get_popularity_baseline ( ) const
inherited

Creates and returns a popularity baseline

◆ get_property()

variant_type turi::model_base::get_property ( const std::string &  property)
inherited

Reads a property.

◆ get_similar_items()

virtual sframe turi::recsys::recsys_model_base::get_similar_items ( std::shared_ptr< sarray< flexible_type > >  items,
size_t  k 
) const
pure virtual, inherited

Get the nearest k items for each of the provided items. If no items are provided, then similar items are retrieved for all items observed during training.

Implemented in turi::recsys::recsys_itemcf.

◆ get_similar_users()

virtual sframe turi::recsys::recsys_model_base::get_similar_users ( std::shared_ptr< sarray< flexible_type > >  users,
size_t  k 
) const
pure virtual, inherited

Get the nearest k users for each of the provided users. If no users are provided, then similar users are retrieved for all users observed during training.

Implemented in turi::recsys::recsys_itemcf.

◆ get_state()

const std::map<std::string, variant_type>& turi::ml_model_base::get_state ( ) const
inherited

Get model.

Returns
Model map.

◆ get_value_from_state()

const variant_type& turi::ml_model_base::get_value_from_state ( std::string  key)
inherited

Returns the value of a particular key from the state.

Returns
Value of the key. See model_base for details.

Python side interface

From the python side, this is interfaced with the get() function or the [] operator in python.

◆ get_version()

size_t turi::recsys::recsys_model_base::get_version ( ) const
inline, override, virtual, inherited

Returns the current version of the toolkit class for this instance, for serialization purposes.

Reimplemented from turi::model_base.

Definition at line 380 of file recsys_model_base.hpp.

◆ import_all_from_other_model()

void turi::recsys::recsys_model_base::import_all_from_other_model ( const recsys_model_base other)
inherited

Some of the models, such as popularity, can be built entirely from data already contained in the model. This method allows us to create a new model while bypassing the typical setup_and_train method. This simply imports all the relevant variables over; the final training is left up to the model.

◆ include_ranking_options()

virtual bool turi::recsys::recsys_factorization_model_base::include_ranking_options ( ) const
protected, pure virtual

This term determines whether we work in ranking factorization or not.

◆ init_options()

void turi::recsys::recsys_factorization_model_base::init_options ( const std::map< std::string, flexible_type > &  _options)
virtual

Set one of the options in the algorithm. Use the option manager to set these options. If the option does not satisfy the conditions that the option manager has imposed on it, errors will be thrown.

Parameters
[in] _options: Options to set

Reimplemented from turi::ml_model_base.

◆ internal_load()

void turi::recsys::recsys_factorization_model_base::internal_load ( turi::iarchive iarc,
size_t  version 
)
virtual

Implement serialization (load). The model subclass should reimplement this particular function. The syntax follows the standard turicreate load() method.

When this method is called, all the model options have been set up in the base class and are readily accessible. Furthermore, once this function is called, the model is treated as trained and ready to be used for prediction and ranking. Thus loading a model can effectively replace the training stage.

Implements turi::recsys::recsys_model_base.

◆ internal_save()

void turi::recsys::recsys_factorization_model_base::internal_save ( turi::oarchive oarc) const
virtual

Implement serialization (save). The model subclass should reimplement this particular function. The syntax follows the standard turicreate save() method.

Implements turi::recsys::recsys_model_base.

◆ is_trained()

bool turi::ml_model_base::is_trained ( ) const
inherited

Is this model trained.

Returns
True if already trained.

◆ item_type()

flex_type_enum turi::recsys::recsys_model_base::item_type ( ) const
inline, inherited

Returns the flexible data type of the item column; The model must be trained at this point.

Definition at line 216 of file recsys_model_base.hpp.

◆ list_fields()

std::vector<std::string> turi::ml_model_base::list_fields ( )
inherited

Methods with already meaningful default implementations.

Lists all the keys accessible in the "model" map.

Returns
List of keys in the model map. See model_base for details.

Python side interface

This is the function that the list_fields should call in python.

◆ list_functions()

const std::map<std::string, std::vector<std::string> >& turi::model_base::list_functions ( )
inherited

Lists all the registered functions. Returns a map of function name to array of argument names for the function.

◆ list_get_properties()

const std::vector<std::string>& turi::model_base::list_get_properties ( )
inherited

Lists all the get-table properties of the class.

◆ list_set_properties()

const std::vector<std::string>& turi::model_base::list_set_properties ( )
inherited

Lists all the set-table properties of the class.

◆ name()

virtual const char* turi::model_base::name ( )
pure virtual, inherited

Returns the name of the toolkit class, as exposed to client code. For example, the Python proxy for this instance will have a type with this name.

Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.

◆ perform_registration()

virtual void turi::model_base::perform_registration ( )
virtual, inherited

Declare the base registration function. This class has to be handled specially; the macros don't work here due to the override declarations.

Reimplemented in turi::model_proxy.

◆ precision_recall_stats()

sframe turi::recsys::recsys_model_base::precision_recall_stats ( const sframe indexed_validation_data,
const sframe recommend_output,
const std::vector< size_t > &  cutoffs 
) const
inherited

Compute the precision and recall for a (potentially held out) set of observations.

Parameters
validation_data: An ml_data giving the validation set on which the precision and recall should be calculated.
recommend_output: The output of the recommend method. Note that recommend should be called with top_k larger than the max value in cutoffs.
cutoffs: A vector of cutoffs for computing e.g. the top [5,10,50] rankings.
Returns
An sframe with 5 columns – user, cutoff, precision, recall, and item counts.

◆ predict()

sframe turi::recsys::recsys_factorization_model_base::predict ( const v2::ml_data &  test_data) const
virtual

Run predictions on each element in the test data set. Returns a vector corresponding to the response prediction of each observation in the test_data set. Also takes a ml_data in the same format containing observations that are considered "available" during prediction time.

Implements turi::recsys::recsys_model_base.

◆ recommend()

sframe turi::recsys::recsys_model_base::recommend ( const sframe reference_data,
size_t  top_k,
const sframe restriction_data = sframe(),
const sframe exclusion_data = sframe(),
const sframe new_observation_data = sframe(),
const sframe new_user_data = sframe(),
const sframe new_item_data = sframe(),
bool  exclude_training_interactions = true,
double  diversity_factor = 0,
size_t  random_seed = 0 
) const
inherited

Return the top_k ranks for this model based on sorted predictions.

Here, for each user in users, the top_k ranks are returned in the same format as the previous function.

If exclude_observations is given, these observations are excluded from the returned values.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ register_defaults()

void turi::model_base::register_defaults ( const std::string &  fnname,
const variant_map_type &  arguments 
)
protected, inherited

Registers default argument values

◆ register_docstring()

void turi::model_base::register_docstring ( const std::pair< std::string, std::string > &  fnname_docstring)
protected, inherited

Adds a docstring for the specified function or property name.

◆ register_function()

void turi::model_base::register_function ( std::string  fnname,
const std::vector< std::string > &  arguments,
impl_fn  fn 
)
protected, inherited

Adds a function with the specified name, and argument list.

◆ register_getter()

void turi::model_base::register_getter ( const std::string &  propname,
impl_fn  getfn 
)
protected, inherited

Adds a property getter with the specified name.

◆ register_setter()

void turi::model_base::register_setter ( const std::string &  propname,
impl_fn  setfn 
)
protected, inherited

Adds a property setter with the specified name.

◆ save_model_to_data()

void turi::model_base::save_model_to_data ( std::ostream &  out)
inherited

Save a toolkit class to a data stream.

◆ save_to_url()

void turi::model_base::save_to_url ( const std::string &  url,
const variant_map_type &  side_data = {} 
)
inherited

Save a toolkit class to disk.

Parameters
url: The destination url to store the class.
side_data: Any additional side information

◆ score_all_items()

void turi::recsys::recsys_factorization_model_base::score_all_items ( std::vector< std::pair< size_t, double > > &  scores,
const std::vector< v2::ml_data_entry > &  query_row,
size_t  top_k,
const std::vector< std::pair< size_t, double > > &  user_item_list,
const std::vector< std::pair< size_t, double > > &  new_user_item_data,
const std::vector< v2::ml_data_row_reference > &  new_observation_data,
const std::shared_ptr< v2::ml_data_side_features > &  known_side_features 
) const
virtual

For a given base observation, predict the score for all the items with all non-item columns replaced by the values in the base observation.

The base_observation vector is used to generate all the observations predicted. New observations are generated by repeatedly copying template_observation, then replacing the values in item_column_index by each possible item value.

Implements turi::recsys::recsys_model_base.

◆ set_options()

void turi::ml_model_base::set_options ( const std::map< std::string, flexible_type > &  _options)
inherited

Set one of the options in the algorithm.

The values are checked against the requirements given by the option instance.

Parameters
[in] name: Name of the option.
[in] value: Value for the option.

◆ set_property()

variant_type turi::model_base::set_property ( const std::string &  property,
variant_map_type  argument 
)
inherited

Sets a property. The new value of the property should appear in the argument map under the key "value".

◆ setup_and_train()

void turi::recsys::recsys_model_base::setup_and_train ( const sframe observation_data,
const sframe user_side_data = sframe(),
const sframe item_side_data = sframe(),
const std::map< std::string, variant_type > &  other_data = (std::map< std::string, variant_type >()) 
)
inherited

Train the model using an sframe as the primary observations. This method constructs the internal ml_data objects from the current options.

Parameters
observation_data: An SFrame containing at least a column containing user ids and a column containing item ids.
user_side_data: An SFrame containing side information about users, where one column matches with the user column of observation data.
item_side_data: An SFrame containing side information about items, where one column matches with the item column of observation data.
other_data: When provided, each model can implement a method set_extra_data in order to use this argument during training.
Returns
Statistics about the training.

◆ train() [1/2]

std::map<std::string, flexible_type> turi::recsys::recsys_factorization_model_base::train ( const v2::ml_data &  training_data)
virtual

Train the algorithm. Takes a training/validation split. Returns a map of information about the run.

Implements turi::recsys::recsys_model_base.

◆ train() [2/2]

std::map<std::string, flexible_type> turi::recsys::recsys_factorization_model_base::train ( const v2::ml_data &  training_data_by_user,
const v2::ml_data &  training_data_by_item 
)
protected, virtual

Takes two datasets for training.

Parameters
[in] training_data_by_user: ML-Data sorted by user
[in] training_data_by_item: ML-Data sorted by item

Reimplemented from turi::recsys::recsys_model_base.

◆ uid()

virtual const std::string& turi::model_base::uid ( )
pure virtual, inherited

Returns a unique identifier for the toolkit class. It can be any unique ID. The UID is only used at runtime (to determine the concrete type of an arbitrary model_base instance) and is never stored.

Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.

Implemented in turi::model_proxy.

◆ user_type()

flex_type_enum turi::recsys::recsys_model_base::user_type ( ) const
inline, inherited

Returns the flexible data type of the user column; The model must be trained at this point.

Definition at line 209 of file recsys_model_base.hpp.

Member Data Documentation

◆ state

std::map<std::string, variant_type> turi::ml_model_base::state
protected, inherited

All things python

Definition at line 206 of file ml_model.hpp.


The documentation for this class was generated from the following file: