Turi Create
4.0
|
#include <toolkits/recsys/models/itemcf.hpp>
Public Member Functions | |
void | init_options (const std::map< std::string, flexible_type > &options) override |
std::map< std::string, flexible_type > | train (const v2::ml_data &data) override |
sframe | predict (const v2::ml_data &test_data) const override |
void | score_all_items (std::vector< std::pair< size_t, double > > &scores, const std::vector< v2::ml_data_entry > &query_row, size_t top_k, const std::vector< std::pair< size_t, double > > &user_item_list, const std::vector< std::pair< size_t, double > > &new_user_item_data, const std::vector< v2::ml_data_row_reference > &new_observation_data, const std::shared_ptr< v2::ml_data_side_features > &known_side_features) const override |
std::string | response_column_name () const |
void | internal_save (turi::oarchive &oarc) const override |
void | internal_load (turi::iarchive &iarc, size_t version) override |
sframe | get_similar_items (std::shared_ptr< sarray< flexible_type > > items, size_t topk=0) const override |
sframe | get_similar_users (std::shared_ptr< sarray< flexible_type > > items, size_t topk=0) const override |
virtual void | get_item_similarity_scores (size_t item, std::vector< std::pair< size_t, double > > &sim_scores) const |
virtual sframe | get_item_intersection_info (const sframe &unindexed_item_pairs) const |
v2::ml_data | create_ml_data (const sframe &data, const sframe &new_user_side_data=sframe(), const sframe &new_item_side_data=sframe()) const |
flex_type_enum | user_type () const |
flex_type_enum | item_type () const |
void | setup_and_train (const sframe &observation_data, const sframe &user_side_data=sframe(), const sframe &item_side_data=sframe(), const std::map< std::string, variant_type > &other_data=(std::map< std::string, variant_type >())) |
void | import_all_from_other_model (const recsys_model_base *other) |
std::shared_ptr< recsys_model_base > | get_popularity_baseline () const |
sframe | recommend (const sframe &reference_data, size_t top_k, const sframe &restriction_data=sframe(), const sframe &exclusion_data=sframe(), const sframe &new_observation_data=sframe(), const sframe &new_user_data=sframe(), const sframe &new_item_data=sframe(), bool exclude_training_interactions=true, double diversity_factor=0, size_t random_seed=0) const |
sframe | precision_recall_stats (const sframe &indexed_validation_data, const sframe &recommend_output, const std::vector< size_t > &cutoffs) const |
sframe | get_num_items_per_user () const |
sframe | get_num_users_per_item () const |
size_t | get_version () const override |
virtual void | save_impl (turi::oarchive &oarc) const override |
Serialization – save. | |
void | load_version (turi::iarchive &iarc, size_t version) override |
Serialization – load. | |
std::map< std::string, flexible_type > | get_train_stats () |
Get stats about algorithm runtime. | |
std::vector< std::string > | list_fields () |
const variant_type & | get_value_from_state (std::string key) |
const std::map< std::string, flexible_type > & | get_current_options () const |
std::map< std::string, flexible_type > | get_default_options () const |
const flexible_type & | get_option_value (const std::string &name) const |
const std::map< std::string, variant_type > & | get_state () const |
bool | is_trained () const |
void | set_options (const std::map< std::string, flexible_type > &_options) |
void | add_or_update_state (const std::map< std::string, variant_type > &dict) |
const std::vector< option_handling::option_info > & | get_option_info () const |
virtual const char * | name ()=0 |
virtual const std::string & | uid ()=0 |
void | save_to_url (const std::string &url, const variant_map_type &side_data={}) |
void | save_model_to_data (std::ostream &out) |
const std::map< std::string, std::vector< std::string > > & | list_functions () |
const std::vector< std::string > & | list_get_properties () |
const std::vector< std::string > & | list_set_properties () |
variant_type | call_function (const std::string &function, variant_map_type argument) |
variant_type | get_property (const std::string &property) |
variant_type | set_property (const std::string &property, variant_map_type argument) |
const std::string & | get_docstring (const std::string &symbol) |
virtual void | perform_registration () |
Static Public Attributes | |
static constexpr size_t | USER_COLUMN_INDEX = 0 |
The metadata needed for translating the data back and forth. | |
Protected Member Functions | |
virtual std::map< std::string, flexible_type > | train (const v2::ml_data &training_data_by_user, const v2::ml_data &training_data_by_item) |
template<typename GetSimilarFunction > | |
sframe | _create_similar_sframe (size_t column_index, std::shared_ptr< sarray< flexible_type > > items, size_t k, GetSimilarFunction &&similar) const |
void | register_defaults (const std::string &fnname, const variant_map_type &arguments) |
void | register_function (std::string fnname, const std::vector< std::string > &arguments, impl_fn fn) |
void | register_setter (const std::string &propname, impl_fn setfn) |
void | register_getter (const std::string &propname, impl_fn getfn) |
void | register_docstring (const std::pair< std::string, std::string > &fnname_docstring) |
Protected Attributes | |
std::shared_ptr< sparse_similarity_lookup > | item_sim |
std::map< std::string, variant_type > | state |
This provides an implementation of a collaborative filtering algorithm. The premise is to compute similarities (or distances) between all pairs of items. Several choices of similarity will be available, and these are functions of the list of users that were observed with the pair of items. Some choices of similarity can also leverage a score that the item was given by the user, e.g. a rating.
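In the following, let $u(a)$ be the set of users who rated item $a$, let $E$ be the set of all (user, item) pairs, and let $r_{ua}$ be the rating that user $u$ gave to item $a$.
The three functions currently implemented are: Jaccard similarity: Let $A = u(a)$ and $B = u(b)$. Then Jaccard similarity is defined by: $$d(a,b) = \frac{|A \cap B|}{|A \cup B|}$$
Cosine similarity: This compares the ratings given by all users who rated both items (where all unobserved ratings are considered to be 0): $$d(a,b) = \frac{\sum_{u \in u(a) \cap u(b)} r_{ua}\, r_{ub}}{\sqrt{\sum_{u \in u(a)} r_{ua}^2}\;\sqrt{\sum_{u \in u(b)} r_{ub}^2}}$$
Pearson Correlation similarity: A problem with the Cosine similarity measure is that it does not consider the differences in the mean and variance of the ratings of items a and b. Pearson Correlation is a popular measure where the effects of mean and variance have been removed. Let $U_{ab}$ denote the set of users who rated both items a and b, and let $\bar{r}_a$ denote the mean rating of item $a$. Then: $$d(a,b) = \frac{\sum_{u \in U_{ab}} (r_{ua} - \bar{r}_a)(r_{ub} - \bar{r}_b)}{\sqrt{\sum_{u \in U_{ab}} (r_{ua} - \bar{r}_a)^2}\;\sqrt{\sum_{u \in U_{ab}} (r_{ub} - \bar{r}_b)^2}}$$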
Implementation details:
Details of computing item similarities:
Definition at line 96 of file itemcf.hpp.
|
protectedinherited |
Utility function to aid in the retrieval of similar items.
GetSimilarFunction is a function called as f(size_t idx, std::vector<std::pair<size_t, double> >& idx_dist_dest);
Definition at line 476 of file recsys_model_base.hpp.
|
inherited |
Append the key value store of the model.
[in] | dict | Options (Key-Value pairs) to set |
|
inherited |
Calls a user defined function.
|
inherited |
Creates an ml_data object according to the given schema. No target column.
|
inherited |
Get current options.
Interfaces with the get_current_options function in the Python side.
|
inherited |
Get default options.
Interfaces with the get_default_options function in the Python side.
|
inherited |
Returns the toolkit documentation for a function or property.
|
virtualinherited |
Returns information about all the users in the overlap of the item pairs listed in two columns in unindexed_item_pairs. All these items must be present in the training data.
Returns an sframe with information about this intersection. Columns are item_1, item_2, num_users_1, num_users_2, item_intersection (dict, user -> (rating_1, rating_2)).
|
inlinevirtualinherited |
For each of the items in sim_scores (first part of tuple), sets a similarity score (second part of tuple) that is higher for items similar to item.
Reimplemented in turi::recsys::recsys_factorization_model_base.
Definition at line 107 of file recsys_model_base.hpp.
|
inherited |
Return an SFrame containing each user id and the number of observations with that user in the training set.
|
inherited |
Return an SFrame containing each item and the number of observations with that item in the training set.
|
inherited |
Returns the option information struct for each of the set parameters.
|
inherited |
Returns the value of an option. Throws an error if the option does not exist.
[in] | name | Name of the option to get. |
|
inherited |
Creates and returns a popularity baseline model.
|
inherited |
Reads a property.
|
overridevirtual |
Get the nearest neighbors of a set of items.
[in] | indexed_items | An SArray of items in flexible_type |
[in] | topk | Number of neighbors returned for each item |
Implements turi::recsys::recsys_model_base.
|
inlineoverridevirtual |
Get the nearest neighbors of a set of users.
[in] | indexed_users | An SArray of users in flexible_type |
[in] | topk | Number of neighbors returned for each user |
Implements turi::recsys::recsys_model_base.
Definition at line 212 of file itemcf.hpp.
|
inherited |
Get model.
|
inherited |
Returns the value of a particular key from the state.
From the python side, this is interfaced with the get() function or the [] operator in python.
|
inlineoverridevirtualinherited |
Returns the current version of the toolkit class for this instance, for serialization purposes.
Reimplemented from turi::model_base.
Definition at line 380 of file recsys_model_base.hpp.
|
inherited |
Some of the models, such as popularity, can be built entirely from data already contained in the model. This method allows us to create a new model while bypassing the typical setup_and_train method. This simply imports all the relevant variables over; the final training is left up to the model.
|
overridevirtual |
Set one of the options in the algorithm. Use the option manager to set these options. If an option does not satisfy the conditions that the option manager has imposed on it, an error is thrown.
[in] | options | Options to set |
Reimplemented from turi::ml_model_base.
|
overridevirtual |
Implement serialization (load). The model subclass should reimplement this particular function. The syntax follows the standard turicreate load() method.
When this method is called, all the model options have been set up in the base class and are readily accessible. Furthermore, once this function is called, the model is treated as trained and ready to be used for prediction and ranking. Thus loading a model can effectively replace the training stage.
Implements turi::recsys::recsys_model_base.
|
overridevirtual |
Implement serialization (save). The model subclass should reimplement this particular function. The syntax follows the standard turicreate save() method.
Implements turi::recsys::recsys_model_base.
|
inherited |
Is this model trained.
|
inlineinherited |
Returns the flexible data type of the item column; the model must be trained at this point.
Definition at line 216 of file recsys_model_base.hpp.
|
inherited |
Lists all the keys accessible in the "model" map.
This is the function that the list_fields should call in python.
|
inherited |
Lists all the registered functions. Returns a map of function name to array of argument names for the function.
|
inherited |
Lists all the gettable properties of the class.
|
inherited |
Lists all the settable properties of the class.
|
pure virtualinherited |
Returns the name of the toolkit class, as exposed to client code. For example, the Python proxy for this instance will have a type with this name.
Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.
|
virtualinherited |
Declare the base registration function. This class has to be handled specially; the macros don't work here due to the override declarations.
Reimplemented in turi::model_proxy.
|
inherited |
Compute the precision and recall for a (potentially held out) set of observations.
validation_data | An ml_data giving the validation set the precision and recall should be calculated on. |
recommend_output | The output of the recommend method. Note that recommend should be called with top_k larger than the max value in cutoffs. |
cutoffs | A vector of cutoffs for computing e.g. the top [5,10,50] rankings. |
|
overridevirtual |
During the predict phase, we perform the "vector matrix product" where we compute a score for a particular (user, item) pair. This score is a sum of similarities between an item and all the items observed for the given user. For similarity functions that incorporate some target value for each (user, item) pair, this prediction also multiplies each similarity by that value, e.g. a rating they gave the item in question.
Implements turi::recsys::recsys_model_base.
|
inherited |
Return the top_k ranks for this model based on sorted predictions.
Here, for each user in users, the top_k ranks are returned in the same format as the previous function.
If exclude_observations is given, these observations are excluded from the returned values.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
|
protectedinherited |
Registers default argument values
|
protectedinherited |
Adds a docstring for the specified function or property name.
|
protectedinherited |
Adds a function with the specified name, and argument list.
|
protectedinherited |
Adds a property getter with the specified name.
|
protectedinherited |
Adds a property setter with the specified name.
std::string turi::recsys::recsys_itemcf::response_column_name | ( | ) | const |
Utilities
|
inherited |
Save a toolkit class to a data stream.
|
inherited |
Save a toolkit class to disk.
url | The destination url to store the class. |
sidedata | Any additional side information |
|
overridevirtual |
For a given base observation, predict the score for all the items with all non-item columns replaced by the values in the base observation.
The base_observation vector is used to generate all the observations predicted. New observations are generated by repeatedly copying template_observation, then replacing the values in item_column_index by each possible item value.
Implements turi::recsys::recsys_model_base.
|
inherited |
Set one of the options in the algorithm.
The value is checked against the requirements given by the option instance.
[in] | name | Name of the option. |
[in] | value | Value for the option. |
|
inherited |
Sets a property. The new value of the property should appear in the argument map under the key "value".
|
inherited |
Train the model using an sframe as the primary observations. This method constructs the internal ml_data objects from the current options.
observation_data | An SFrame containing at least a column containing user ids and a column containing item ids. |
user_side_data | An SFrame containing side information about users, where one column matches with the user column of observation data. |
item_side_data | An SFrame containing side information about items, where one column matches with the item column of observation data. |
other_data | When provided, each model can implement a method set_extra_data in order to use this argument during training. |
|
inlineprotectedvirtualinherited |
Takes two datasets for training.
[in] | training_data_by_user | ML-Data sorted by user |
[in] | training_data_by_item | ML-Data sorted by item |
Reimplemented in turi::recsys::recsys_factorization_model_base.
Definition at line 68 of file recsys_model_base.hpp.
|
overridevirtual |
When the number of items is less than 20k, it uses the in-memory computation in train_in_memory(). Otherwise, it uses the SGraph-based implementation in train_using_sgraph().
Implements turi::recsys::recsys_model_base.
|
pure virtualinherited |
Returns a unique identifier for the toolkit class. It can be any unique ID. The UID is only used at runtime (to determine the concrete type of an arbitrary model_base instance) and is never stored.
Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.
Implemented in turi::model_proxy.
|
inlineinherited |
Returns the flexible data type of the user column; the model must be trained at this point.
Definition at line 209 of file recsys_model_base.hpp.
|
protected |
The primary tool for the item similarity modeling part.
Definition at line 184 of file itemcf.hpp.
|
protectedinherited |
All things python
Definition at line 206 of file ml_model.hpp.