Turi Create
4.0
|
#include <toolkits/recsys/recsys_model_base.hpp>
Public Member Functions | |
recsys_model_base () | |
Default constructor. | |
virtual sframe | predict (const v2::ml_data &test_data) const =0 |
virtual sframe | get_similar_users (std::shared_ptr< sarray< flexible_type > > users, size_t k) const =0 |
virtual sframe | get_similar_items (std::shared_ptr< sarray< flexible_type > > items, size_t k) const =0 |
virtual void | get_item_similarity_scores (size_t item, std::vector< std::pair< size_t, double > > &sim_scores) const |
virtual sframe | get_item_intersection_info (const sframe &unindexed_item_pairs) const |
virtual void | score_all_items (std::vector< std::pair< size_t, double > > &scores, const std::vector< v2::ml_data_entry > &query_row, size_t top_k, const std::vector< std::pair< size_t, double > > &user_item_list, const std::vector< std::pair< size_t, double > > &new_user_item_data, const std::vector< v2::ml_data_row_reference > &new_observation_data, const std::shared_ptr< v2::ml_data_side_features > &known_side_features) const =0 |
v2::ml_data | create_ml_data (const sframe &data, const sframe &new_user_side_data=sframe(), const sframe &new_item_side_data=sframe()) const |
flex_type_enum | user_type () const |
flex_type_enum | item_type () const |
void | setup_and_train (const sframe &observation_data, const sframe &user_side_data=sframe(), const sframe &item_side_data=sframe(), const std::map< std::string, variant_type > &other_data=(std::map< std::string, variant_type >())) |
void | import_all_from_other_model (const recsys_model_base *other) |
std::shared_ptr< recsys_model_base > | get_popularity_baseline () const |
sframe | recommend (const sframe &reference_data, size_t top_k, const sframe &restriction_data=sframe(), const sframe &exclusion_data=sframe(), const sframe &new_observation_data=sframe(), const sframe &new_user_data=sframe(), const sframe &new_item_data=sframe(), bool exclude_training_interactions=true, double diversity_factor=0, size_t random_seed=0) const |
sframe | precision_recall_stats (const sframe &indexed_validation_data, const sframe &recommend_output, const std::vector< size_t > &cutoffs) const |
sframe | get_num_items_per_user () const |
sframe | get_num_users_per_item () const |
size_t | get_version () const override |
virtual void | save_impl (turi::oarchive &oarc) const override |
Serialization – save. | |
void | load_version (turi::iarchive &iarc, size_t version) override |
Serialization – load. | |
std::map< std::string, flexible_type > | get_train_stats () |
Get stats about algorithm runtime. | |
virtual void | init_options (const std::map< std::string, flexible_type > &_options) |
std::vector< std::string > | list_fields () |
const variant_type & | get_value_from_state (std::string key) |
const std::map< std::string, flexible_type > & | get_current_options () const |
std::map< std::string, flexible_type > | get_default_options () const |
const flexible_type & | get_option_value (const std::string &name) const |
const std::map< std::string, variant_type > & | get_state () const |
bool | is_trained () const |
void | set_options (const std::map< std::string, flexible_type > &_options) |
void | add_or_update_state (const std::map< std::string, variant_type > &dict) |
const std::vector< option_handling::option_info > & | get_option_info () const |
virtual const char * | name ()=0 |
virtual const std::string & | uid ()=0 |
void | save_to_url (const std::string &url, const variant_map_type &side_data={}) |
void | save_model_to_data (std::ostream &out) |
const std::map< std::string, std::vector< std::string > > & | list_functions () |
const std::vector< std::string > & | list_get_properties () |
const std::vector< std::string > & | list_set_properties () |
variant_type | call_function (const std::string &function, variant_map_type argument) |
variant_type | get_property (const std::string &property) |
variant_type | set_property (const std::string &property, variant_map_type argument) |
const std::string & | get_docstring (const std::string &symbol) |
virtual void | perform_registration () |
Static Public Attributes | |
static constexpr size_t | USER_COLUMN_INDEX = 0 |
The metadata needed for translating the data back and forth. | |
Protected Member Functions | |
virtual std::map< std::string, flexible_type > | train (const v2::ml_data &training_data)=0 |
virtual std::map< std::string, flexible_type > | train (const v2::ml_data &training_data_by_user, const v2::ml_data &training_data_by_item) |
template<typename GetSimilarFunction > | |
sframe | _create_similar_sframe (size_t column_index, std::shared_ptr< sarray< flexible_type > > items, size_t k, GetSimilarFunction &&similar) const |
virtual void | internal_save (turi::oarchive &oarc) const =0 |
virtual void | internal_load (turi::iarchive &iarc, size_t version)=0 |
void | register_function (std::string fnname, const std::vector< std::string > &arguments, impl_fn fn) |
void | register_defaults (const std::string &fnname, const variant_map_type &arguments) |
void | register_setter (const std::string &propname, impl_fn setfn) |
void | register_getter (const std::string &propname, impl_fn getfn) |
void | register_docstring (const std::pair< std::string, std::string > &fnname_docstring) |
Protected Attributes | |
std::map< std::string, variant_type > | state |
The base class for recsys model classes. Individual models are expected to implement all of the pure virtual functions below, along with (optionally) overriding any of the other virtual methods.
Definition at line 46 of file recsys_model_base.hpp.
|
protected |
Utility function to aid in the retrieval of similar items.
GetSimilarFunction is a function called as f(size_t idx, std::vector<std::pair<size_t, double> >& idx_dist_dest);
Definition at line 476 of file recsys_model_base.hpp.
|
inherited |
Append the key value store of the model.
[in] | dict | Options (Key-Value pairs) to set |
|
inherited |
Calls a user defined function.
v2::ml_data turi::recsys::recsys_model_base::create_ml_data | ( | const sframe & | data, |
const sframe & | new_user_side_data = sframe() , |
||
const sframe & | new_item_side_data = sframe() |
||
) | const |
Creates an ml_data object according to the given schema. No target column.
|
inherited |
Get current options.
Interfaces with the get_current_options function in the Python side.
|
inherited |
Get default options.
Interfaces with the get_default_options function in the Python side.
|
inherited |
Returns the toolkit documentation for a function or property.
|
virtual |
Returns information about all the users in the overlap of the item pairs listed in two columns in unindexed_item_pairs. All these items must be present in the training data.
Returns an sframe with information about this intersection. Columns are item_1, item_2, num_users_1, num_users_2, item_intersection (dict, user ->
|
inlinevirtual |
For each of the items in sim_scores (first part of tuple), sets a similarity score (second part of tuple) that is higher for items similar to item.
Reimplemented in turi::recsys::recsys_factorization_model_base.
Definition at line 107 of file recsys_model_base.hpp.
sframe turi::recsys::recsys_model_base::get_num_items_per_user | ( | ) | const |
Return an SFrame containing each user id and the number of observations with that user in the training set.
sframe turi::recsys::recsys_model_base::get_num_users_per_item | ( | ) | const |
Return an SFrame containing each item and the number of observations with that item in the training set.
|
inherited |
Returns the option information struct for each of the set parameters.
|
inherited |
Returns the value of an option. Throws an error if the option does not exist.
[in] | name | Name of the option to get. |
std::shared_ptr<recsys_model_base> turi::recsys::recsys_model_base::get_popularity_baseline | ( | ) | const |
Creates and returns a popularity baseline
|
inherited |
Reads a property.
|
pure virtual |
Get the nearest k items for each of the provided items. If no items are provided, then similar items are retrieved for all items observed during training.
Implemented in turi::recsys::recsys_itemcf.
|
pure virtual |
Get the nearest k users for each of the provided users. If no users are provided, then similar users are retrieved for all users observed during training.
Implemented in turi::recsys::recsys_itemcf.
|
inherited |
Get model.
|
inherited |
Returns the value of a particular key from the state.
From the python side, this is interfaced with the get() function or the [] operator in python.
|
inlineoverridevirtual |
Returns the current version of the toolkit class for this instance, for serialization purposes.
Reimplemented from turi::model_base.
Definition at line 380 of file recsys_model_base.hpp.
void turi::recsys::recsys_model_base::import_all_from_other_model | ( | const recsys_model_base * | other | ) |
Some of the models, such as popularity, can be built entirely from data already contained in the model. This method allows us to create a new model while bypassing the typical setup_and_train method. This simply imports all the relevant variables over; the final training is left up to the model.
|
inlinevirtualinherited |
Set one of the options in the algorithm. Use the option manager to set these options. If the option does not satisfy the conditions that the option manager has imposed on it, errors will be thrown.
[in] | options | Options to set |
Reimplemented in turi::kmeans::kmeans_model, turi::pattern_mining::fp_growth, turi::text::topic_model, turi::recsys::recsys_itemcf, turi::supervised::logistic_regression, turi::supervised::linear_svm, turi::sdk_model::feature_engineering::transformer_base, turi::text::alias_topic_model, turi::sdk_model::feature_engineering::random_projection, turi::supervised::xgboost::xgboost_model, turi::sdk_model::feature_engineering::feature_binner, turi::supervised::xgboost::boosted_trees_classifier, turi::supervised::xgboost::random_forest_classifier, turi::sdk_model::feature_engineering::count_featurizer, turi::sdk_model::feature_engineering::sample_transformer, turi::supervised::linear_regression, turi::supervised::xgboost::boosted_trees_regression, turi::supervised::xgboost::random_forest_regression, and turi::recsys::recsys_factorization_model_base.
Definition at line 80 of file ml_model.hpp.
|
protectedpure virtual |
Implement serialization (load). The model subclass should reimplement this particular function. The syntax follows the standard turicreate load() method.
When this method is called, all the model options have been set up in the base class and are readily accessible. Furthermore, once this function is called, the model is treated as trained and ready to be used for prediction and ranking. Thus loading a model can effectively replace the training stage.
Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.
|
protectedpure virtual |
Implement serialization (save). The model subclass should reimplement this particular function. The syntax follows the standard turicreate save() method.
Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.
|
inherited |
Is this model trained.
|
inline |
Returns the flexible data type of the item column. The model must be trained at this point.
Definition at line 216 of file recsys_model_base.hpp.
|
inherited |
Lists all the keys accessible in the "model" map.
This is the function that the list_fields should call in python.
|
inherited |
Lists all the registered functions. Returns a map of function name to array of argument names for the function.
|
inherited |
Lists all the gettable properties of the class.
|
inherited |
Lists all the settable properties of the class.
|
pure virtualinherited |
Returns the name of the toolkit class, as exposed to client code. For example, the Python proxy for this instance will have a type with this name.
Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.
|
virtualinherited |
Declare the base registration function. This class has to be handled specially; the macros don't work here due to the override declarations.
Reimplemented in turi::model_proxy.
sframe turi::recsys::recsys_model_base::precision_recall_stats | ( | const sframe & | indexed_validation_data, |
const sframe & | recommend_output, | ||
const std::vector< size_t > & | cutoffs | ||
) | const |
Compute the precision and recall for a (potentially held out) set of observations.
indexed_validation_data | An SFrame giving the validation set the precision and recall should be calculated on. |
recommend_output | The output of the recommend method. Note that recommend should be called with top_k larger than the max value in cutoffs. |
cutoffs | A vector of cutoffs for computing e.g. the top [5,10,50] rankings. |
|
pure virtual |
Run predictions on each element in the test data set. Returns a vector corresponding to the response prediction of each observation in the test_data set. Also takes a ml_data in the same format containing observations that are considered "available" during prediction time.
Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.
sframe turi::recsys::recsys_model_base::recommend | ( | const sframe & | reference_data, |
size_t | top_k, | ||
const sframe & | restriction_data = sframe() , |
||
const sframe & | exclusion_data = sframe() , |
||
const sframe & | new_observation_data = sframe() , |
||
const sframe & | new_user_data = sframe() , |
||
const sframe & | new_item_data = sframe() , |
||
bool | exclude_training_interactions = true , |
||
double | diversity_factor = 0 , |
||
size_t | random_seed = 0 |
||
) | const |
Return the top_k ranks for this model based on sorted predictions.
Here, for each user in users, the top_k ranks are returned in the same format as the previous function.
If exclusion_data is given, these observations are excluded from the returned values.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
|
protectedinherited |
Registers default argument values
|
protectedinherited |
Adds a docstring for the specified function or property name.
|
protectedinherited |
Adds a function with the specified name, and argument list.
|
protectedinherited |
Adds a property getter with the specified name.
|
protectedinherited |
Adds a property setter with the specified name.
|
inherited |
Save a toolkit class to a data stream.
|
inherited |
Save a toolkit class to disk.
url | The destination url to store the class. |
side_data | Any additional side information |
|
pure virtual |
For a given base observation, predict the score for all the items with all non-item columns replaced by the values in the base observation.
The base_observation vector is used to generate all the observations predicted. New observations are generated by repeatedly copying base_observation, then replacing the values in item_column_index by each possible item value.
Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.
|
inherited |
Set one of the options in the algorithm.
The value is checked against the requirements given by the option instance.
[in] | name | Name of the option. |
[in] | value | Value for the option. |
|
inherited |
Sets a property. The new value of the property should appear in the argument map under the key "value".
void turi::recsys::recsys_model_base::setup_and_train | ( | const sframe & | observation_data, |
const sframe & | user_side_data = sframe() , |
||
const sframe & | item_side_data = sframe() , |
||
const std::map< std::string, variant_type > & | other_data = (std::map< std::string, variant_type >()) |
||
) |
Train the model using an sframe as the primary observations. This method constructs the internal ml_data objects from the current options.
observation_data | An SFrame containing at least a column containing user ids and a column containing item ids. |
user_side_data | An SFrame containing side information about users, where one column matches with the user column of observation data. |
item_side_data | An SFrame containing side information about items, where one column matches with the item column of observation data. |
other_data | When provided, each model can implement a method set_extra_data in order to use this argument during training. |
|
protectedpure virtual |
Train the algorithm. Takes a training/validation split. Returns a map of information about the run.
Implemented in turi::recsys::recsys_itemcf, and turi::recsys::recsys_factorization_model_base.
|
inlineprotectedvirtual |
Takes two datasets for training.
[in] | training_data_by_user | ML-Data sorted by user |
[in] | training_data_by_item | ML-Data sorted by item |
Reimplemented in turi::recsys::recsys_factorization_model_base.
Definition at line 68 of file recsys_model_base.hpp.
|
pure virtualinherited |
Returns a unique identifier for the toolkit class. It can be any unique ID. The UID is only used at runtime (to determine the concrete type of an arbitrary model_base instance) and is never stored.
Note: this function is typically overridden using the BEGIN_CLASS_MEMBER_REGISTRATION macro.
Implemented in turi::model_proxy.
|
inline |
Returns the flexible data type of the user column. The model must be trained at this point.
Definition at line 209 of file recsys_model_base.hpp.
|
protectedinherited |
All things python
Definition at line 206 of file ml_model.hpp.