Turi Create  4.0
turi::aggregate Namespace Reference

All the available groupby aggregators. See gl_sframe::groupby for details. More...

Classes

struct  groupby_descriptor_type
 

Functions

template<typename T , typename... Args>
groupby_descriptor_type make_aggregator (const std::vector< std::string > &group_columns, const Args &... args)
 
groupby_descriptor_type SUM (const std::string &col)
 
groupby_descriptor_type MAX (const std::string &col)
 
groupby_descriptor_type MIN (const std::string &col)
 
groupby_descriptor_type COUNT ()
 
groupby_descriptor_type MEAN (const std::string &col)
 
groupby_descriptor_type AVG (const std::string &col)
 
groupby_descriptor_type VAR (const std::string &col)
 
groupby_descriptor_type VARIANCE (const std::string &col)
 
groupby_descriptor_type STD (const std::string &col)
 
groupby_descriptor_type STDV (const std::string &col)
 
groupby_descriptor_type SELECT_ONE (const std::string &col)
 
groupby_descriptor_type COUNT_DISTINCT (const std::string &col)
 
groupby_descriptor_type ARGMAX (const std::string &agg, const std::string &out)
 
groupby_descriptor_type ARGMIN (const std::string &agg, const std::string &out)
 
groupby_descriptor_type CONCAT (const std::string &col)
 
groupby_descriptor_type CONCAT (const std::string &key, const std::string &value)
 
groupby_descriptor_type QUANTILE (const std::string &col, double quantile)
 
groupby_descriptor_type QUANTILE (const std::string &col, const std::vector< double > &quantiles)
 

Detailed Description

All the available groupby aggregators. See gl_sframe::groupby for details.

Function Documentation

◆ ARGMAX()

groupby_descriptor_type turi::aggregate::ARGMAX ( const std::string &  agg,
const std::string &  out 
)

Builtin arg maximum aggregator for groupby.

Example: Get the movie with maximum rating per user.

sf.groupby({"user"},
{{"best_movie", aggregate::ARGMAX("rating","movie")}});

◆ ARGMIN()

groupby_descriptor_type turi::aggregate::ARGMIN ( const std::string &  agg,
const std::string &  out 
)

Builtin arg minimum aggregator for groupby.

Example: Get the movie with minimum rating per user.

sf.groupby({"user"},
{{"worst_movie", aggregate::ARGMIN("rating","movie")}});

◆ AVG()

groupby_descriptor_type turi::aggregate::AVG ( const std::string &  col)

Builtin average aggregator for groupby.

Synonym for aggregate::MEAN.

Example: Get the average rating of each user.

sf.groupby({"user"},
{{"rating_avg",aggregate::AVG("rating")}});
See also
gl_sframe::groupby

◆ CONCAT() [1/2]

groupby_descriptor_type turi::aggregate::CONCAT ( const std::string &  col)

Builtin aggregator that combines values from one or two columns in one group into either a dictionary value, list value or array value.

For example, to combine values from two columns that belong to one group into one dictionary value:

sf.groupby({"document"},
{{"word_count", aggregate::CONCAT("word", "count")}});

To combine values from one column that belong to one group into a list value:

sf.groupby({"user"},
{{"friends", aggregate::CONCAT("friend")}});

◆ CONCAT() [2/2]

groupby_descriptor_type turi::aggregate::CONCAT ( const std::string &  key,
const std::string &  value 
)

Builtin aggregator that combines values from one or two columns in one group into either a dictionary value, list value or array value.

For example, to combine values from two columns that belong to one group into one dictionary value:

sf.groupby({"document"},
{{"word_count", aggregate::CONCAT("word", "count")}});

To combine values from one column that belong to one group into a list value:

sf.groupby({"user"},
{{"friends", aggregate::CONCAT("friend")}});

◆ COUNT()

groupby_descriptor_type turi::aggregate::COUNT ( )

Builtin count aggregator for groupby.

Example: Get the number of occurrences of each user.

sf.groupby({"user"},
{{"rating_count",aggregate::COUNT()}});
See also
gl_sframe::groupby

◆ COUNT_DISTINCT()

groupby_descriptor_type turi::aggregate::COUNT_DISTINCT ( const std::string &  col)

Builtin distinct-count aggregator for groupby.

Example: Get the number of unique movies rated by each user.

sf.groupby({"user"},
{{"num_unique_movies", aggregate::COUNT_DISTINCT("movie")}});

◆ make_aggregator()

template<typename T , typename... Args>
groupby_descriptor_type turi::aggregate::make_aggregator ( const std::vector< std::string > &  group_columns,
const Args &...  args 
)

Create a groupby_descriptor_type of user defined groupby aggregator type T.

Parameters
group_columns  A vector of column names expected by the groupby aggregator.
args  Extra arguments used to construct T.
class my_aggregator : public group_aggregate_value {
// default constructible
my_aggregator();
...
};
auto aggregator1 = make_aggregator<my_aggregator>({"col1"});
class my_complicated_aggregator : public group_aggregate_value {
// constructor requires extra arguments
my_complicated_aggregator(const std::vector<double>& initial_values);
...
};
std::vector<double> initial_values {1,2,3};
auto aggregator2 = make_aggregator<my_complicated_aggregator>({"col1", "col2"},
initial_values);

Definition at line 99 of file gl_sframe.hpp.

◆ MAX()

groupby_descriptor_type turi::aggregate::MAX ( const std::string &  col)

Builtin max aggregator for groupby.

Example: Get the max of the rating column for each user.

sf.groupby({"user"},
{{"rating_max",aggregate::MAX("rating")}});
See also
gl_sframe::groupby

◆ MEAN()

groupby_descriptor_type turi::aggregate::MEAN ( const std::string &  col)

Builtin average aggregator for groupby.

Synonym for aggregate::AVG.

Example: Get the average rating of each user.

sf.groupby({"user"},
{{"rating_avg",aggregate::MEAN("rating")}});
See also
gl_sframe::groupby

◆ MIN()

groupby_descriptor_type turi::aggregate::MIN ( const std::string &  col)

Builtin min aggregator for groupby.

Example: Get the min of the rating column for each user.

sf.groupby({"user"},
{{"rating_min",aggregate::MIN("rating")}});
See also
gl_sframe::groupby

◆ QUANTILE() [1/2]

groupby_descriptor_type turi::aggregate::QUANTILE ( const std::string &  col,
double  quantile 
)

Builtin approximate quantile aggregator for groupby.

Accepts as an argument either a single quantile or a list of quantiles to query.

To extract the median:

sf.groupby({"user"},
{{"rating_quantiles", aggregate::QUANTILE("rating", 0.5)}});

To extract a few quantiles:

sf.groupby({"user"},
{{"rating_quantiles", aggregate::QUANTILE("rating", {0.25,0.5,0.75})}});

Or equivalently

sf.groupby({"user"},
{{"rating_quantiles", aggregate::QUANTILE("rating", std::vector<double>{0.25,0.5,0.75})}});

The returned quantiles are guaranteed to have 0.5% accuracy. That is to say, if the requested quantile is 0.50, the resultant quantile value may be between 0.495 and 0.505 of the true quantile.

◆ QUANTILE() [2/2]

groupby_descriptor_type turi::aggregate::QUANTILE ( const std::string &  col,
const std::vector< double > &  quantiles 
)

Builtin approximate quantile aggregator for groupby.

Accepts as an argument either a single quantile or a list of quantiles to query.

To extract the median:

sf.groupby({"user"},
{{"rating_quantiles", aggregate::QUANTILE("rating", 0.5)}});

To extract a few quantiles:

sf.groupby({"user"},
{{"rating_quantiles", aggregate::QUANTILE("rating", {0.25,0.5,0.75})}});

Or equivalently

sf.groupby({"user"},
{{"rating_quantiles", aggregate::QUANTILE("rating", std::vector<double>{0.25,0.5,0.75})}});

The returned quantiles are guaranteed to have 0.5% accuracy. That is to say, if the requested quantile is 0.50, the resultant quantile value may be between 0.495 and 0.505 of the true quantile.

◆ SELECT_ONE()

groupby_descriptor_type turi::aggregate::SELECT_ONE ( const std::string &  col)

Builtin aggregator for groupby which selects one row in the group.

Example: Get one rating row from a user.

sf.groupby({"user"},
{{"rating",aggregate::SELECT_ONE("rating")}});

If multiple columns are selected, they are guaranteed to come from the same row. For instance:

sf.groupby({"user"},
{{"rating", aggregate::SELECT_ONE("rating")},
{"item", aggregate::SELECT_ONE("item")}});

The selected "rating" and "item" value for each user will come from the same row in the gl_sframe.

◆ STD()

groupby_descriptor_type turi::aggregate::STD ( const std::string &  col)

Builtin standard deviation aggregator for groupby.

Synonym for aggregate::STDV.

Example: Get the rating standard deviation of each user.

sf.groupby({"user"},
{{"rating_std",aggregate::STD("rating")}});
See also
aggregate::STDV
aggregate::VAR
gl_sframe::groupby

◆ STDV()

groupby_descriptor_type turi::aggregate::STDV ( const std::string &  col)

Builtin standard deviation aggregator for groupby.

Synonym for aggregate::STD.

Example: Get the rating standard deviation of each user.

sf.groupby({"user"},
{{"rating_std",aggregate::STDV("rating")}});
See also
turi::aggregate::STD
turi::aggregate::VAR
gl_sframe::groupby

◆ SUM()

groupby_descriptor_type turi::aggregate::SUM ( const std::string &  col)

Builtin sum aggregator for groupby.

Example: Get the sum of the rating column for each user.

sf.groupby({"user"},
{{"rating_sum",aggregate::SUM("rating")}});
See also
gl_sframe::groupby

◆ VAR()

groupby_descriptor_type turi::aggregate::VAR ( const std::string &  col)

Builtin variance aggregator for groupby.

Synonym for aggregate::VARIANCE.

Example: Get the rating variance of each user.

sf.groupby({"user"},
{{"rating_var",aggregate::VAR("rating")}});
See also
aggregate::VARIANCE
aggregate::STD
gl_sframe::groupby

◆ VARIANCE()

groupby_descriptor_type turi::aggregate::VARIANCE ( const std::string &  col)

Builtin variance aggregator for groupby.

Synonym for aggregate::VAR.

Example: Get the rating variance of each user.

sf.groupby({"user"},
{{"rating_var",aggregate::VARIANCE("rating")}});
See also
aggregate::VAR
aggregate::STD
gl_sframe::groupby