Classes
struct	turi::option_handling::option_info

class	turi::option_manager

class	turi::recsys::lm_data_generator

class	turi::indexed_column_groupby

class	spmat

Functions
template<typename InputIterator1 , typename InputIterator2 , typename ComparisonFunction , typename AccumulateFunction >
static void	turi::accumulate_intersection (InputIterator1 first1, const InputIterator1 &last1, InputIterator2 first2, const InputIterator2 &last2, const ComparisonFunction &less_than_operator, const AccumulateFunction &accumulate_matching_pair)

template<typename InputIterator1 , typename InputIterator2 , typename AccumulateFunction >
static void	turi::accumulate_intersection (InputIterator1 first1, const InputIterator1 &last1, InputIterator2 first2, const InputIterator2 &last2, const AccumulateFunction &accumulate_matching_pair)

template<typename InputIterator1 , typename InputIterator2 >
static size_t	turi::count_intersection (InputIterator1 first1, const InputIterator1 &last1, InputIterator2 first2, const InputIterator2 &last2)

template<typename InputIterator1 , typename InputIterator2 , typename ComparisonFunction >
static size_t	turi::count_intersection (InputIterator1 first1, const InputIterator1 &last1, InputIterator2 first2, const InputIterator2 &last2, const ComparisonFunction &less_than_operator)

std::vector< size_t >	turi::get_unique_values (std::shared_ptr< sarray< flexible_type > > indexed_column)

std::shared_ptr< sarray< flexible_type > >	turi::make_unique (std::shared_ptr< sarray< flexible_type > > indexed_column)

std::vector< std::pair< double, double > >	turi::recsys::precision_and_recall (std::vector< size_t > actual, std::vector< size_t > predicted, const std::vector< size_t > &cutoffs)

std::vector< double >	turi::recsys::recall (const std::vector< size_t > &actual, const std::vector< size_t > &predicted, const std::vector< size_t > &cutoffs)

std::vector< double >	turi::recsys::precision (const std::vector< size_t > &actual, const std::vector< size_t > &predicted, const std::vector< size_t > &cutoffs)

float	turi::recsys::average_precision (const std::unordered_set< flexible_type > &actual, const std::vector< flexible_type > &predicted, const int k)

float	turi::recsys::mean_average_precision (const std::vector< std::unordered_set< flexible_type >> &actual, const std::vector< std::vector< flexible_type >> &predicted, const int k)

std::pair< sframe, sframe >	turi::split_sframe_on_index (const sframe &src, std::function< bool(size_t)> switch_function)

std::shared_ptr< sarray< flexible_type > >	turi::matrix_to_sarray (const Eigen::MatrixXd &m)

template<typename GenFunction >
sframe	turi::sframe_from_ranged_generator (const std::vector< std::string > &column_names, const std::vector< flex_type_enum > &column_types, size_t num_rows, GenFunction &&generator_function)

Detailed Description

Random toolkit utility methods

Function Documentation

◆ accumulate_intersection() [1/2]

template<typename InputIterator1 , typename InputIterator2 , typename ComparisonFunction , typename AccumulateFunction >

static void turi::accumulate_intersection	(	InputIterator1	first1,
		const InputIterator1 &	last1,
		InputIterator2	first2,
		const InputIterator2 &	last2,
		const ComparisonFunction &	less_than_operator,
		const AccumulateFunction &	accumulate_matching_pair
	)

inline static

Calls an accumulator on all intersections in two sorted ranges. This behavior is analogous to std::set_intersection, except that the intersections are simply accumulated. Matching is performed through the use of less_than_operator.

Template Parameters

InputIterator1	The type of an input iterator over the first range.
InputIterator2	The type of an input iterator over the second range.

Parameters

first1	The begin() iterator of the first range.
last1	The end() iterator of the first range.
first2	The begin() iterator of the second range.
last2	The end() iterator of the second range.
less_than_operator	A comparison function that determines the ordering.

Definition at line 39 of file algorithmic_utils.hpp.

◆ accumulate_intersection() [2/2]

template<typename InputIterator1 , typename InputIterator2 , typename AccumulateFunction >

static void turi::accumulate_intersection	(	InputIterator1	first1,
		const InputIterator1 &	last1,
		InputIterator2	first2,
		const InputIterator2 &	last2,
		const AccumulateFunction &	accumulate_matching_pair
	)

inline static

Calls an accumulator on all intersections in two sorted ranges. This behavior is analogous to std::set_intersection, except that the intersections are simply accumulated.

Template Parameters

InputIterator1	The type of an input iterator over the first range.
InputIterator2	The type of an input iterator over the second range.

Parameters

first1	The begin() iterator of the first range.
last1	The end() iterator of the first range.
first2	The begin() iterator of the second range.
last2	The end() iterator of the second range.

Definition at line 80 of file algorithmic_utils.hpp.

◆ average_precision()

float turi::recsys::average_precision	(	const std::unordered_set< flexible_type > &	actual,
		const std::vector< flexible_type > &	predicted,
		const int	k
	)

Compute the average precision at k. This combines precision values at values up to k, where lower ranks are less important.

Let $p_k$ be a vector of the first $k$ elements of the argument "predicted", and let $a$ be the set of items in the "actual" argument. If $P(k)$ is the precision at $k$ , then the average precision at $k$ is defined as

$AP(k) = \frac{1}{\min(k, |a|)}\sum_{k: p_k \in a} \frac{P(k)}{k}$

Parameters

actual	an unordered set of observed items
predicted	a vector of predicted items
k	the maximum number of predicted elements

Returns: The average precision at k for the provided lists.

◆ count_intersection() [1/2]

template<typename InputIterator1 , typename InputIterator2 >

static size_t turi::count_intersection	(	InputIterator1	first1,
		const InputIterator1 &	last1,
		InputIterator2	first2,
		const InputIterator2 &	last2
	)

static

Counts the number of intersections in two sorted ranges. This behavior is analogous to std::set_intersection, except that the intersections are simply counted and not output.

Template Parameters

InputIterator1	The type of an input iterator over the first range.
InputIterator2	The type of an input iterator over the second range.

Parameters

first1	The begin() iterator of the first range.
last1	The end() iterator of the first range.
first2	The begin() iterator of the second range.
last2	The end() iterator of the second range.

Definition at line 114 of file algorithmic_utils.hpp.

◆ count_intersection() [2/2]

template<typename InputIterator1 , typename InputIterator2 , typename ComparisonFunction >

static size_t turi::count_intersection	(	InputIterator1	first1,
		const InputIterator1 &	last1,
		InputIterator2	first2,
		const InputIterator2 &	last2,
		const ComparisonFunction &	less_than_operator
	)

inline static

Counts the number of intersections in two sorted ranges. This behavior is analogous to std::set_intersection, except that the intersections are simply counted and not output. Matching is performed through the use of less_than_operator.

Template Parameters

InputIterator1	The type of an input iterator over the first range.
InputIterator2	The type of an input iterator over the second range.

Parameters

first1	The begin() iterator of the first range.
last1	The end() iterator of the first range.
first2	The begin() iterator of the second range.
last2	The end() iterator of the second range.
less_than_operator	A comparison function that determines the ordering.

Definition at line 152 of file algorithmic_utils.hpp.

◆ get_unique_values()

std::vector<size_t> turi::get_unique_values ( std::shared_ptr< sarray< flexible_type > > indexed_column )

Constructs a vector of the unique values present in an sframe column having integer type. The resulting vector is in sorted order, so membership can be queried using std::binary_search. When the 0, ..., n condition is met, this is faster than .unique().

◆ make_unique()

std::shared_ptr<sarray<flexible_type> > turi::make_unique ( std::shared_ptr< sarray< flexible_type > > indexed_column )

Convenience function: Same as get_unique_values, but returns the result as an sarray.

◆ matrix_to_sarray()

std::shared_ptr<sarray<flexible_type> > turi::matrix_to_sarray ( const Eigen::MatrixXd & m )

Create an SArray of vector type, where each element is a row of the provided matrix.

◆ mean_average_precision()

float turi::recsys::mean_average_precision	(	const std::vector< std::unordered_set< flexible_type >> &	actual,
		const std::vector< std::vector< flexible_type >> &	predicted,
		const int	k
	)

Compute mean average precision across all of the elements of the provided vectors. The two vectors must have the same length.

actual: a vector of unordered sets of observed items. predicted: a vector of vectors of predicted items.

◆ precision()

std::vector<double> turi::recsys::precision	(	const std::vector< size_t > &	actual,
		const std::vector< size_t > &	predicted,
		const std::vector< size_t > &	cutoffs
	)

Compute precision at k. In information retrieval terms, this represents the ratio of relevant, retrieved items to the number of retrieved items.

Let $p_k$ be a vector of the first $k$ elements of the argument "predicted", and let $a$ be the set of items in the "actual" argument. The "precision at K" is defined as

$P(k) = \frac{ | a \cap p_k | }{|p_k|}$

The order of the elements in predicted affects the returned score. Only unique predicted values contribute to the score. One of the provided vectors must be nonempty. If actual is empty, returns 0.0. If predicted is empty, returns 1.0.

Parameters

actual	an unordered vector of observed items
predicted	a vector of predicted items
cutoffs	A vector of positive integers for which precision should be calculated

return A vector of precision scores corresponding to the values in cutoffs

Notes: The corner cases that involve empty lists were chosen to be consistent with the feasible set of precision-recall curves, which start at (precision, recall) = (1,0) and end at (0,1). However, we do not believe there is a well-known consensus on this choice. Other versions of the above code

◆ precision_and_recall()

std::vector<std::pair<double, double> > turi::recsys::precision_and_recall	(	std::vector< size_t >	actual,
		std::vector< size_t >	predicted,
		const std::vector< size_t > &	cutoffs
	)

Compute precision and recall at k. This is faster than calculating precision and recall separately. In information retrieval terms, this represents the ratio of relevant, retrieved items to the number of relevant items.

Let $p_k$ be a vector of the first $k$ elements of the argument "predicted", and let $a$ be the set of items in the "actual" argument. The "recall at K" is defined as

$R(k) = \frac{ | a \cap p_k | }{|a|}$

The order of the elements in predicted affects the returned score. Only unique predicted values contribute to the score. One of the provided vectors must be nonempty. If actual is empty, returns 1.0. If predicted is empty, returns 0.0.

Parameters

actual	A vector of observed items
predicted	A vector of predicted items
cutoffs	A vector of positive integers for which precision and recall should be calculated

return A vector of pair(precision, recall) scores corresponding to the values in cutoffs.

Notes: The corner cases that involve empty lists were chosen to be consistent with the feasible set of precision-recall curves, which start at (precision, recall) = (1,0) and end at (0,1). However, we do not believe there is a well-known consensus on this choice.

◆ recall()

std::vector<double> turi::recsys::recall	(	const std::vector< size_t > &	actual,
		const std::vector< size_t > &	predicted,
		const std::vector< size_t > &	cutoffs
	)

Compute recall at k. In information retrieval terms, this represents the ratio of relevant, retrieved items to the number of relevant items.

Let $p_k$ be a vector of the first $k$ elements of the argument "predicted", and let $a$ be the set of items in the "actual" argument. The "recall at K" is defined as

$R(k) = \frac{ | a \cap p_k | }{|a|}$

The order of the elements in predicted affects the returned score. Only unique predicted values contribute to the score. One of the provided vectors must be nonempty. If actual is empty, returns 1.0. If predicted is empty, returns 0.0.

Parameters

actual	an unordered vector of observed items
predicted	a vector of predicted items
cutoffs	A vector of positive integers for which recall should be calculated

return A vector of recall scores corresponding to the values in cutoffs

Notes: The corner cases that involve empty lists were chosen to be consistent with the feasible set of precision-recall curves, which start at (precision, recall) = (1,0) and end at (0,1). However, we do not believe there is a well-known consensus on this choice.

◆ sframe_from_ranged_generator()

template<typename GenFunction >

sframe turi::sframe_from_ranged_generator	(	const std::vector< std::string > &	column_names,
		const std::vector< flex_type_enum > &	column_types,
		size_t	num_rows,
		GenFunction &&	generator_function
	)

Generate an SFrame from a transform function that takes an index and fills a vector of flexible type. The signature of the transform function should be:

gen_fill_func(size_t row_index, std::vector<flexible_type>& out_values);

Access to this function is done in parallel.

Definition at line 48 of file sframe_utils.hpp.

◆ split_sframe_on_index()

std::pair<sframe,sframe> turi::split_sframe_on_index	(	const sframe &	src,
		std::function< bool(size_t)>	switch_function
	)

Filters the rows of an sframe into the first (false) or second (true) sframe by a switch function.

Classes

Functions

Detailed Description

Function Documentation

◆ accumulate_intersection() [1/2]

◆ accumulate_intersection() [2/2]

◆ average_precision()

◆ count_intersection() [1/2]

◆ count_intersection() [2/2]

◆ get_unique_values()

◆ make_unique()

◆ matrix_to_sarray()

◆ mean_average_precision()

◆ precision()

◆ precision_and_recall()

◆ recall()

◆ sframe_from_ranged_generator()

◆ split_sframe_on_index()