Turi Create
4.0
|
#include <core/data/sframe/gl_sarray.hpp>
Public Member Functions | |
gl_sarray () | |
Constructs an empty SArray. | |
gl_sarray (const gl_sarray &) | |
Copy Constructor. | |
gl_sarray (gl_sarray &&) | |
Move Constructor. | |
gl_sarray & | operator= (const gl_sarray &) |
Copy Assignment. | |
gl_sarray & | operator= (gl_sarray &&) |
Move Assignment. | |
gl_sarray (const std::string &directory) | |
gl_sarray (const std::vector< flexible_type > &values, flex_type_enum dtype=flex_type_enum::UNDEFINED) | |
gl_sarray (const std::initializer_list< flexible_type > &values) | |
gl_sarray | contains (const flexible_type &other) const |
flexible_type | operator[] (int64_t i) const |
gl_sarray | operator[] (const gl_sarray &slice) const |
gl_sarray | operator[] (const std::initializer_list< int64_t > &slice) const |
void | materialize_to_callback (std::function< bool(size_t, const std::shared_ptr< sframe_rows > &)> callback, size_t nthreads=(size_t)(-1)) |
gl_sarray_range | range_iterator (size_t start=0, size_t end=(size_t)(-1)) const |
void | save (const std::string &directory, const std::string &format="binary") const |
size_t | size () const |
bool | empty () const |
flex_type_enum | dtype () const |
void | materialize () const |
bool | is_materialized () const |
gl_sarray | head (size_t n) const |
gl_sarray | tail (size_t n) const |
gl_sarray | count_words (bool to_lower=true, turi::flex_list delimiters={"\r", "\v", "\n", "\f", "\t", " "}) const |
gl_sarray | count_ngrams (size_t n=2, std::string method="word", bool to_lower=true, bool ignore_space=true) const |
gl_sarray | dict_trim_by_keys (const std::vector< flexible_type > &keys, bool exclude=true) const |
gl_sarray | dict_trim_by_values (const flexible_type &lower=FLEX_UNDEFINED, const flexible_type &upper=FLEX_UNDEFINED) const |
gl_sarray | dict_keys () const |
gl_sarray | dict_values () const |
gl_sarray | dict_has_any_keys (const std::vector< flexible_type > &keys) const |
gl_sarray | dict_has_all_keys (const std::vector< flexible_type > &keys) const |
gl_sarray | apply (std::function< flexible_type(const flexible_type &)> fn, flex_type_enum dtype, bool skip_undefined=true) const |
gl_sarray | filter (std::function< bool(const flexible_type &)> fn, bool skip_undefined=true) const |
gl_sarray | sample (double fraction) const |
gl_sarray | sample (double fraction, size_t seed, bool exact=false) const |
gl_sarray | hash (size_t seed=0) const |
bool | all () const |
bool | any () const |
flexible_type | max () const |
flexible_type | min () const |
flexible_type | sum () const |
flexible_type | mean () const |
flexible_type | std () const |
size_t | nnz () const |
size_t | num_missing () const |
gl_sarray | datetime_to_str (const std::string &str_format="%Y-%m-%dT%H:%M:%S%ZP") const |
gl_sarray | str_to_datetime (const std::string &str_format="%Y-%m-%dT%H:%M:%S%ZP") const |
gl_sarray | pixel_array_to_image (size_t width, size_t height, size_t channels=3, bool undefined_on_failure=true) const |
gl_sarray | astype (flex_type_enum dtype, bool undefined_on_failure=true) const |
gl_sarray | clip (flexible_type lower=FLEX_UNDEFINED, flexible_type upper=FLEX_UNDEFINED) const |
gl_sarray | clip_lower (flexible_type threshold) const |
gl_sarray | clip_upper (flexible_type threshold) const |
gl_sarray | dropna () const |
gl_sarray | fillna (flexible_type value) const |
gl_sarray | topk_index (size_t topk=10, bool reverse=false) const |
gl_sarray | append (const gl_sarray &other) const |
gl_sarray | unique () const |
gl_sarray | item_length () const |
gl_sframe | split_datetime (const std::string &column_name_prefix="X", const std::vector< std::string > &limit={"year","month","day","hour","minute","second"}, bool tzone=false) const |
gl_sframe | unpack (const std::string &column_name_prefix="X", const std::vector< flex_type_enum > &column_types=std::vector< flex_type_enum >(), const flexible_type &na_value=FLEX_UNDEFINED, const std::vector< flexible_type > &limit=std::vector< flexible_type >()) const |
gl_sarray | sort (bool ascending=true) const |
gl_sarray | subslice (flexible_type start=FLEX_UNDEFINED, flexible_type stop=FLEX_UNDEFINED, flexible_type step=FLEX_UNDEFINED) const |
gl_sarray | cumulative_aggregate (std::shared_ptr< group_aggregate_value > aggregator) const |
gl_sarray | cumulative_sum () const |
gl_sarray | builtin_rolling_apply (const std::string &fn_name, ssize_t start, ssize_t end, size_t min_observations=size_t(-1)) const |
void | show (const std::string &path_to_client, const flexible_type &title, const flexible_type &xlabel, const flexible_type &ylabel) const |
std::shared_ptr< visualization::Plot > | plot (const flexible_type &title, const flexible_type &xlabel, const flexible_type &ylabel) const |
virtual std::shared_ptr< unity_sarray > | get_proxy () const |
Numeric operator overloads. | |
Most operators are overloaded and will perform element-wise operations on the entire array. For instance: gl_sarray a{1,2,3,4,5}; // an array of 5 exclamation marks gl_sarray b = gl_sarray::from_const("!", 5); auto ret = (a * 2 - 1).astype(flex_type_enum::STRING) + b; // results in ret being the array ["1!", "3!", "5!", "7!", "9!"]; Comparison operators will return a gl_sarray of binary integers. Logical and bitwise operators are equivalent: & and && mean the same thing, as do | and ||, and they provide logical element-wise "and" and "or" respectively. gl_sarray a{1,2,3,4,5}; auto ret = a > 1 && a <= 4; // ret will be an integer array containing [0,1,1,1,0] These are useful for the logical filter operation: gl_sarray a{1,2,3,4,5}; gl_sarray b = a.astype(flex_type_enum::STRING); auto ret = b[a > 1 && a <= 4]; // ret will be a string array containing ["2","3","4"] The logical and bitwise operators can be used with non-integral arrays in which case all empty values evaluate to False. i.e. for string, list, and dictionary SArrays, empty values are interpreted as false. For instance: | |
gl_sarray | operator+ (const gl_sarray &other) const |
gl_sarray | operator- (const gl_sarray &other) const |
gl_sarray | operator* (const gl_sarray &other) const |
gl_sarray | operator/ (const gl_sarray &other) const |
gl_sarray | operator< (const gl_sarray &other) const |
gl_sarray | operator> (const gl_sarray &other) const |
gl_sarray | operator<= (const gl_sarray &other) const |
gl_sarray | operator>= (const gl_sarray &other) const |
gl_sarray | operator== (const gl_sarray &other) const |
gl_sarray | operator+ (const flexible_type &other) const |
gl_sarray | operator- (const flexible_type &other) const |
gl_sarray | operator* (const flexible_type &other) const |
gl_sarray | operator/ (const flexible_type &other) const |
gl_sarray | operator< (const flexible_type &other) const |
gl_sarray | operator> (const flexible_type &other) const |
gl_sarray | operator<= (const flexible_type &other) const |
gl_sarray | operator>= (const flexible_type &other) const |
gl_sarray | operator== (const flexible_type &other) const |
gl_sarray | operator+= (const gl_sarray &other) |
gl_sarray | operator-= (const gl_sarray &other) |
gl_sarray | operator*= (const gl_sarray &other) |
gl_sarray | operator/= (const gl_sarray &other) |
gl_sarray | operator+= (const flexible_type &other) |
gl_sarray | operator-= (const flexible_type &other) |
gl_sarray | operator*= (const flexible_type &other) |
gl_sarray | operator/= (const flexible_type &other) |
gl_sarray | operator && (const gl_sarray &other) const |
gl_sarray | operator|| (const gl_sarray &other) const |
gl_sarray | operator & (const gl_sarray &other) const |
gl_sarray | operator| (const gl_sarray &other) const |
Static Public Member Functions | |
static gl_sarray | from_const (const flexible_type &value, size_t size) |
static gl_sarray | from_sequence (size_t start, size_t end, bool reverse=false) |
static gl_sarray | read_json (const std::string &url) |
An immutable, homogeneously typed array object backed by persistent storage.
The gl_sarray is a contiguous column of a single type with missing value support, and works with disk to support the holding of data that is much larger than the machine's main memory. Runtime typing of the gl_sarray is managed through the flexible_type, which is an efficient runtime typed value. The types supported by the flexible_type are listed in flex_type_enum.
Abstractly the gl_sarray provides an interface to read and write flexible_type values where all values have the same type at runtime (for instance flex_type_enum::INTEGER). The special type flex_type_enum::UNDEFINED (or the value FLEX_UNDEFINED ) is used to denote a missing value and can be used in combination with any types.
For instance:
While the gl_sarray is conceptually immutable, all that really means is that element-wise modifications are not permitted. However, full SArray assignments are permitted.
The gl_sarray API is designed to very closely mimic the Python SArray API and supports much of the Python-like capabilities, but in C++.
For instance, vector and operations:
Logical filters:
Python Range slicing:
And many others.
The gl_sarray can be read inefficiently using operator[]
Or iterated efficiently using the range_iterator
The range_iterator materializes the SArray if not already materialized, but materialize_to_callback can be used to read the SArray without materialization.
The gl_sarray can be constructed in a variety of ways:
When used as an input argument in an SDK function, it permits a Python SArray to be passed as an argument. When used in an output argument, it will return a Python SArray.
For instance:
Will allow this to be done in Python:
The gl_sarray is internally a reference object. i.e. in the code below, both a and b will point to the same underlying sarray. However since gl_sarray's are immutable, this does not introduce any interface quirks.
The gl_sarray is also lazily evaluated behind the scenes to minimize disk access. Thus regardless of the size of the SArray or the complexity of the lambda operation, this operation will run quickly.
This may have the unfortunate effect of hiding errors until materialization is forced to occur. i.e. it might be some time much later in your code that errors in some_complicated_function will trigger.
However, not all operations are lazy and certain operations will force materialization, and that is a constant target for optimization.
If you want to force materialization yourself, use materialize()
Definition at line 199 of file gl_sarray.hpp.
|
explicit |
turi::gl_sarray::gl_sarray | ( | const std::vector< flexible_type > & | values, |
flex_type_enum | dtype = flex_type_enum::UNDEFINED |
||
) |
turi::gl_sarray::gl_sarray | ( | const std::initializer_list< flexible_type > & | values | ) |
Constructs a gl_sarray from an initializer list of values.
Type is automatically determined.
bool turi::gl_sarray::all | ( | ) | const |
Return true if every element of the gl_sarray evaluates to true. For numeric gl_sarray objects zeros and missing values ("None") evaluate to false, while all non-zero, non-missing values evaluate to true. For string, list, and dictionary gl_sarray objects, empty values (zero length strings, lists or dictionaries) or missing values ("None") evaluate to false. All other values evaluate to true. Returns true on an empty gl_sarray.
Example:
Produces output:
bool turi::gl_sarray::any | ( | ) | const |
Return true if any element of the gl_sarray evaluates to true. For numeric gl_sarray objects any non-zero value evaluates to true. For string, list, and dictionary gl_sarray objects, any element of non-zero length evaluates to true. Returns false on an empty gl_sarray.
Example:
Produces output:
gl_sarray turi::gl_sarray::apply | ( | std::function< flexible_type(const flexible_type &)> | fn, |
flex_type_enum | dtype, | ||
bool | skip_undefined = true |
||
) | const |
Transform each element of the gl_sarray by a given function. The result gl_sarray is of type "dtype". "fn" should be a function that returns exactly one value which can be cast into the type specified by "dtype".
fn | The function to transform each element. Must return exactly one value which can be cast into the type specified by "dtype". |
dtype | The data type of the new gl_sarray. |
skip_undefined | Optional. If true, will not apply "fn" to any undefined values. Defaults to true. |
Example:
Produces output:
gl_sarray turi::gl_sarray::astype | ( | flex_type_enum | dtype, |
bool | undefined_on_failure = true |
||
) | const |
Create a new gl_sarray with all values cast to the given type. Throws an exception if the types are not castable to the given type.
dtype | The type to cast the elements to in gl_sarray |
undefined_on_failure | Optional. Defaults to True. If set to true, runtime cast failures will be emitted as missing values rather than failing. |
Example:
Produces output:
Given an SArray of strings that look like dicts, convert to a dictionary type:
Produces output:
gl_sarray turi::gl_sarray::builtin_rolling_apply | ( | const std::string & | fn_name, |
ssize_t | start, | ||
ssize_t | end, | ||
size_t | min_observations = size_t(-1) |
||
) | const |
Apply an aggregate function over a moving window.
input | The input SArray (expects to be materialized) |
fn_name | string representation of the aggregation function to use. The mapping is the same string mapping used by the groupby aggregate function. |
window_start | The start of the moving window relative to the current value being calculated, inclusive. For example, 2 values behind the current would be -2, and 0 indicates that the start of the window is the current value. |
window_end | The end of the moving window relative to the current value being calculated, inclusive. Must be greater than window_start . For example, 0 would indicate that the current value is the end of the window, and 2 would indicate that the window ends at 2 data values after the current. |
min_observations | The minimum allowed number of non-NULL values in the moving window for the emitted value to be non-NULL. size_t(-1) indicates that all values must be non-NULL. |
Returns an SArray of the same length as the input, with a type that matches the type output by the aggregation function.
Throws an exception if:
Example:
Produces an SArray with these values:
gl_sarray turi::gl_sarray::clip | ( | flexible_type | lower = FLEX_UNDEFINED , |
flexible_type | upper = FLEX_UNDEFINED |
||
) | const |
Create a new gl_sarray with each value clipped to be within the given bounds. In this case, "clipped" means that values below the lower bound will be set to the lower bound value. Values above the upper bound will be set to the upper bound value. This function can operate on gl_sarray objects of numeric type as well as array type, in which case each individual element in each array is clipped. By default "lower" and "upper" are set to FLEX_UNDEFINED, which indicates the respective bound should be ignored. The method fails if invoked on a gl_sarray of non-numeric type.
lower | Optional. The lower bound used to clip. Ignored if equal to FLEX_UNDEFINED (the default). |
upper | Optional. The upper bound used to clip. Ignored if equal to FLEX_UNDEFINED (the default). |
Example:
Produces output:
gl_sarray turi::gl_sarray::clip_lower | ( | flexible_type | threshold | ) | const |
Create new gl_sarray with all values clipped to the given lower bound. This function can operate on numeric arrays, as well as vector arrays, in which case each individual element in each vector is clipped. Throws an exception if the gl_sarray is empty or the types are non-numeric.
threshold | The lower bound used to clip values. |
Example:
Produces output:
gl_sarray turi::gl_sarray::clip_upper | ( | flexible_type | threshold | ) | const |
Create new gl_sarray with all values clipped to the given upper bound. This function can operate on numeric arrays, as well as vector arrays, in which case each individual element in each vector is clipped.
threshold | The upper bound used to clip values. |
Example:
Produces output:
gl_sarray turi::gl_sarray::contains | ( | const flexible_type & | other | ) | const |
Performs an element-wise substring search for "other". The current array must contain strings and "other" must be a string. Produces a 1 for each row if "other" is a substring of the row and 0 otherwise.
gl_sarray turi::gl_sarray::count_ngrams | ( | size_t | n = 2 , |
std::string | method = "word" , |
||
bool | to_lower = true , |
||
bool | ignore_space = true |
||
) | const |
Return an SArray of dict type where each element contains the count for each of the n-grams that appear in the corresponding input element. The n-grams can be specified to be either character n-grams or word n-grams. The input SArray must contain strings. Parameters:
n | Optional. The number of words in each n-gram. An n value of 1 returns word counts. Defaults to 2. |
method | Optional. Either "word" or "character". If “word”, the function performs a count of word n-grams. If “character”, does a character n-gram count. Defaults to "word". |
to_lower | Optional. If true, all words are converted to lower case before counting. Defaults to true. |
ignore_space | Optional. If method is “character”, indicates if spaces between words are counted as part of the n-gram. For instance, with the input SArray element of “fun games”, if this parameter is set to False one tri-gram would be ‘n g’. If ignore_space is set to True, there would be no such tri-gram (there would still be ‘nga’). This parameter has no effect if the method is set to “word”. Defaults to true. |
mp*
gl_sarray turi::gl_sarray::count_words | ( | bool | to_lower = true , |
turi::flex_list | delimiters = {"\r", "\v", "\n", "\f", "\t", " "} |
||
) | const |
Count words in the gl_sarray.
to_lower | Optional. If True, all words are converted to lower case before counting. |
Return a gl_sarray of dictionary type where each element contains the word count for each word that appeared in the corresponding input element. The words are split on all whitespace and punctuation characters. Only works if this SArray is of string type. Parameters:
gl_sarray turi::gl_sarray::cumulative_aggregate | ( | std::shared_ptr< group_aggregate_value > | aggregator | ) | const |
An abstraction to perform cumulative aggregates. y <- x.cumulative_aggregate(f, w_0)
The abstraction is as follows: y[i+1], w[i+1] = func(x[i], w[i]) where w[i] is some arbitrary state.
[in] | aggregator | Built-in aggregate to use (e.g., sum, min, max, etc.) |
produces an SArray that looks like the following: dtype: int [1, 3, 6, 10, 15]
gl_sarray turi::gl_sarray::cumulative_sum | ( | ) | const |
This returns an SArray where each element is a cumulative aggregate of all its previous elements. Only works in an SArray of numeric type or numeric-array types.
produces an SArray that looks like the following: dtype: int [1, 3, 6, 10, 15]
gl_sarray turi::gl_sarray::datetime_to_str | ( | const std::string & | str_format = "%Y-%m-%dT%H:%M:%S%ZP" | ) | const |
Create a new gl_sarray with all the values cast to str. The string format is specified by the 'str_format' parameter.
str_format | The format to output the string. Default format is "%Y-%m-%dT%H:%M:%S%ZP". See the strftime specification for details on the format string. |
Example:
Produces output:
gl_sarray turi::gl_sarray::dict_has_all_keys | ( | const std::vector< flexible_type > & | keys | ) | const |
Create a boolean gl_sarray by checking the keys of a gl_sarray of dictionaries. An element of the output gl_sarray is True if the corresponding input element's dictionary has all of the given keys. Fails on gl_sarray objects whose data type is not "dict".
keys | A list of key values to check each dictionary against. |
Example:
Produces output:
gl_sarray turi::gl_sarray::dict_has_any_keys | ( | const std::vector< flexible_type > & | keys | ) | const |
Create a boolean gl_sarray by checking the keys of a gl_sarray of dictionaries. An element of the output gl_sarray is True if the corresponding input element's dictionary has any of the given keys. Fails on gl_sarray objects whose data type is not "dict".
keys | A list of key values to check each dictionary against. |
Example:
Produces output:
gl_sarray turi::gl_sarray::dict_keys | ( | ) | const |
Create a gl_sarray that contains all the keys from each dictionary element as a list. Fails on gl_sarray objects whose data type is not "dict".
Example:
Produces output:
gl_sarray turi::gl_sarray::dict_trim_by_keys | ( | const std::vector< flexible_type > & | keys, |
bool | exclude = true |
||
) | const |
Filter an SArray of dictionary type by the given keys. By default, all keys that are in the provided list in "keys" are excluded from the returned SArray.
keys | A collection of keys to trim down the elements in the SArray. |
exclude | Optional. If true, all keys that are in the input key list are removed. If false, only keys that are in the input key list are retained. Defaults to true. |
gl_sarray turi::gl_sarray::dict_trim_by_values | ( | const flexible_type & | lower = FLEX_UNDEFINED , |
const flexible_type & | upper = FLEX_UNDEFINED |
||
) | const |
Filter dictionary values to a given range (inclusive). Trimming is only performed on values which can be compared to the bound values. Fails on SArrays whose data type is not dict.
lower | Optional. The lowest dictionary value that would be retained in the result. If FLEX_UNDEFINED , lower bound is not applied. Defaults to FLEX_UNDEFINED. |
upper | Optional. The highest dictionary value that would be retained in the result. If FLEX_UNDEFINED, upper bound is not applied. Defaults to FLEX_UNDEFINED. |
Example:
Produces output:
gl_sarray turi::gl_sarray::dict_values | ( | ) | const |
gl_sarray turi::gl_sarray::dropna | ( | ) | const |
flex_type_enum turi::gl_sarray::dtype | ( | ) | const |
Returns data type of the gl_sarray.
bool turi::gl_sarray::empty | ( | ) | const |
True if size() == 0.
gl_sarray turi::gl_sarray::fillna | ( | flexible_type | value | ) | const |
Create new gl_sarray with all missing values (FLEX_UNDEFINED or NaN) filled in with the given value. The size of the new gl_sarray will be the same as the original gl_sarray. If the given value is not the same type as the values in the gl_sarray, "fillna" will attempt to convert the value to the original gl_sarray's type. If this fails, an error will be raised.
value | The value used to replace all missing values |
gl_sarray turi::gl_sarray::filter | ( | std::function< bool(const flexible_type &)> | fn, |
bool | skip_undefined = true |
||
) | const |
Filter this gl_sarray by a function. Returns a new gl_sarray filtered by this gl_sarray. If "fn" evaluates an element to true, this element is copied to the new gl_sarray. If not, it isn't. Throws an exception if the return type of "fn" is not castable to a boolean value.
fn | Function that filters the gl_sarray. Must evaluate to bool or int. |
skip_undefined | Optional. If true, will not apply fn to any undefined values. |
Example:
Produces output:
This function is equivalent to the combination of a logical_filter and an apply.
|
static |
Returns a gl_sarray of size with a constant value.
value | The value to fill the array |
size | The size of the array |
|
static |
Returns a gl_sarray of a sequence of integer values.
start | The starting value |
end | One past the last value |
reverse | If the values are in reverse |
|
virtual |
Gets the internal implementation object.
Reimplemented in turi::const_gl_sarray_reference, and turi::gl_sarray_reference.
gl_sarray turi::gl_sarray::hash | ( | size_t | seed = 0 | ) | const |
Returns an SArray with a hash of each element. seed can be used to change the hash function to allow this method to be used for random number generation.
seed | Defaults to 0. Can be changed to different values to get different hash results. |
Example:
Produces output:
gl_sarray turi::gl_sarray::head | ( | size_t | n | ) | const |
bool turi::gl_sarray::is_materialized | ( | ) | const |
Returns whether or not the sarray has been materialized.
gl_sarray turi::gl_sarray::item_length | ( | ) | const |
Length of each element in the current gl_sarray. Only works on gl_sarray objects of dict, array, or list type. If a given element is a missing value, then the output element is also a missing value. This function is equivalent to the following:
sa_item_len = sa.apply([](const flexible_type& x) { return flexible_type(x.get_type() == flex_type_enum::UNDEFINED ? 0 : x.size()); });
Example:
Produces output:
void turi::gl_sarray::materialize | ( | ) | const |
For a gl_sarray that is lazily evaluated, force persist this sarray to disk, committing all lazy evaluated operations.
void turi::gl_sarray::materialize_to_callback | ( | std::function< bool(size_t, const std::shared_ptr< sframe_rows > &)> | callback, |
size_t | nthreads = (size_t)(-1) |
||
) |
Calls a callback function passing each row of the SArray.
This does not materialize the array if not necessary. The callback may be called in parallel, in which case the argument provides a thread number. The function should return false, but may return true at any time to quit the iteration process. It may also throw exceptions which will be forwarded to the caller of this function.
Each call to the callback passes:
The sframe_rows object looks like a vector<vector<flexible_type>>. i.e. to look at all the rows, you need to write:
callback | The callback to call |
nthreads | Number of threads. If not specified, #cpus is used |
flexible_type turi::gl_sarray::max | ( | ) | const |
flexible_type turi::gl_sarray::mean | ( | ) | const |
flexible_type turi::gl_sarray::min | ( | ) | const |
size_t turi::gl_sarray::nnz | ( | ) | const |
Number of non-zero elements in the gl_sarray.
size_t turi::gl_sarray::num_missing | ( | ) | const |
Number of missing elements in the gl_sarray.
flexible_type turi::gl_sarray::operator[] | ( | int64_t | i | ) | const |
Returns the value at a particular array index; generally inefficient.
This returns the value of the array at a particular index. Will raise an exception if the index is out of bounds. This operation is generally inefficient: the range_iterator() is preferred.
Performs a logical filter.
This function performs a logical filter: i.e. it subselects all the elements in this array where the corresponding value in the other array evaluates to true.
gl_sarray turi::gl_sarray::operator[] | ( | const std::initializer_list< int64_t > & | slice | ) | const |
Performs a slice Python style.
slice | A list of 2 or 3 values. If 2 values, this is interpreted as {start, end} indices, with an implicit value of step = 1. If 3 values, this is interpreted as {start, step, end}. Values at the positions [start, start+step, start+2*step, ...] are returned until end (exclusive) is reached. Negative start and end values are interpreted as offsets from the end of the array. |
Given a gl_sarray
Slicing a consecutive range:
Slicing a range with a step:
Using negative indexing:
gl_sarray turi::gl_sarray::pixel_array_to_image | ( | size_t | width, |
size_t | height, | ||
size_t | channels = 3 , |
||
bool | undefined_on_failure = true |
||
) | const |
Create a new gl_sarray with all the values cast to turi::image_type of uniform size.
width | int The width of the new images. |
height | int The height of the new images. |
channels | int. Number of channels of the new images. |
undefined_on_failure | optional. defaults to true. If true, return FLEX_UNDEFINED type instead of Image type on failure. If false, raises error upon failure. |
allow_rounding | optional. Default to false. If true, rounds non-integer values when converting to Image type. If false, raises error upon rounding. |
std::shared_ptr<visualization::Plot> turi::gl_sarray::plot | ( | const flexible_type & | title, |
const flexible_type & | xlabel, | ||
const flexible_type & | ylabel | ||
) | const |
Return a visualization of the SArray.
gl_sarray_range turi::gl_sarray::range_iterator | ( | size_t | start = 0 , |
size_t | end = (size_t)(-1) |
||
) | const |
Returns a one pass range object with begin() and end() iterators.
This will materialize the array.
See materialize_to_callback for a lazy version.
start | The starting index of the range |
end | The ending index of the range |
Or more compactly with C++11 syntax:
The range returned only supports one pass. The outcome of a second call to begin() is undefined after any iterator is advanced.
|
static |
Constructs an SArray from a JSON record file.
A json record file contains an array of dictionaries. Resultant SArray is of dictionary type.
gl_sarray turi::gl_sarray::sample | ( | double | fraction | ) | const |
Create a gl_sarray which contains a subsample of the current gl_sarray.
fraction | The fraction of the rows to fetch. Must be between 0 and 1. |
Example:
Produces output:
gl_sarray turi::gl_sarray::sample | ( | double | fraction, |
size_t | seed, | ||
bool | exact = false |
||
) | const |
Create a gl_sarray which contains a subsample of the current gl_sarray.
fraction | The fraction of the rows to fetch. Must be between 0 and 1. |
seed | The random seed for the random number generator. Deterministic output is obtained if this is set to a constant. |
Example:
Produces output:
void turi::gl_sarray::save | ( | const std::string & | directory, |
const std::string & | format = "binary" |
||
) | const |
Saves the gl_sarray to file.
When format is "binary" (default), the saved SArray will be in a directory named by the "directory" parameter. When format is "text" or "csv", it is saved as a single human readable text file.
directory | A local path or a remote URL. If format is 'text', it will be saved as a text file. If format is 'binary', a directory will be created at the location which will contain the SArray. |
format | Either "binary", "text", "csv". Defaults to "binary". optional. Format in which to save the SArray. Binary saved SArrays can be loaded much faster and without any format conversion losses. 'text' and 'csv' are synonymous: Each SArray row will be written as a single line in an output text file. If not given, will try to infer the format from filename given. If file name ends with 'csv', 'txt' or '.csv.gz', then save as 'csv' format, otherwise save as 'binary' format. |
void turi::gl_sarray::show | ( | const std::string & | path_to_client, |
const flexible_type & | title, | ||
const flexible_type & | xlabel, | ||
const flexible_type & | ylabel | ||
) | const |
Show a visualization of the SArray.
size_t turi::gl_sarray::size | ( | ) | const |
The size of the SArray.
gl_sarray turi::gl_sarray::sort | ( | bool | ascending = true | ) | const |
Sort all values in this gl_sarray. Sort only works for sarray of type str, int and float, otherwise TypeError will be raised. Creates a new, sorted gl_sarray.
ascending | Optional. Defaults to True. If true, the sarray values are sorted in ascending order, otherwise, descending order. |
Example:
Produces output:
gl_sframe turi::gl_sarray::split_datetime | ( | const std::string & | column_name_prefix = "X" , |
const std::vector< std::string > & | limit = {"year","month","day","hour","minute","second"} , |
||
bool | tzone = false |
||
) | const |
Splits a gl_sarray of datetime type into multiple columns, returning a new gl_sframe that contains the expanded columns. A gl_sarray of datetime will be split by default into a gl_sframe of 6 columns, one for each year/month/day/hour/minute/second element.
When splitting a gl_sarray of datetime type, new columns are named: prefix.year, prefix.month, etc. The prefix is set by the parameter "column_name_prefix" and defaults to 'X'. If column_name_prefix is FLEX_UNDEFINED or empty, then no prefix is used.
If the tzone parameter is true, then timezone information is represented as one additional column, which is a float showing the offset from GMT(0.0) or from UTC.
column_name_prefix | Optional. If provided, expanded column names would start with the given prefix. Defaults to "X". |
limit | Optional. Limits the set of datetime elements to expand. Elements are 'year','month','day','hour','minute', and 'second'. |
tzone | Optional. A boolean parameter that determines whether to show timezone column or not. Defaults to false. |
Example:
Produces output:
flexible_type turi::gl_sarray::std | ( | ) | const |
gl_sarray turi::gl_sarray::str_to_datetime | ( | const std::string & | str_format = "%Y-%m-%dT%H:%M:%S%ZP" | ) | const |
Create a new gl_sarray with all the values cast to datetime. The string format is specified by the 'str_format' parameter.
str_format | The format to parse the string. Default format is "%Y-%m-%dT%H:%M:%S%ZP". See the strptime specification for details on the format string. |
Example:
Produces output:
gl_sarray turi::gl_sarray::subslice | ( | flexible_type | start = FLEX_UNDEFINED , |
flexible_type | stop = FLEX_UNDEFINED , |
||
flexible_type | step = FLEX_UNDEFINED |
||
) | const |
This returns an SArray with each element sliced according to the slice specified.
start | The start position of the slice |
stop | The stop position of the slice |
step | The step size of the slice (default = 1) |
This is conceptually equivalent to the following Python expression:
The SArray must be of type list, vector, or string.
For instance:
Produces output:
Negative indices:
Produces output:
Arrays:
Produces output:
flexible_type turi::gl_sarray::sum | ( | ) | const |
Sum of all values in this gl_sarray.
Raises an exception if called on a gl_sarray of strings, lists, or dictionaries. If the gl_sarray contains numeric arrays (flex_vec) and all the arrays are the same length, the sum over all the arrays will be returned. Returns FLEX_UNDEFINED on an empty gl_sarray. For large values, this may overflow without warning.
gl_sarray turi::gl_sarray::tail | ( | size_t | n | ) | const |
gl_sarray turi::gl_sarray::topk_index | ( | size_t | topk = 10 , |
bool | reverse = false |
||
) | const |
Create a gl_sarray indicating which elements are in the top k. Entries are '1' if the corresponding element in the current gl_sarray is a part of the top k elements, and '0' if that corresponding element is not. Order is descending by default.
topk | Optional. Defaults to 10. The number of elements to treat as the 'top' elements. |
reverse | Optional. Defaults to false. If true, return the topk elements in ascending order |
gl_sarray turi::gl_sarray::unique | ( | ) | const |
Get all unique values in the current gl_sarray. Raises an error if the gl_sarray is of dictionary type. Will not necessarily preserve the order of the given gl_sarray in the new gl_sarray.
gl_sframe turi::gl_sarray::unpack | ( | const std::string & | column_name_prefix = "X" , |
const std::vector< flex_type_enum > & | column_types = std::vector< flex_type_enum >() , |
||
const flexible_type & | na_value = FLEX_UNDEFINED , |
||
const std::vector< flexible_type > & | limit = std::vector< flexible_type >() |
||
) | const |
Convert a gl_sarray of list, array, or dict type to a gl_sframe with multiple columns.
"unpack" expands a gl_sarray using the values of each vector/list/dict as elements in a new gl_sframe of multiple columns. For example, a gl_sarray of lists each of length 4 will be expanded into a gl_sframe of 4 columns, one for each list element. A gl_sarray of lists/arrays of varying size will be expanded to a number of columns equal to the length of the longest list/array. A gl_sarray of dictionaries will be expanded into as many columns as there are keys.
When unpacking a gl_sarray of list or vector type, new columns are named: "column_name_prefix".0, "column_name_prefix".1, etc. If unpacking a column of dict type, unpacked columns are named "column_name_prefix".key1, "column_name_prefix".key2, etc.
When unpacking a gl_sarray of list or dictionary types, missing values in the original element remain as missing values in the resultant columns. If the "na_value" parameter is specified, all values equal to this given value are also replaced with missing values. In a gl_sarray of vector type, NaN is interpreted as a missing value.
gl_sframe::pack_columns() is the reverse effect of unpack.
column_name_prefix | Optional. If provided, unpacked column names would start with the given prefix. Defaults to "X". If the empty string is used, no prefix is used. |
column_types | Optional. Column types for the unpacked columns. If not provided, column types are automatically inferred from the first 100 rows. Defaults to an empty vector (types are inferred). |
na_value | Optional. Convert all values that are equal to "na_value" to missing value if specified. |
limit | Optional. Limits the set of list/vector/dict keys to unpack. For list/vector gl_sarrays, "limit" must contain integer indices. For dict gl_sarrays, "limit" must contain dictionary keys. |
Example:
Produces output:
Unpack only the key "word":
Produces output:
Convert all zeros to missing values:
Produces output: