6 #ifndef TURI_UNITY_SARRAY_HPP 7 #define TURI_UNITY_SARRAY_HPP 11 #include <core/data/flexible_type/flexible_type.hpp> 12 #include <model_server/lib/api/unity_sarray_interface.hpp> 13 #include <visualization/server/plot.hpp> 23 class sarray_iterator;
25 namespace query_eval {
/// Declaration: takes a vector of flexible_type values by const reference
/// and a flex_type_enum; returns nothing.
/// NOTE(review): presumably fills this array from `vec` using `type` as the
/// element type -- the implementation is not visible here; confirm.
76 void construct_from_vector(
const std::vector<flexible_type>& vec,
flex_type_enum type);
/// Declaration: takes a single flexible_type value and a size_t size;
/// returns nothing.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma); any remaining parameters are not visible here.
/// NOTE(review): presumably builds an array of `size` copies of `value` --
/// confirm against the implementation.
87 void construct_from_const(
const flexible_type& value,
size_t size,
/// Declaration: takes a shared pointer to a query_eval::planner_node;
/// returns nothing.
/// NOTE(review): presumably adopts `node` as the backing query plan (compare
/// the m_planner_node member) -- confirm against the implementation.
93 void construct_from_planner_node(std::shared_ptr<query_eval::planner_node> node);
/// Declaration: takes a location string by value; returns nothing.
/// NOTE(review): presumably `location` names an on-disk sarray index file --
/// the accepted formats are not visible here; confirm.
105 void construct_from_sarray_index(std::string location);
/// Declaration: takes a url string by value; returns nothing.
/// NOTE(review): presumably loads JSON records found at `url` -- the expected
/// file layout is not visible here; confirm.
122 void construct_from_json_record_files(std::string url);
/// Declaration: takes a url string and a flex_type_enum; returns nothing.
/// NOTE(review): the name suggests the source format at `url` is detected
/// automatically; how `type` is applied is not visible here -- confirm.
130 void construct_from_autodetect(std::string url,
flex_type_enum type);
/// Declaration: takes a target directory string; returns nothing.
/// NOTE(review): presumably writes the array under `target_directory` --
/// confirm overwrite/creation behavior against the implementation.
136 void save_array(std::string target_directory);
/// Declaration: takes an index file string; returns nothing.
/// NOTE(review): presumably saves the array addressed by `index_file` rather
/// than by directory -- confirm against the implementation.
143 void save_array_by_index_file(std::string index_file);
/// Declaration: returns a shared pointer to an sarray<flexible_type>.
/// NOTE(review): whether calling this forces evaluation of a lazy plan is not
/// visible here -- confirm.
164 std::shared_ptr<sarray<flexible_type> > get_underlying_sarray();
/// Declaration: returns a shared pointer to a query_eval::planner_node.
/// NOTE(review): presumably exposes the m_planner_node member -- confirm.
169 std::shared_ptr<query_eval::planner_node> get_planner_node();
/// Declaration: takes a row count and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably yields the first `nrows` elements -- behavior
/// when `nrows` exceeds the array length is not visible here; confirm.
177 std::shared_ptr<unity_sarray_base> head(
size_t nrows);
/// Inline helper returning std::vector<flexible_type>: calls head(nrows) and
/// converts that result with to_vector().
/// NOTE(review): the body is cut off in this listing after `ret` is
/// assigned; the return statement and closing brace are not visible.
182 std::vector<flexible_type>
_head(
size_t nrows) {
183 auto result = head(nrows);
184 auto ret = result->to_vector();
/// Declaration: returns a shared pointer to a unity_sarray_base; the first
/// parameter is a `lambda` string taken by const reference.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma); remaining parameters are not visible here.
/// NOTE(review): the encoding of `lambda` is not visible here -- confirm.
197 std::shared_ptr<unity_sarray_base>
transform(
const std::string& lambda,
/// Declaration: returns a shared pointer to a unity_sarray_base.
/// NOTE(review): the entire parameter list is missing from this listing.
206 std::shared_ptr<unity_sarray_base> transform_native(
/// Declaration: takes another array as a shared pointer to unity_sarray_base
/// and returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably the result is this array followed by `other`;
/// type-compatibility requirements are not visible here -- confirm.
221 std::shared_ptr<unity_sarray_base> append(std::shared_ptr<unity_sarray_base> other);
/// Declaration: takes `start` and `end` positions (size_t) and returns a
/// shared pointer to a unity_sarray_base.
/// NOTE(review): presumably slices each vector-typed element; whether `end`
/// is inclusive is not visible here -- confirm.
235 std::shared_ptr<unity_sarray_base> vector_slice(
size_t start,
size_t end);
/// Declaration: takes a `lambda` string, a `skip_undefined` flag and a
/// 64-bit `seed`; returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably keeps elements for which the lambda evaluates
/// true; the roles of `skip_undefined` and `seed` are not visible here --
/// confirm against the implementation.
244 std::shared_ptr<unity_sarray_base> filter(
const std::string& lambda,
bool skip_undefined, uint64_t seed);
/// Declaration: takes an `index` array as a shared pointer to
/// unity_sarray_base and returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably `index` is a same-length mask selecting which
/// elements to keep -- confirm length and type requirements.
253 std::shared_ptr<unity_sarray_base> logical_filter(std::shared_ptr<unity_sarray_base> index);
/// Declaration: takes a count `k` and a `reverse` flag; returns a shared
/// pointer to a unity_sarray_base.
/// NOTE(review): presumably marks the positions of the k largest (or, with
/// `reverse`, smallest) elements -- confirm against the implementation.
261 std::shared_ptr<unity_sarray_base> topk_index(
size_t k,
bool reverse);
/// Declaration: takes a format string and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): the format-specifier syntax is not visible here -- confirm.
281 std::shared_ptr<unity_sarray_base> datetime_to_str(std::string format);
/// Declaration: takes a format string and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably the inverse of datetime_to_str -- confirm how
/// unparseable strings are handled.
288 std::shared_ptr<unity_sarray_base> str_to_datetime(std::string format);
/// Declaration: takes a target `dtype` and an `undefined_on_failure` flag
/// that defaults to false; returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably a failed conversion raises unless
/// `undefined_on_failure` is set -- confirm against the implementation.
299 std::shared_ptr<unity_sarray_base> astype(
flex_type_enum dtype,
bool undefined_on_failure =
false);
/// Declaration with the same parameters and return type as astype: a target
/// `dtype` and an `undefined_on_failure` flag defaulting to false.
/// NOTE(review): the name suggests the cast is deferred rather than
/// evaluated immediately -- confirm how this differs from astype.
309 std::shared_ptr<unity_sarray_base> lazy_astype(
flex_type_enum dtype,
310 bool undefined_on_failure =
false);
/// Declaration: takes no arguments and returns a size_t.
/// NOTE(review): presumably the count of missing/undefined elements --
/// confirm what counts as missing.
404 size_t num_missing();
/// Declaration: returns a shared pointer to a unity_sarray_base; the first
/// parameter is a flexible_type scalar `other`.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma). The sibling right_scalar_operator takes a further
/// `std::string op`; presumably this one does too -- confirm.
440 std::shared_ptr<unity_sarray_base> left_scalar_operator(
flexible_type other,
/// Declaration: takes a flexible_type scalar `other` and an operator name
/// `op`; returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably applies `op` element-wise with the scalar on the
/// opposite side from left_scalar_operator; the accepted `op` strings are
/// not visible here -- confirm.
464 std::shared_ptr<unity_sarray_base> right_scalar_operator(
flexible_type other, std::string op);
/// Declaration: takes another array `other` and an operator name `op`;
/// returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably applies `op` element-wise between the two
/// arrays; length/type requirements are not visible here -- confirm.
489 std::shared_ptr<unity_sarray_base> vector_operator(
490 std::shared_ptr<unity_sarray_base> other, std::string op);
/// Declaration: takes no arguments and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably the result omits missing elements -- confirm.
498 std::shared_ptr<unity_sarray_base> drop_missing_values();
/// Declaration: takes `recursive` (default false) and `missing_is_true`
/// (default true); returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably yields a per-element mask whose polarity is set
/// by `missing_is_true`; what `recursive` inspects is not visible here --
/// confirm against the implementation.
509 std::shared_ptr<unity_sarray_base> missing_mask(
bool recursive =
false,
bool missing_is_true =
true);
/// Declaration: takes a flexible_type `default_value` and returns a shared
/// pointer to a unity_sarray_base.
/// NOTE(review): presumably missing elements are replaced by
/// `default_value`; type-compatibility checks are not visible -- confirm.
516 std::shared_ptr<unity_sarray_base> fill_missing_values(
flexible_type default_value);
/// Declaration: takes a row count that defaults to 10 and returns a shared
/// pointer to a unity_sarray_base.
/// NOTE(review): presumably yields the last `nrows` elements -- confirm.
525 std::shared_ptr<unity_sarray_base> tail(
size_t nrows=10);
/// Inline helper returning std::vector<flexible_type>: calls tail(nrows),
/// with nrows defaulting to 10, and converts that result with to_vector().
/// NOTE(review): the body is cut off in this listing after `ret` is
/// assigned; the return statement and closing brace are not visible.
527 std::vector<flexible_type> _tail(
size_t nrows=10) {
528 auto result = tail(nrows);
529 auto ret = result->to_vector();
/// Declaration: takes a float `percent`, a 64-bit `random_seed` and an
/// `exact` flag defaulting to false; returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): whether `percent` is a 0-1 fraction or a 0-100 percentage,
/// and what `exact` guarantees, are not visible here -- confirm.
537 std::shared_ptr<unity_sarray_base> sample(
float percent, uint64_t random_seed,
bool exact=
false);
/// Declaration: takes a 64-bit `seed` and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably each element is replaced by a seeded hash of its
/// value; the hash function is not visible here -- confirm.
544 std::shared_ptr<unity_sarray_base> hash(uint64_t seed);
/// Declaration: takes a string-keyed map of flexible_type options and
/// returns a shared pointer to a unity_sarray_base.
/// NOTE(review): the recognized option keys are not visible here -- confirm.
549 std::shared_ptr<unity_sarray_base> count_bag_of_words(std::map<std::string, flexible_type> options);
/// Declaration: takes an n-gram size `n` and an options map; returns a
/// shared pointer to a unity_sarray_base.
/// NOTE(review): presumably counts character-level n-grams per element;
/// recognized option keys are not visible here -- confirm.
557 std::shared_ptr<unity_sarray_base> count_character_ngrams(
size_t n, std::map<std::string, flexible_type> options);
/// Declaration: same parameters and return type as count_character_ngrams.
/// NOTE(review): presumably counts word-level n-grams per element -- confirm.
565 std::shared_ptr<unity_sarray_base> count_ngrams(
size_t n, std::map<std::string, flexible_type> options);
/// Declaration: takes a vector of flexible_type `keys` and an `exclude`
/// flag; returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably `exclude` chooses between dropping the listed
/// keys and keeping only them -- confirm which value means which.
572 std::shared_ptr<unity_sarray_base> dict_trim_by_keys(
const std::vector<flexible_type>&
keys,
bool exclude);
/// Declaration: takes no arguments and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably yields each dictionary element's keys -- confirm.
587 std::shared_ptr<unity_sarray_base> dict_keys();
/// Declaration: takes no arguments and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably yields each dictionary element's values --
/// confirm.
593 std::shared_ptr<unity_sarray_base> dict_values();
/// Declaration: takes a vector of flexible_type `keys` and returns a shared
/// pointer to a unity_sarray_base.
/// NOTE(review): presumably flags elements containing at least one of
/// `keys` -- confirm.
600 std::shared_ptr<unity_sarray_base> dict_has_any_keys(
const std::vector<flexible_type>& keys);
/// Declaration: same parameters and return type as dict_has_any_keys.
/// NOTE(review): presumably flags elements containing every one of `keys`
/// -- confirm.
607 std::shared_ptr<unity_sarray_base> dict_has_all_keys(
const std::vector<flexible_type>& keys);
/// Declaration: takes no arguments and returns a shared pointer to a
/// unity_sarray_base.
/// NOTE(review): presumably yields the length of each container-typed
/// element; supported element types are not visible here -- confirm.
615 std::shared_ptr<unity_sarray_base> item_length();
/// Declaration: takes a column-name prefix, a vector of element selectors
/// and a vector of column types; returns a shared pointer to a
/// unity_sframe_base (a frame, not an array).
/// NOTE(review): presumably one output column is produced per entry of
/// `expanded_column_elements`, typed by the matching entry of
/// `expanded_columns_types` -- confirm the two vectors must align.
628 std::shared_ptr<unity_sframe_base> expand(
629 const std::string& column_name_prefix,
630 const std::vector<flexible_type>& expanded_column_elements,
631 const std::vector<flex_type_enum>& expanded_columns_types);
/// Declaration: returns a shared pointer to a unity_sframe_base; the visible
/// parameters are a column-name prefix, a vector of keys and a vector of
/// column types.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma); remaining parameters are not visible here.
646 std::shared_ptr<unity_sframe_base> unpack(
647 const std::string& column_name_prefix,
648 const std::vector<flexible_type>& unpacked_keys,
649 const std::vector<flex_type_enum>& unpacked_columns_types,
/// Declaration: returns a shared pointer to a unity_sframe_base; the visible
/// parameters are a column-name prefix and a `limit` vector.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma); remaining parameters are not visible here.
/// NOTE(review): presumably `limit` restricts which keys are unpacked --
/// confirm.
662 std::shared_ptr<unity_sframe_base> unpack_dict(
663 const std::string& column_name_prefix,
664 const std::vector<flexible_type>& limit,
/// Declaration: takes two arrays, `is_true` and `is_false`, and returns a
/// shared pointer to a unity_sarray_base.
/// NOTE(review): presumably this array acts as the condition, choosing per
/// element between `is_true` and `is_false` -- confirm length requirements.
684 std::shared_ptr<unity_sarray_base> ternary_operator(std::shared_ptr<unity_sarray_base> is_true,
685 std::shared_ptr<unity_sarray_base> is_false);
/// Declaration: takes no arguments and returns nothing.
/// NOTE(review): presumably (re)initializes the iterator_* members used by
/// iterator_get_next -- confirm.
716 void begin_iterator();
/// Declaration: takes a count `len` and returns a vector of flexible_type.
/// NOTE(review): presumably yields up to `len` further elements after
/// begin_iterator(); end-of-data behavior is not visible here -- confirm.
730 std::vector<flexible_type> iterator_get_next(
size_t len);
737 return iterator_get_next(size());
/// Declaration: takes no arguments and returns a bool.
/// NOTE(review): presumably true once the array is backed by concrete data
/// rather than a pending query plan -- confirm.
750 bool is_materialized();
/// Declaration: takes no arguments and returns a size_t.
/// NOTE(review): how the identifier is derived and how stable it is are not
/// visible here -- confirm.
764 size_t get_content_identifier();
/// Declaration: takes `start`, `step` and `end` (all size_t, in that order)
/// and returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably selects every `step`-th element from `start` up
/// to `end`; whether `end` is inclusive is not visible here -- confirm.
772 std::shared_ptr<unity_sarray_base>
copy_range(
size_t start,
size_t step,
size_t end);
/// Static declaration: takes a signed `size`, a signed `start` and a
/// `reverse` flag; returns a shared pointer to a unity_sarray_base.
/// NOTE(review): presumably yields `size` consecutive integers beginning at
/// `start`, descending when `reverse` is set; handling of negative `size`
/// is not visible here -- confirm.
774 static std::shared_ptr<unity_sarray_base>
775 create_sequential_sarray(ssize_t size, ssize_t start,
bool reverse);
/// Static declaration: returns a shared pointer to a unity_sarray_base; the
/// visible parameters are `size` and a 64-bit `random_seed`.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma) and the embedded listing numbers skip 788, so further
/// parameters are not visible here.
787 static std::shared_ptr<unity_sarray_base> make_uniform_boolean_array(
size_t size,
789 uint64_t random_seed,
/// Static declaration: returns a shared pointer to a unity_sarray_base; the
/// visible parameters are `size` and a 64-bit `random_seed`.
/// NOTE(review): the embedded listing numbers skip 798, so a parameter
/// between `size` and `random_seed` appears to be elided from this listing.
797 static std::shared_ptr<unity_sarray_base> make_exact_uniform_boolean_array(
size_t size,
799 uint64_t random_seed);
/// Static declaration: takes `size` and `max_int` and returns a shared
/// pointer to a unity_sarray_base.
/// NOTE(review): presumably yields `size` random integers bounded by
/// `max_int`; whether the bound is inclusive and how the generator is
/// seeded are not visible here -- confirm.
805 static std::shared_ptr<unity_sarray_base> make_uniform_int_array(
size_t size,
size_t max_int);
/// Declaration: returns a shared pointer to a unity_sarray_base; the visible
/// parameters are a function name `fn_name` and `min_observations`.
/// NOTE(review): the embedded listing numbers jump from 808 to 811, so the
/// parameters between `fn_name` and `min_observations` appear to be elided
/// from this listing.
807 std::shared_ptr<unity_sarray_base> builtin_rolling_apply(
808 const std::string &fn_name,
811 size_t min_observations);
/// Declaration: takes an aggregate `name` string and returns a shared
/// pointer to a unity_sarray_base.
/// NOTE(review): the set of recognized aggregate names is not visible here
/// -- confirm.
813 std::shared_ptr<unity_sarray_base> builtin_cumulative_aggregate(
const std::string& name);
/// Declaration: returns nothing; the first parameter is a
/// `path_to_client` string taken by const reference.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma); remaining parameters are not visible here.
819 void show(
const std::string& path_to_client,
/// Declaration: returns a shared pointer to a model_base; the first
/// parameter is a flexible_type `_title` taken by const reference.
/// NOTE(review): the parameter list is cut off in this listing (it ends on a
/// trailing comma); remaining parameters are not visible here.
824 std::shared_ptr<model_base> plot(
const flexible_type& _title,
/// Shared pointer to a query_eval::planner_node.
/// NOTE(review): presumably the query plan that produces this array's
/// contents -- confirm.
833 std::shared_ptr<query_eval::planner_node> m_planner_node;
/// Segment counter, initialized to 0.
/// NOTE(review): presumably the next segment the iterator will open --
/// confirm.
840 size_t iterator_next_segment_id = 0;
/// Uniquely-owned sarray_reader<flexible_type>.
/// NOTE(review): presumably the reader backing iteration -- confirm.
847 std::unique_ptr<sarray_reader<flexible_type> > iterator_sarray_ptr;
/// Uniquely-owned sarray_iterator<flexible_type>.
/// NOTE(review): presumably the current position within the open segment
/// -- confirm.
853 std::unique_ptr<sarray_iterator<flexible_type>> iterator_current_segment_iter;
/// Uniquely-owned sarray_iterator<flexible_type>.
/// NOTE(review): presumably the end marker of the open segment -- confirm.
859 std::unique_ptr<sarray_iterator<flexible_type>> iterator_current_segment_enditer;
/// Declaration: returns a shared pointer to a unity_sarray_base; the visible
/// parameters are a flexible_type scalar `other` and a `right_operator`
/// flag.
/// NOTE(review): the embedded listing numbers skip 888, so a parameter
/// between `other` and `right_operator` appears to be elided from this
/// listing.
/// NOTE(review): presumably the shared implementation behind
/// left_scalar_operator and right_scalar_operator -- confirm.
887 std::shared_ptr<unity_sarray_base> scalar_operator(
flexible_type other,
889 bool right_operator);
/// Declaration: takes another unity_sarray by const reference; returns
/// nothing.
/// NOTE(review): presumably makes this object refer to the same contents as
/// `other`; whether data is copied or shared is not visible here -- confirm.
892 void construct_from_unity_sarray(
const unity_sarray& other);
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
std::set< Key > keys(const std::map< Key, T > &map)
void copy_range(S &&input, T &&output, size_t start, size_t step, size_t end)
std::vector< flexible_type > _head(size_t nrows)
std::vector< flexible_type > to_vector()
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
void transform(S &&input, T &&output, TransformFn transformfn, std::set< size_t > constraint_segments=std::set< size_t >())