6 #ifndef TURI_UNITY_SFRAME_HPP 7 #define TURI_UNITY_SFRAME_HPP 12 #include <model_server/lib/api/unity_sframe_interface.hpp> 13 #include <core/storage/sframe_interface/unity_sarray.hpp> 14 #include <core/storage/sframe_data/group_aggregate_value.hpp> 15 #include <core/storage/sframe_data/sframe_rows.hpp> 16 #include <visualization/server/plot.hpp> 24 class sframe_iterator;
26 namespace query_eval {
/// Declares construct_from_dataframe; overrides a virtual from a base class.
/// @param df  A dataframe_t taken by const reference.
/// NOTE(review): presumably populates this frame from `df` -- confirm in the implementation.
68 void construct_from_dataframe(
const dataframe_t& df)
override;
/// Declares construct_from_sframe. Not marked override, so it is not part of
/// the overridden base interface as far as this declaration shows.
/// @param sf  An sframe taken by const reference.
73 void construct_from_sframe(
const sframe& sf);
/// Declares construct_from_sframe_index; overrides a base-class virtual.
/// @param index_file  String passed by value.
/// NOTE(review): presumably a path/URL to an sframe index file -- confirm.
84 void construct_from_sframe_index(std::string index_file)
override;
/// Declares construct_from_csvs; overrides a base-class virtual.
/// @param parsing_config     Map from option name to flexible_type value (by value).
/// @param column_type_hints  Map from name to flex_type_enum (by value).
/// @return Map from string to shared_ptr<unity_sarray_base>.
/// NOTE(review): the embedded listing numbers jump from 113 to 115, so a
/// parameter line appears to be missing from this view -- restore it from the
/// original header before relying on this signature.
113 std::map<std::string, std::shared_ptr<unity_sarray_base>> construct_from_csvs(
115 std::map<std::string, flexible_type> parsing_config,
116 std::map<std::string, flex_type_enum> column_type_hints)
override;
/// Declares construct_from_planner_node (non-override).
/// @param node          Shared pointer to a query_eval::planner_node (by value).
/// @param column_names  Column names, by const reference.
/// NOTE(review): presumably `column_names` must match the node's output
/// columns one-to-one -- confirm in the implementation.
118 void construct_from_planner_node(std::shared_ptr<query_eval::planner_node> node,
119 const std::vector<std::string>& column_names);
/// Declares save_frame; overrides a base-class virtual.
/// @param target_directory  String passed by value.
/// NOTE(review): presumably the directory the frame is written to -- confirm.
125 void save_frame(std::string target_directory)
override;
/// Declares save_frame_reference; overrides a base-class virtual.
/// @param target_directory  String passed by value.
/// NOTE(review): how this differs from save_frame is not visible here -- see
/// the implementation.
134 void save_frame_reference(std::string target_directory)
override;
/// Declares save_frame_by_index_file (non-override).
/// @param index_file  String passed by value; presumably the index file
///                    location to save to -- confirm.
141 void save_frame_by_index_file(std::string index_file);
/// Declares clear; takes no arguments and overrides a base-class virtual.
/// NOTE(review): presumably resets the frame to an empty state -- confirm.
146 void clear()
override;
/// Declares size; overrides a base-class virtual and is not const-qualified.
/// @return A size_t; presumably the number of rows -- confirm.
151 size_t size()
override;
/// Declares num_columns; overrides a base-class virtual.
/// @return A size_t; presumably the column count -- confirm.
157 size_t num_columns()
override;
/// Declares dtype; overrides a base-class virtual.
/// @return Vector of flex_type_enum, returned by value; presumably one entry
///         per column in column order -- confirm.
164 std::vector<flex_type_enum> dtype()
override;
/// Declares column_names; overrides a base-class virtual.
/// @return Vector of strings, returned by value (a copy, not a reference).
181 std::vector<std::string> column_names()
override;
/// Declares head; overrides a base-class virtual.
/// @param nrows  Row count as size_t.
/// @return shared_ptr<unity_sframe_base>; presumably a frame holding the
///         first `nrows` rows -- confirm.
188 std::shared_ptr<unity_sframe_base> head(
size_t nrows)
override;
/// Declares column_index; overrides a base-class virtual.
/// @param name  Column name, by const reference.
/// @return A size_t index.
/// NOTE(review): behaviour for an unknown name is not visible here -- confirm.
193 size_t column_index(
const std::string& name)
override;
/// Declares column_name (non-override).
/// @param index  Column position as size_t.
/// @return Const reference to a string. Because a reference is returned, the
///         caller must not keep it beyond the lifetime of the referenced
///         storage (presumably owned by this object -- confirm).
198 const std::string& column_name(
size_t index);
/// Declares contains_column (non-override).
/// @param name  Column name, by const reference.
/// @return bool; presumably true when a column with that name exists -- confirm.
204 bool contains_column(
const std::string &name);
/// Declares tail; overrides a base-class virtual.
/// @param nrows  Row count as size_t.
/// @return shared_ptr<unity_sframe_base>; presumably a frame holding the
///         last `nrows` rows -- confirm.
216 std::shared_ptr<unity_sframe_base> tail(
size_t nrows)
override;
/// Declares the by-name overload of select_column; overrides a base-class virtual.
/// @param name  Column name, by const reference.
/// @return shared_ptr<unity_sarray_base> for the selected column.
227 std::shared_ptr<unity_sarray_base> select_column(
const std::string &name)
override;
/// Declares the by-index overload of select_column (non-override).
/// @param idx  Column position as size_t.
/// @return shared_ptr<unity_sarray_base> for the selected column.
233 std::shared_ptr<unity_sarray_base> select_column(
size_t idx);
/// Declares logical_filter; overrides a base-class virtual.
/// @param index  shared_ptr<unity_sarray_base>, by value.
/// @return shared_ptr<unity_sframe_base>.
/// NOTE(review): presumably keeps rows where `index` is truthy -- confirm.
241 std::shared_ptr<unity_sframe_base> logical_filter(std::shared_ptr<unity_sarray_base> index)
override;
/// Declares the by-name overload of select_columns; overrides a base-class virtual.
/// @param names  Column names, by const reference.
/// @return shared_ptr<unity_sframe_base>.
247 std::shared_ptr<unity_sframe_base> select_columns(
const std::vector<std::string> &names)
override;
/// Declares the by-index overload of select_columns (non-override).
/// @param indices  Column positions, by const reference.
/// @return shared_ptr<unity_sframe_base>.
252 std::shared_ptr<unity_sframe_base> select_columns(
const std::vector<size_t>& indices);
/// Declares copy (non-override); takes no arguments.
/// @return shared_ptr<unity_sframe_base>.
/// NOTE(review): whether the copy is deep or shares storage is not visible
/// here -- confirm.
257 std::shared_ptr<unity_sframe_base>
copy();
/// Declares add_column; overrides a base-class virtual.
/// @param data  shared_ptr<unity_sarray_base>, by value.
/// @param name  Column name, by const reference.
265 void add_column(std::shared_ptr<unity_sarray_base >data,
const std::string &name)
override;
/// Declares add_columns; overrides a base-class virtual.
/// @param data_list  std::list of shared_ptr<unity_sarray_base>, by value.
/// @param name_vec   Vector of strings, by value.
/// NOTE(review): presumably `data_list` and `name_vec` pair up positionally
/// and must have equal length -- confirm.
277 void add_columns(std::list<std::shared_ptr<unity_sarray_base>> data_list,
278 std::vector<std::string> name_vec)
override;
/// Declares transform; overrides a base-class virtual.
/// @param lambda  String, by const reference.
/// @param seed    64-bit unsigned value.
/// @return shared_ptr<unity_sarray_base>.
/// NOTE(review): the embedded listing numbers jump from 284 to 287, so
/// parameter lines appear to be missing from this view -- restore them from
/// the original header before relying on this signature.
284 std::shared_ptr<unity_sarray_base>
transform(
const std::string& lambda,
287 uint64_t
seed)
override;
296 uint64_t seed)
override;
302 std::shared_ptr<unity_sarray_base> transform_lambda(
/// Declares flat_map; overrides a base-class virtual.
/// @param lambda               String, by const reference.
/// @param output_column_names  Vector of strings, by value.
/// @param output_column_types  Vector of flex_type_enum, by value.
/// @param seed                 64-bit unsigned value.
/// @return shared_ptr<unity_sframe_base>.
/// NOTE(review): the embedded listing numbers skip 314, so a parameter line
/// appears to be missing from this view -- restore it from the original header.
311 std::shared_ptr<unity_sframe_base> flat_map(
const std::string& lambda,
312 std::vector<std::string> output_column_names,
313 std::vector<flex_type_enum> output_column_types,
315 uint64_t seed)
override;
/// Declares set_column_name; overrides a base-class virtual.
/// @param i     Column position as size_t.
/// @param name  String, by value.
322 void set_column_name(
size_t i, std::string name)
override;
/// Declares remove_column; overrides a base-class virtual.
/// @param i  Column position as size_t.
327 void remove_column(
size_t i)
override;
/// Declares swap_columns; overrides a base-class virtual.
/// @param i  First column position as size_t.
/// @param j  Second column position as size_t.
332 void swap_columns(
size_t i,
size_t j)
override;
/// Declares get_underlying_sframe (non-override, not const-qualified).
/// @return shared_ptr<sframe>.
/// NOTE(review): may need to materialize the frame first -- confirm.
337 std::shared_ptr<sframe> get_underlying_sframe();
/// Declares get_planner_node (non-override).
/// @return shared_ptr<query_eval::planner_node>; presumably m_planner_node -- confirm.
342 std::shared_ptr<query_eval::planner_node> get_planner_node();
/// Declares set_sframe (non-override).
/// @param sf_ptr  shared_ptr<sframe>, by const reference.
347 void set_sframe(
const std::shared_ptr<sframe>& sf_ptr);
/// Declares begin_iterator; overrides a base-class virtual.
/// NOTE(review): presumably (re)initializes the iterator_* members declared
/// further down -- confirm.
369 void begin_iterator()
override;
/// Declares iterator_get_next; overrides a base-class virtual.
/// @param len  A size_t; presumably the maximum number of rows to fetch -- confirm.
/// @return Vector of rows, each row a vector of flexible_type, by value.
383 std::vector< std::vector<flexible_type> > iterator_get_next(
size_t len)
override;
/// Declares save_as_csv; overrides a base-class virtual.
/// @param url             String, by const reference.
/// @param writing_config  Map from option name to flexible_type, by value.
408 void save_as_csv(
const std::string& url,
409 std::map<std::string, flexible_type> writing_config)
override;
/// Declares random_split; overrides a base-class virtual.
/// @param percent      float.
/// @param random_seed  64-bit unsigned seed.
/// @param exact        bool, defaults to false.
/// @return std::list of shared_ptr<unity_sframe_base>.
/// NOTE(review): default arguments on virtual functions are chosen by the
/// static type at the call site, so this default must stay in sync with the
/// base-class declaration.
416 std::list<std::shared_ptr<unity_sframe_base>> random_split(
float percent, uint64_t random_seed,
bool exact=
false)
override;
/// Declares shuffle; takes no arguments and overrides a base-class virtual.
/// @return shared_ptr<unity_sframe_base>.
423 std::shared_ptr<unity_sframe_base>
shuffle()
override;
/// Declares sample; overrides a base-class virtual.
/// @param percent      float.
/// @param random_seed  64-bit unsigned seed.
/// @param exact        bool, defaults to false.
/// @return shared_ptr<unity_sframe_base>.
/// NOTE(review): default arguments on virtual functions are chosen by the
/// static type at the call site, so this default must stay in sync with the
/// base-class declaration.
430 std::shared_ptr<unity_sframe_base> sample(
float percent, uint64_t random_seed,
bool exact=
false)
override;
/// Declares materialize; takes no arguments and overrides a base-class virtual.
/// NOTE(review): presumably forces evaluation of the lazy plan held in
/// m_planner_node -- confirm.
437 void materialize()
override;
/// Declares is_materialized; overrides a base-class virtual.
/// @return bool.
442 bool is_materialized()
override;
/// Declares query_plan_string; overrides a base-class virtual.
/// @return String by value; presumably a textual rendering of the query plan -- confirm.
447 std::string query_plan_string()
override;
/// Declares has_size; overrides a base-class virtual.
/// @return bool; presumably whether size() is known without extra work -- confirm.
452 bool has_size()
override;
461 const std::vector<std::string>& key_columns,
462 const std::vector<std::vector<std::string>>& group_columns,
463 const std::vector<std::string>& group_output_columns,
464 const std::vector<std::string>& group_operations)
override;
470 const std::vector<std::string>& key_columns,
471 const std::vector<std::vector<std::string>>& group_columns,
472 const std::vector<std::string>& group_output_columns,
473 const std::vector<std::shared_ptr<group_aggregate_value>>& group_operations);
/// Declares append; overrides a base-class virtual.
/// @param other  shared_ptr<unity_sframe_base>, by value.
/// @return shared_ptr<unity_sframe_base>.
480 std::shared_ptr<unity_sframe_base> append(std::shared_ptr<unity_sframe_base> other)
override;
482 inline std::shared_ptr<unity_sframe_base> join(std::shared_ptr<unity_sframe_base >right,
483 const std::string join_type,
484 const std::map<std::string,std::string>& join_keys)
override 485 {
return join_with_custom_name(right, join_type, join_keys, {}); }
/// Declares join_with_custom_name; overrides a base-class virtual. The inline
/// join() above forwards to this with an empty `alternative_names`.
/// @param right              shared_ptr<unity_sframe_base>, by value.
/// @param join_type          String, by const value.
/// @param join_keys          String-to-string map, by const reference.
/// @param alternative_names  String-to-string map, by const reference.
/// @return shared_ptr<unity_sframe_base>.
487 std::shared_ptr<unity_sframe_base> join_with_custom_name(std::shared_ptr<unity_sframe_base >right,
488 const std::string join_type,
489 const std::map<std::string,std::string>& join_keys,
490 const std::map<std::string,std::string>& alternative_names)
override;
/// Declares sort; overrides a base-class virtual.
/// @param sort_keys       Column names, by const reference.
/// @param sort_ascending  Vector of int, by const reference; presumably one
///                        flag per key with non-zero meaning ascending -- confirm.
/// @return shared_ptr<unity_sframe_base>.
492 std::shared_ptr<unity_sframe_base>
sort(
const std::vector<std::string>& sort_keys,
493 const std::vector<int>& sort_ascending)
override;
511 std::shared_ptr<unity_sarray_base> pack_columns(
512 const std::vector<std::string>& pack_column_names,
513 const std::vector<std::string>& dict_key_names,
/// Declares stack; overrides a base-class virtual.
/// @param column_name       String, by const reference.
/// @param new_column_names  Vector of strings, by const reference.
/// @param new_column_types  Vector of flex_type_enum, by const reference.
/// @param drop_na           bool.
/// @return shared_ptr<unity_sframe_base>.
541 std::shared_ptr<unity_sframe_base> stack(
542 const std::string& column_name,
543 const std::vector<std::string>& new_column_names,
544 const std::vector<flex_type_enum>& new_column_types,
545 bool drop_na)
override;
/// Declares copy_range; overrides a base-class virtual.
/// @param start  size_t.
/// @param step   size_t.
/// @param end    size_t.
/// @return shared_ptr<unity_sframe_base>.
/// NOTE(review): presumably selects rows start, start+step, ... below `end`;
/// whether `end` is exclusive and how step == 0 is handled are not visible
/// here -- confirm.
553 std::shared_ptr<unity_sframe_base>
copy_range(
size_t start,
size_t step,
size_t end)
override;
/// Declares drop_missing_values; overrides a base-class virtual.
/// @param column_names  Vector of strings, by const reference.
/// @param all           bool.
/// @param split         bool.
/// @param recursive     bool.
/// @return std::list of shared_ptr<unity_sframe_base>.
/// NOTE(review): the meaning of the three flags and the number/order of
/// returned frames are not visible here -- confirm.
574 std::list<std::shared_ptr<unity_sframe_base>> drop_missing_values(
575 const std::vector<std::string>& column_names,
bool all,
bool split,
576 bool recursive)
override;
/// Declares save; a const member function that overrides a base-class virtual.
/// @param oarc  Output archive (oarchive), by non-const reference.
580 void save(
oarchive& oarc)
const override;
/// Declares delete_on_close; takes no arguments and overrides a base-class virtual.
/// NOTE(review): presumably marks backing files for deletion when the frame
/// is released -- confirm.
584 void delete_on_close()
override;
/// Declares logical_filter_split (non-override).
/// @param logical_filter_array  shared_ptr<unity_sarray_base>, by value.
/// @return std::list of shared_ptr<unity_sframe_base>.
/// NOTE(review): presumably the matching and non-matching partitions; their
/// order is not visible here -- confirm.
595 std::list<std::shared_ptr<unity_sframe_base>> logical_filter_split(
596 std::shared_ptr<unity_sarray_base> logical_filter_array);
/// Declares explore; overrides a base-class virtual.
/// @param path_to_client  String, by const reference.
/// @param title           String, by const reference.
598 void explore(
const std::string& path_to_client,
const std::string& title)
override;
/// Declares show; overrides a base-class virtual.
/// @param path_to_client  String, by const reference.
599 void show(
const std::string& path_to_client)
override;
/// Declares plot; takes no arguments and overrides a base-class virtual.
/// @return shared_ptr<model_base>.
600 std::shared_ptr<model_base> plot()
override;
// Data members.
// Shared pointer to a query_eval::planner_node.
607 std::shared_ptr<query_eval::planner_node> m_planner_node;
// Column names held as a vector of strings.
609 std::vector<std::string> m_column_names;
// Shared pointer to an sframe; presumably a cached materialized result -- confirm.
611 std::shared_ptr<sframe> m_cached_sframe;
// Iterator state: segment id counter, initialized to 0.
618 size_t iterator_next_segment_id = 0;
// Iterator state: uniquely-owned sframe_reader.
624 std::unique_ptr<sframe_reader> iterator_sframe_ptr;
// Iterator state: uniquely-owned sframe_iterator for the current segment.
// sframe_iterator is only forward-declared at the top of this header, so the
// owning class's destructor must be defined where the type is complete.
630 std::unique_ptr<sframe_iterator> iterator_current_segment_iter;
// Iterator state: uniquely-owned sframe_iterator; presumably the end
// sentinel for the current segment -- confirm.
636 std::unique_ptr<sframe_iterator> iterator_current_segment_enditer;
/// Declares _convert_column_names_to_indices (non-override helper).
/// @param column_names  Vector of strings, by const reference.
/// @return Vector of size_t, by value; presumably one index per input name -- confirm.
649 std::vector<size_t> _convert_column_names_to_indices(
const std::vector<std::string>& column_names);
/// Declares generate_next_column_name (non-override helper); takes no arguments.
/// @return String by value; presumably a column name not already in use -- confirm.
665 std::string generate_next_column_name();
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
std::shared_ptr< sframe > sort(std::shared_ptr< planner_node > sframe_planner_node, const std::vector< std::string > column_names, const std::vector< size_t > &sort_column_indices, const std::vector< bool > &sort_orders)
void copy_range(S &&input, T &&output, size_t start, size_t step, size_t end)
std::vector< sframe > shuffle(sframe sframe_in, size_t n, std::function< size_t(const std::vector< flexible_type > &)> hash_fn, std::function< void(const std::vector< flexible_type > &, size_t)> emit_call_back=std::function< void(const std::vector< flexible_type > &, size_t)>())
void copy(Iterator begin, Iterator end, SWriter &&writer)
std::shared_ptr< sframe > groupby_aggregate(const std::shared_ptr< planner_node > &source, const std::vector< std::string > &source_column_names, const std::vector< std::string > &keys, const std::vector< std::string > &output_column_names, const std::vector< std::pair< std::vector< std::string >, std::shared_ptr< group_aggregate_value >>> &groups)
void split(S &&input, T &&output1, T &&output2, FilterFn filterfn, size_t random_seed=std::time(NULL))
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
void transform(S &&input, T &&output, TransformFn transformfn, std::set< size_t > constraint_segments=std::set< size_t >())