Turi Create  4.0
indexed_sframe_tools.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_UNITY_INDEXED_SFRAME_TOOLS_H_
7 #define TURI_UNITY_INDEXED_SFRAME_TOOLS_H_
8 
9 #include <vector>
10 #include <map>
11 #include <memory>
12 
13 namespace turi {
14 
15 class flexible_type;
16 template <typename T> class sarray;
17 
18 /**
19  * \ingroup toolkit_util
20  * Constructs a vector of the unique values present in an sframe
21  * column having integer type. The resulting vector is in sorted order,
22  * so membership can be queried using std::binary_search. When the
23  * 0, ..., n condition is met, this is faster than .unique().
24  */
25 std::vector<size_t> get_unique_values(std::shared_ptr<sarray<flexible_type> > indexed_column);
26 
27 /**
28  * \ingroup toolkit_util
29  * Convenience function: Same as get_unique_values, but returns the
30  * result as an sarray.
31  */
32 std::shared_ptr<sarray<flexible_type> > make_unique(std::shared_ptr<sarray<flexible_type> > indexed_column);
33 
34 /**
35  * \ingroup toolkit_util
36  * Creates an in-memory group lookup table from one integer column to
37  * another. In this representation, all the items in one column
38  * associated with a given value in the other column can be retrieved
39  * by that value.
40  */
42  public:
43 
44  /** Builds a mapping of integer values in src_column to a vector of
45  * all the associated values in dest_column. This can be queried
46  * using dest_group.
47  *
48  * If sort is true, then the returned elements are sorted.
49  *
50  * If uniquify is true, then the returned vector has at most one of
51  * any given element.
52  */
53  indexed_column_groupby(std::shared_ptr<sarray<flexible_type> > src_column,
54  std::shared_ptr<sarray<flexible_type> > dest_column,
55  bool sort,
56  bool uniquify);
57 
58  /** Returns a vector with all the associated values in dest_column
59  * that have src_value in the corresponding location of src_column.
60  * If src_value does not appear in src_column, then the returned
61  * vector is empty.
62  */
63  const std::vector<size_t>& dest_group(size_t src_value) const;
64 
65  private:
66  std::vector<size_t> empty_vector;
67  std::map<size_t, std::vector<size_t> > group_lookup;
68 };
69 
70 
71 }
72 
73 #endif
std::shared_ptr< sframe > sort(std::shared_ptr< planner_node > sframe_planner_node, const std::vector< std::string > column_names, const std::vector< size_t > &sort_column_indices, const std::vector< bool > &sort_orders)
std::shared_ptr< sarray< flexible_type > > make_unique(std::shared_ptr< sarray< flexible_type > > indexed_column)
indexed_column_groupby(std::shared_ptr< sarray< flexible_type > > src_column, std::shared_ptr< sarray< flexible_type > > dest_column, bool sort, bool uniquify)
std::vector< size_t > get_unique_values(std::shared_ptr< sarray< flexible_type > > indexed_column)
const std::vector< size_t > & dest_group(size_t src_value) const