Turi Create  4.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
sort.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_QUERY_EVAL_SORT_HPP
7 #define TURI_QUERY_EVAL_SORT_HPP
8 
9 #include <vector>
10 #include <memory>
11 
12 namespace turi {
13 
14 class sframe;
15 
16 namespace query_eval {
17 
18 struct planner_node;
19 
20 /**
21  * \ingroup sframe_query_engine
22  * \addtogroup Algorithms Algorithms
23  * \{
24  */
25 
26 /**
27  * Sort given SFrame.
28  *
29  * The algorithm is like the following:
30  * - First do a quantile sketch over all sort columns and use the quantile sketch to
31  * figure out the partition keys that we will use to split the sframe rows into
32  * small chunks so that each chunk is realtively sorted. Each chunk is small enough
33  * so that we could sort in memory
34  * - Scatter partition the sframe according to above partition keys. The resulting
35  * value is persisted. Each partition is stored as one segment in a sarray.
36  * - The sorting resulting is then lazily materialized through le_sort operator
37  *
38  * There are a few optimizations along the way:
39  * - if all sorting keys are the same, then no need to sort
40  * - if the sframe is small enough to fit into memory, then we simply do a in
41  * memory sort
42  * - if some partitions of the sframe have the same sorting key, then that partition
43  * will not be sorted
44  *
45  * Also see \ref ec_sort for another sort implementation
46  *
47  * \param sframe_planner_node The lazy sframe to be sorted
48  * \param sort_column_names The columns to be sorted
49  * \param sort_orders The order for each column to be sorted, true is ascending
50  * \return The sorted sframe
51  **/
52 std::shared_ptr<sframe> sort(
53  std::shared_ptr<planner_node> sframe_planner_node,
54  const std::vector<std::string> column_names,
55  const std::vector<size_t>& sort_column_indices,
56  const std::vector<bool>& sort_orders);
57 
58 } // end of query_eval
59 } // end of turicreate
60 
61 /// \}
62 
63 #endif //TURI_SFRAME_SORT_HPP
std::shared_ptr< sframe > sort(std::shared_ptr< planner_node > sframe_planner_node, const std::vector< std::string > column_names, const std::vector< size_t > &sort_column_indices, const std::vector< bool > &sort_orders)