Turi Create  4.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
shuffle.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SFRAME_SHUFFLE_HPP
7 #define TURI_SFRAME_SHUFFLE_HPP
8 
9 #include <vector>
10 #include <core/storage/sframe_data/sframe.hpp>
11 
12 namespace turi {
13 
14 /**
15  * \ingroup sframe_physical
16  * \addtogroup sframe_main Main SFrame Objects
17  * \{
18  */
19 
20 
21 /**
22  * Shuffle the rows in one sframe into a collection of n sframes.
23  * Each output SFrame contains one segment.
24  *
25  * \code
26  * std::vector<sframe> ret(n);
27  * for (auto& sf : ret) {
28  * INIT_WITH_NAMES_COLUMNS_AND_ONE_SEG(sframe_in.column_names(), sframe_in.column_types());
29  * }
30  * for (auto& row : sframe_in) {
31  * size_t idx = hash_fn(row) % n;
32  * add_row_to_sframe(ret[idx], row); // the order of addition is not guaranteed.
33  * }
34  * \endcode
35  *
36  * The result sframes have the same column names and types (including
37  * empty sframes). A result sframe can have 0 rows if none of the
38  * rows in the input sframe are hashed to it. (If n is greater than
39  * the size of the input sframe, there will be at least
40  * (n - sframe_in.size()) empty sframes in the return vector.)
41  *
42  * \param n the number of output sframes.
43  * \param hash_fn the hash function for each row in the input sframe.
44  *
45  * \return A vector of n sframes.
46  */
47 std::vector<sframe> shuffle(  // distributes the rows of sframe_in across n output sframes
48  sframe sframe_in,  // input sframe whose rows are shuffled; taken by value
49  size_t n,  // number of output sframes in the returned vector
50  std::function<size_t(const std::vector<flexible_type>&)> hash_fn,  // row -> hash; the row goes to output (hash_fn(row) % n)
51  std::function<void(const std::vector<flexible_type>&, size_t)> emit_call_back  // optional; NOTE(review): presumably invoked with each row and its output index -- confirm against the implementation
52  = std::function<void(const std::vector<flexible_type>&, size_t)>());  // defaults to an empty std::function. NOTE(review): <functional> is not included directly in this header; presumably it arrives via sframe.hpp -- confirm
53 
54 /// \}
55 //
56 } // turi
57 
58 #endif
std::vector< sframe > shuffle(sframe sframe_in, size_t n, std::function< size_t(const std::vector< flexible_type > &)> hash_fn, std::function< void(const std::vector< flexible_type > &, size_t)> emit_call_back=std::function< void(const std::vector< flexible_type > &, size_t)>())