Turi Create  4.0
unity_sframe_builder.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_UNITY_SFRAME_BUILDER_HPP
7 #define TURI_UNITY_SFRAME_BUILDER_HPP
8 
9 #include <vector>
10 #include <set>
11 #include <core/storage/sframe_data/sframe.hpp>
12 #include <boost/circular_buffer.hpp>
13 #include <model_server/lib/api/unity_sframe_builder_interface.hpp>
14 
/// Circular buffer of rows, where each row is a vector of flexible_type values.
15 typedef boost::circular_buffer<std::vector<turi::flexible_type>> row_history_t;
16 
17 namespace turi {
18 
19 /**
20  * Provides a Python interface to incrementally build an SFrame.
21  *
22  * Unlike most other unity objects, this is not a wrapper of another
23  * "sframe_builder" class, but provides the implementation. This is because it
24  * is a slightly embellished wrapper around the SFrame's output iterator, so
25  * there is no further functionality that needs to be available for the C++
26  * side.
27  *
28  * The unity_sframe_builder is designed to append values until \ref close is
29  * called, which returns the SFrame. No "reopening" is allowed, and no
30  * operations in that instance of unity_sframe_builder will work after close is
31  * called.
32  *
33  * This also doesn't wrap the already existing \ref unity_sarray_builder
34  * despite its similarity, because using the sframe output iterator allows for
35  * multiple columns to be kept in the same file.
36  */
37 class unity_sframe_builder: public unity_sframe_builder_base {
38  public:
39  /**
40  * Default constructor. Does nothing
    *
    * NOTE(review): no constructor declaration follows this comment in this
    * listing (embedded line 42 is absent) — confirm against the original
    * header.
41  */
43 
44  /**
45  * Initialize the unity_sframe_builder.
46  *
47  * This essentially opens the output iterator for writing. Column names and
48  * column types are required arguments.
    *
    * \param num_segments presumably the number of segments that \ref append
    * may write to — confirm in the implementation
    * \param history_size presumably how many recent rows are retained for
    * \ref read_history — confirm in the implementation
    * \param column_names names of the columns of the future SFrame
    * \param column_types types of the columns of the future SFrame
    * \param save_location NOTE(review): looks like the location the SFrame is
    * written to — confirm in the implementation
49  */
50  void init(size_t num_segments,
51  size_t history_size,
52  std::vector<std::string> column_names,
53  std::vector<flex_type_enum> column_types,
54  std::string save_location);
55 
56  /**
57  * Add a single row of flexible_types to the SFrame.
58  *
59  * The \p segment number allows the user to use the parallel interface provided
60  * by the underlying output_iterator.
61  *
62  * Throws if:
63  * - init hasn't been called or close has been called
64  * - segment number is invalid
65  * - the type of \p row differs from the type of the elements already
66  * appended (except if only UNDEFINED elements have been appended).
67  *
68  */
69  void append(const std::vector<flexible_type> &row, size_t segment);
70 
71  /**
72  * A wrapper of \ref append which adds multiple rows to the SFrame.
73  *
74  * Throws if:
75  * - init hasn't been called or close has been called
76  * - segment number is invalid
77  * - the type of any values in \p rows differs from the type of the
78  * elements already appended (except if only UNDEFINED elements have been
79  * appended).
80  */
81  void append_multiple(const std::vector<std::vector<flexible_type>> &rows,
82  size_t segment);
83 
84  /**
85  * Return the column names of the future SFrame.
86  */
87  std::vector<std::string> column_names();
88 
89  /**
90  * Return the column types of the future SFrame.
91  */
92  std::vector<flex_type_enum> column_types();
93 
94  /**
95  * Return the last \p num_elems rows appended.
    *
    * NOTE(review): presumably restricted to rows appended to \p segment —
    * confirm in the implementation.
96  */
97  std::vector<std::vector<flexible_type>> read_history(size_t num_elems,
98  size_t segment);
99 
100  /**
101  * Finalize SFrame and return it.
    *
    * Per the class description, no operation on this instance works after
    * close has been called.
102  */
103  std::shared_ptr<unity_sframe_base> close();
104 
106  unity_sframe_builder& operator=(const unity_sframe_builder&) = delete; ///< Copy assignment is disabled.
107  private:
108  /// Methods
109 
110  /// Variables
111  bool m_inited = false; ///< Starts false; presumably set by init — confirm in the implementation.
112  bool m_closed = false; ///< Starts false; presumably set by close — confirm in the implementation.
113  sframe m_sframe;
114  std::vector<sframe::iterator> m_out_iters; ///< Output iterators; presumably one per segment — confirm.
115  std::string m_sframe_index_file;
116 
117  std::vector<std::shared_ptr<row_history_t>> m_history; ///< Row-history buffers; presumably one per segment — confirm.
118 
119  dir_archive m_dirarc;
120 
121 };
122 
123 } // namespace turi
124 #endif // TURI_UNITY_SFRAME_BUILDER_HPP
std::vector< flex_type_enum > column_types()
void append_multiple(const std::vector< std::vector< flexible_type >> &rows, size_t segment)
void append(const std::vector< flexible_type > &row, size_t segment)
void init(size_t num_segments, size_t history_size, std::vector< std::string > column_names, std::vector< flex_type_enum > column_types, std::string save_location)
std::vector< std::string > column_names()
std::vector< std::vector< flexible_type > > read_history(size_t num_elems, size_t segment)
std::shared_ptr< unity_sframe_base > close()