Turi Create  4.0
siterable.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_UNITY_SITERABLE_HPP
7 #define TURI_UNITY_SITERABLE_HPP
8 #include <cstddef>
9 #include <vector>
10 namespace turi {
11 
12 /**
13  * \internal
14  * \ingroup sframe_physical
15  * \addtogroup sframe_internal SFrame Internal
16  * \{
17  */
18 
19 /**
20  * The base interface type SIterable<T> conceptually provides a simple,
21  * minimal parallel InputIterator concept.
22  *
23  * The SIterable manages the entire collection of parallel iterators within
24  * one object for simplicity. Conceptually, the SIterable defines a sequence
25  * of objects of type T which is cut into a collection of segments (#segments
26  * returned by num_segments). You can get the iterators of a segment via
27  * begin(segmentid) and end(segmentid). Basically, parallel iteration can
28  * be written as:
29  * \code
30  * #pragma omp parallel for
31  * for (int s = 0; s < sarray.num_segments(); ++s) {
32  * auto iter = sarray.begin(s);
33  * auto end = sarray.end(s);
34  * while(iter != end) {
35  * ...
36  * ++iter;
37  * }
38  * }
39  * \endcode
40  */
template<typename Iterator>
class siterable {
 public:
  /// The iterator type used to walk the rows of a single segment.
  using iterator = Iterator;

  /// The element type exposed by the iterator (Iterator::value_type).
  using value_type = typename Iterator::value_type;

  /// Virtual so that implementations can be destroyed through a pointer
  /// or reference to this interface.
  virtual ~siterable() = default;

  /// Return the number of segments in the collection.
  virtual size_t num_segments() const = 0;

  /**
   * Return the number of rows in the segment.
   * \param segment Index of the segment being queried
   */
  virtual size_t segment_length(size_t segment) const = 0;

  /**
   * Return the begin iterator of the segment.
   * \param segmentid Index of the segment to iterate over
   */
  virtual Iterator begin(size_t segmentid) const = 0;

  /**
   * Return the end iterator of the segment.
   * \param segmentid Index of the segment to iterate over
   */
  virtual Iterator end(size_t segmentid) const = 0;

  /**
   * Reads a collection of rows, storing the result in out_obj.
   * This function is independent of the begin/end iterator
   * functions, and can be called anytime. This function is also fully
   * concurrent.
   * \param row_start First row to read
   * \param row_end One past the last row to read (i.e. EXCLUSIVE). row_end can
   *                be beyond the end of the array, in which case,
   *                fewer rows will be read.
   * \param out_obj The output array
   * \returns Actual number of rows read. Return (size_t)(-1) on failure.
   *
   * \note This function is not always efficient. Different file formats
   * implementations will have different characteristics.
   */
  virtual size_t read_rows(size_t row_start,
                           size_t row_end,
                           std::vector<value_type>& out_obj) = 0;

  /// Reset all iterators (must be called in between creating
  /// two iterators on the same segment).
  virtual void reset_iterators() = 0;
};
85 
86 /// \}
87 } // namespace turi
88 #endif
virtual size_t num_segments() const =0
Return the number of segments in the collection.
virtual void reset_iterators()=0
virtual size_t segment_length(size_t segment) const =0
Return the number of rows in the segment.
virtual Iterator end(size_t segmentid) const =0
Return the end iterator of the segment.
virtual size_t read_rows(size_t row_start, size_t row_end, std::vector< typename Iterator::value_type > &out_obj)=0
virtual Iterator begin(size_t segmentid) const =0
Return the begin iterator of the segment.