Turi Create  4.0
ml_data_block_iterator.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_ML2_DATA_BLOCK_ITERATOR_H_
7 #define TURI_ML2_DATA_BLOCK_ITERATOR_H_
8 
9 #include <toolkits/ml_data_2/iterators/ml_data_iterator.hpp>
10 
11 
12 namespace turi { namespace v2 {
13 
14 /** This iterator acts similarly to the regular ml_data_iterator
15  * class; however, it also enables the user to implement simple
16  * iteration over blocks of rows. Here, a block is defined by a
17  * collection of rows in which the first value is common.
18  *
19  * The ml_data_block_iterator does this by providing two additional
20  * functionalities beyond ml_data_iterator:
21  *
22  * 1. is_start_of_new_block() returns true only if the first value
23  * in the current row differs from the first value in the previous
24  * row. (It is also true at the starting bound of iteration). Thus
25  * the user knows when to switch to a new block.
26  *
27  * 2. If the iteration range is broken up by threads,
28  * i.e. num_threads > 1, then the effective bounds of the partitions
29  * of each individual iterator will always be on the boundaries
30  * between blocks. Thus parallel iteration will never split a block
31  * between two threads.
32  *
33  * In all other respects, this iterator behaves just like
34  * ml_data_iterator.
35  */
37 public:
38 
39  void internal_setup(const std::map<std::string, flexible_type>& options);
40 
41  /** This function returns true if the current observation is the
42  * start of a new block.
43  */
44  bool is_start_of_new_block() const {
45  return current_row_is_start_of_new_block;
46  }
47 
48  /** Advance the iterator to the next row.
49  */
51  DASSERT_LT(current_row_index, global_row_end);
52 
53  size_t old_entry = _raw_row_entry(0).index_value;
54 
55  // If this flag is true, then the iterator falsely thinks we're
56  // done and doesn't load a new block
57  current_row_is_start_of_new_block = false;
58 
59  DASSERT_LT(current_row_index, global_row_end);
60 
61  advance_row();
62 
63  DASSERT_LE(current_row_index, global_row_end);
64 
65  if(!done()) {
66  size_t current_entry = _raw_row_entry(0).index_value;
67 
68  current_row_is_start_of_new_block = (current_entry != old_entry);
69 
70  } else {
71  current_row_is_start_of_new_block = true;
72  }
73 
74  return *this;
75  }
76 
/**
 * Resets the iterator to the start of the sframes in ml_data.
 *
 * Only the declaration appears in this header; the definition is not
 * visible here.
 */
void reset();
81 
82  /** Returns true if we are done with the iteration range of the
83  * current iterator and false otherwise.
84  */
85  inline bool done() const GL_HOT_INLINE_FLATTEN {
86 
87  DASSERT_LE(current_row_index, global_row_end);
88 
89  // We aren't done until we hit the end of a block
90  return (current_row_index == global_row_end ||
92  && current_row_is_start_of_new_block));
93  }
94 
95 private:
96 
// True when the current row begins a new block. Recomputed by
// operator++() on every advance; also true at the very start of the
// larger block.
bool current_row_is_start_of_new_block;
100 };
101 
102 }}
103 
104 #endif
void advance_row() GL_HOT_INLINE_FLATTEN
ml_data_internal::entry_value _raw_row_entry(size_t raw_index) const GL_HOT_INLINE_FLATTEN
#define GL_HOT_INLINE_FLATTEN
bool done() const GL_HOT_INLINE_FLATTEN
const ml_data_block_iterator & operator++() GL_HOT_INLINE_FLATTEN