Turi Create  4.0
od_data_iterator.hpp
1 /* Copyright © 2018 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 
7 #ifndef TURI_OBJECT_DETECTION_OD_DATA_ITERATOR_HPP_
8 #define TURI_OBJECT_DETECTION_OD_DATA_ITERATOR_HPP_
9 
10 #include <random>
11 #include <string>
12 #include <unordered_map>
13 #include <vector>
14 
15 #include <core/data/sframe/gl_sframe.hpp>
16 #include <ml/neural_net/float_array.hpp>
17 #include <ml/neural_net/image_augmentation.hpp>
18 
19 namespace turi {
20 namespace object_detection {
21 
22 /**
23  * Pure virtual interface for classes that produce batches of data
24  * (pre-augmentation) from a raw SFrame.
25  */
27 public:
28 
29  /* Enumerate possible values for image origin */
30  enum class annotation_origin_enum {
31  TOP_LEFT,  // Default used by `parameters`; presumably y grows downward from the top edge -- confirm.
32  BOTTOM_LEFT  // Presumably y grows upward from the bottom edge -- confirm against the implementation.
33  };
34 
35  /* Enumerate possible values for the annotation scale */
36  enum class annotation_scale_enum {
37  PIXEL,  // Default used by `parameters`; presumably coordinates are expressed in image pixels -- confirm.
38  NORMALIZED  // Presumably coordinates are fractions of the image dimensions -- confirm.
39  };
40 
41  /* Enumerate possible values for the annotation position */
42  enum class annotation_position_enum {
43  CENTER,  // Default used by `parameters`; matches the documented "x", "y" = center of the bounding box.
44  TOP_LEFT,  // Presumably "x", "y" name the box's top-left corner -- confirm.
45  BOTTOM_LEFT  // Presumably "x", "y" name the box's bottom-left corner -- confirm.
46  };
47 
48  /**
49  * Defines the inputs to a data_iterator factory function.
50  */
51  struct parameters {
52 
53  /** The SFrame to traverse */
55 
56  /**
57  * The name of the column containing the annotations.
58  *
59  * The values must either be dictionaries containing an annotation, or a
60  * list of such dictionaries. An annotation dictionary has a "label" key
61  * whose value is a string, and a "coordinates" key whose value is another
62  * dictionary containing "x", "y", "width", and "height", describing the
63  * position of the center and the size of the bounding box (in the image's
64  * coordinates, with the origin at the top left).
65  */
67 
68  /**
69  * Optional name of a column containing predictions.
70  *
71  * If not empty, then the iterator will parse and yield a secondary stream
72  * of bounding boxes, intended for use in evaluating existing predictions.
73  */
75 
76  /**
77  * The name of the column containing the images.
78  *
79  * Each value is either an image or a path to an image file on disk.
80  */
81  std::string image_column_name;
82 
83  /**
84  * The expected class labels, indexed by identifier.
85  *
86  * If empty, then the labels will be inferred from the data. If non-empty,
87  * an exception will be thrown upon encountering an unexpected label.
88  *
89  * \TODO: This should be a flex_list to accommodate integer labels!
90  */
91  std::vector<std::string> class_labels;
92 
93  annotation_origin_enum annotation_origin = annotation_origin_enum::TOP_LEFT;
94  annotation_scale_enum annotation_scale = annotation_scale_enum::PIXEL;
95  annotation_position_enum annotation_position = annotation_position_enum::CENTER;
96 
97  /**
98  * Whether to traverse the data more than once.
99  */
100  bool repeat = true;
101 
102  /** Whether to shuffle the data on subsequent traversals. */
103  bool shuffle = true;
104 
105  /** Whether the process is in training or not. */
106  bool is_training = false;
107 
108  /** Determines results of shuffle operations if enabled. */
109  int random_seed = 0;
110  };
111 
112  virtual ~data_iterator() = default;
113 
114  /**
115  * Returns a vector containing at most `batch_size` elements.
116  *
117  * If `repeat` was set in the parameters, then the iterator will cycle
118  * indefinitely through the SFrame over and over. Otherwise, the last
119  * non-empty batch may contain fewer than `batch_size` elements, and every
120  * batch after that will be empty.
121  *
122  * The x,y coordinates in the returned annotations indicate the upper-left
123  * corner of the bounding box.
124  */
125  virtual std::vector<neural_net::labeled_image>
126  next_batch(size_t batch_size) = 0;
127 
128  /** Returns true when `next_batch` will return a non-empty value. */
129  virtual bool has_next_batch() const = 0;
130 
131  /**
132  * Returns a sorted list of the unique "label" values found in the
133  * annotations.
134  */
135  virtual const std::vector<std::string>& class_labels() const = 0;
136 
137  /**
138  * Returns the number of annotations (bounding boxes) found across all rows.
139  */
140  virtual size_t num_instances() const = 0;
141 };
142 
143 /**
144  * Concrete data_iterator implementation that doesn't attempt any
145  * parallelization or background I/O.
146  *
147  * \todo This class should become an abstract_data_iterator base class with
148  * override points for dispatching work to other threads.
149  */
151 public:
152 
153  simple_data_iterator(const parameters& params);
154 
155  // Not copyable or movable.
157  simple_data_iterator& operator=(const simple_data_iterator&) = delete;
158 
159  std::vector<neural_net::labeled_image> next_batch(size_t batch_size) override;
160 
161  bool has_next_batch() const override {  // True while next_row_ has not reached the end of the range.
162  // TODO: gl_sframe_range::end() should be a const method.
163  gl_sframe_range range_iterator(range_iterator_);  // Local copy: end() is non-const but this method is const.
164  return next_row_ != range_iterator.end();  // Compares against the copy's end(); assumes it equals range_iterator_'s end -- confirm.
165  }
166 
167  const std::vector<std::string>& class_labels() const override {  // Returns a reference to a member; valid only while this iterator is alive.
168  return annotation_properties_.classes;  // Stored in the const annotation_properties_ member, so it does not change after construction.
169  }
170 
171  size_t num_instances() const override {  // Returns the stored instance count; no recomputation happens here.
172  return annotation_properties_.num_instances;  // Stored in the const annotation_properties_ member, so it does not change after construction.
173  }
174 
175 private:
176  struct annotation_properties {  // Summary of the annotations; return type of compute_properties().
177  std::vector<std::string> classes;  // Exposed to callers through class_labels().
178  std::unordered_map<std::string, int> class_to_index_map;  // Presumably maps each label to its index in `classes` -- confirm.
179  size_t num_instances;  // Exposed through num_instances(); NOTE(review): no in-class initializer, so the producer must set it.
180  };
181 
182  annotation_properties compute_properties(
183  const gl_sarray& annotations,
184  std::vector<std::string> expected_class_labels);
185 
186  gl_sframe data_;
187  const ssize_t annotations_index_;
188  const ssize_t predictions_index_;
189  const size_t image_index_;
190 
191  annotation_origin_enum annotation_origin_;
192  annotation_scale_enum annotation_scale_;
193  annotation_position_enum annotation_position_;
194 
195  const bool repeat_;
196  const bool shuffle_;
197  const bool is_training_;
198 
199  const annotation_properties annotation_properties_;
200 
201  gl_sframe_range range_iterator_;
202  gl_sframe_range::iterator next_row_;
203  std::default_random_engine random_engine_;
204 };
205 
206 } // object_detection
207 } // turi
208 
209 #endif // TURI_OBJECT_DETECTION_OD_DATA_ITERATOR_HPP_
virtual std::vector< neural_net::labeled_image > next_batch(size_t batch_size)=0
virtual bool has_next_batch() const =0
std::vector< sframe > shuffle(sframe sframe_in, size_t n, std::function< size_t(const std::vector< flexible_type > &)> hash_fn, std::function< void(const std::vector< flexible_type > &, size_t)> emit_call_back=std::function< void(const std::vector< flexible_type > &, size_t)>())
virtual const std::vector< std::string > & class_labels() const =0
virtual size_t num_instances() const =0
const std::vector< std::string > & class_labels() const override