Turi Create  4.0
model_spec.hpp
/* Copyright © 2018 Apple Inc. All rights reserved.
 *
 * Use of this source code is governed by a BSD-3-clause license that can
 * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 */

#ifndef UNITY_TOOLKITS_NEURAL_NET_MODEL_SPEC_HPP_
#define UNITY_TOOLKITS_NEURAL_NET_MODEL_SPEC_HPP_

#include <array>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include <ml/neural_net/float_array.hpp>
#include <ml/neural_net/weight_init.hpp>

// Forward declare CoreML::Specification::NeuralNetwork in lieu of including
// problematic protocol buffer headers.
namespace CoreML {
namespace Specification {
class NeuralNetwork;
class Pipeline;
class WeightParams;
}
}

namespace turi {
namespace neural_net {

/**
 * Representation for a neural-network model (structure and parameters),
 * optimized for convenient export to CoreML.
 *
 * This class just wraps CoreML::Specification::NeuralNetwork, helping to
 * insulate toolkits from protobuf code.
 */
class model_spec {
public:

  /** Parameter for convolution and pooling layers. */
  enum class padding_type {
    VALID,
    SAME,
  };

  /** Parameter for the padding layer. */
  enum class padding_policy {
    REFLECTIVE,
    REPLICATION,
    ZERO,
  };

  /** Parameter for pooling types. */
  enum class pooling_type { MAX, AVERAGE, L2 };

  /**
   * Creates an empty model_spec (with no layers).
   */
  model_spec();

  /**
   * Initializes a model_spec from a NeuralNetwork proto.
   */
  model_spec(const CoreML::Specification::NeuralNetwork& nn_model);

  /**
   * Initializes a model_spec from the top-level NeuralNetwork found inside a
   * CoreML model specification on disk.
   *
   * \param mlmodel_path Path to a CoreML::Specification::Model proto on disk.
   * \throw If the indicated path could not be read or parsed.
   */
  model_spec(const std::string& mlmodel_path);
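
  // Example (illustrative sketch; the .mlmodel path is hypothetical):
  //
  //   model_spec spec;                              // empty network, no layers
  //   model_spec loaded("/tmp/my_model.mlmodel");   // throws if unreadable
  //
  // Later examples in this header reuse `spec` for brevity.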

  // Declared here and defined in the .cpp file just to prevent the implicit
  // default destructor from attempting (and failing) to instantiate
  // std::unique_ptr<NeuralNetwork>::~unique_ptr()
  model_spec(model_spec&&);
  model_spec& operator=(model_spec&&);
  ~model_spec();

  /**
   * Exposes the underlying CoreML proto.
   */
  const CoreML::Specification::NeuralNetwork& get_coreml_spec() const {
    return *impl_;
  }

  /**
   * Transfers ownership of the underlying CoreML proto, invalidating the
   * current instance (leaving it in a "moved-from" state).
   *
   * (Note that this method may only be invoked on a model_spec&&.)
   */
  std::unique_ptr<CoreML::Specification::NeuralNetwork> move_coreml_spec() &&;
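
  // Example (illustrative sketch; `spec` is a model_spec built elsewhere): the
  // rvalue-reference qualifier means the proto can only be taken from an
  // expiring value:
  //
  //   std::unique_ptr<CoreML::Specification::NeuralNetwork> proto =
  //       std::move(spec).move_coreml_spec();
  //   // `spec` is now moved-from and should not be used further.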

  /**
   * Creates a shared_float_array view (weak reference) into the parameters of
   * the model, indexed by layer name.
   *
   * \return A dictionary whose keys are of the form
   *         "$layername_$paramname". The layer names are taken from the name
   *         field of each NeuralNetworkLayer containing a supported layer. The
   *         supported layers are ConvolutionLayerParams (with params "weight"
   *         (in NCHW order) and "bias") and BatchnormLayerParams (with params
   *         "gamma", "beta", "running_mean", and "running_var").
   * \throw If a NeuralNetworkLayer in the specification seems malformed
   *        (e.g. WeightParams with size inconsistent with declared layer
   *        shape).
   *
   * To avoid copying data, the data backing the shared_float_array instances in
   * the return value will only remain valid for the lifetime of this instance!
   */
  float_array_map export_params_view() const;
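
  // Example (illustrative sketch; the layer name "conv1" is hypothetical, and
  // the shared_float_array accessors are assumed from float_array.hpp):
  //
  //   float_array_map params = spec.export_params_view();
  //   const shared_float_array& conv1_weight = params.at("conv1_weight");
  //   size_t n = conv1_weight.size();  // element count, NCHW order
  //
  // The views are weak references: keep `spec` alive while using them.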

  /**
   * Overwrites existing WeightParams values using the provided float_array
   * values.
   *
   * \param weights A dictionary whose keys follow the same naming scheme used
   *        by `export_params_view`.
   * \param use_quantization If true, weights are stored in half precision.
   * \throw If a float_array's shape does not match the corresponding
   *        NeuralNetworkLayer.
   */
  void update_params(const float_array_map& weights, bool use_quantization = false);
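
  // Example (illustrative sketch; assumes `trained_weights` is a
  // float_array_map keyed like the output of export_params_view()):
  //
  //   spec.update_params(trained_weights);
  //   spec.update_params(trained_weights, /*use_quantization=*/true);  // fp16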

  /**
   * Determines whether the neural network contains a layer with the given
   * output name.
   *
   * In general, it is only safe to add a new layer that takes a named input if
   * this method returns true for that name.
   */
  bool has_layer_output(const std::string& layer_name) const;

  /**
   * Appends a ReLU activation layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   */
  void add_relu(const std::string& name, const std::string& input);

  /**
   * Appends a leaky ReLU activation layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param alpha Scale factor applied to negative inputs
   */
  void add_leakyrelu(const std::string& name, const std::string& input,
                     float alpha);

  /**
   * Appends a sigmoid activation layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   */
  void add_sigmoid(const std::string& name, const std::string& input);

  /**
   * Appends a pooling layer. By default, it performs max pooling.
   *
   * The pooling type can be:
   * - MAX
   * - AVERAGE
   * - L2
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param pooling The type of pooling this layer performs.
   * \param use_poolexcludepadding If true, padded values are excluded from the
   *        count (denominator) when computing average pooling.
   */
  void add_pooling(const std::string& name, const std::string& input, size_t kernel_height,
                   size_t kernel_width, size_t stride_h, size_t stride_w, padding_type padding,
                   bool use_poolexcludepadding = false, pooling_type pooling = pooling_type::MAX);
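
  // Example (illustrative sketch; layer and input names are hypothetical):
  //
  //   // 2x2 average pooling with stride 2 and VALID padding.
  //   spec.add_pooling("pool1", "conv1", /*kernel_height=*/2, /*kernel_width=*/2,
  //                    /*stride_h=*/2, /*stride_w=*/2, model_spec::padding_type::VALID,
  //                    /*use_poolexcludepadding=*/false,
  //                    model_spec::pooling_type::AVERAGE);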

  /**
   * Appends a convolution layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param num_output_channels The number of distinct filters in this layer
   * \param num_kernel_channels The number of input features per "pixel"
   * \param kernel_height The height of the kernel
   * \param kernel_width The width of the kernel
   * \param weight_initializer_fn Callback used to initialize the conv weights
   * \param bias_initializer_fn Callback used to initialize the conv bias. If
   *        nullptr, then no bias vector is set.
   */
  void add_convolution(const std::string& name, const std::string& input,
                       size_t num_output_channels, size_t num_kernel_channels,
                       size_t kernel_height, size_t kernel_width,
                       size_t stride_h, size_t stride_w, padding_type padding,
                       weight_initializer weight_initializer_fn,
                       weight_initializer bias_initializer_fn = nullptr);
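
  // Example (illustrative sketch; assumes `conv_init` is an initializer
  // callable obtained from weight_init.hpp, and the names are hypothetical):
  //
  //   weight_initializer conv_init = /* e.g. an initializer from weight_init.hpp */;
  //   spec.add_convolution("conv1", "image", /*num_output_channels=*/16,
  //                        /*num_kernel_channels=*/3, /*kernel_height=*/3,
  //                        /*kernel_width=*/3, /*stride_h=*/1, /*stride_w=*/1,
  //                        model_spec::padding_type::SAME, conv_init);
  //   // No bias_initializer_fn was passed, so the layer has no bias vector.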

  /**
   * Appends a padding layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param padding_top The padding on the top
   * \param padding_bottom The padding on the bottom
   * \param padding_left The padding to the left
   * \param padding_right The padding to the right
   * \param policy The padding policy: zero, reflective, or replication
   */
  void add_padding(const std::string& name, const std::string& input,
                   size_t padding_top, size_t padding_bottom,
                   size_t padding_left, size_t padding_right,
                   padding_policy policy = padding_policy::REFLECTIVE);

  /**
   * Appends an upsampling layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param scaling_x The upsample scale on the x axis
   * \param scaling_y The upsample scale on the y axis
   */
  void add_upsampling(const std::string& name, const std::string& input,
                      size_t scaling_x, size_t scaling_y);

  /**
   * Appends an inner-product (dense, fully connected) layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param num_output_channels Size of the output vector
   * \param num_input_channels Size of the input vector
   * \param weight_initializer_fn Callback used to initialize the weights
   * \param bias_initializer_fn Callback used to initialize the bias. If
   *        nullptr, then no bias vector is set.
   */
  void add_inner_product(const std::string& name, const std::string& input,
                         size_t num_output_channels, size_t num_input_channels,
                         weight_initializer weight_initializer_fn,
                         weight_initializer bias_initializer_fn = nullptr);
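
  // Example (illustrative sketch; the names and the initializer callables
  // `fc_weight_init`/`fc_bias_init` are hypothetical):
  //
  //   spec.add_inner_product("fc1", "flatten1", /*num_output_channels=*/10,
  //                          /*num_input_channels=*/64, fc_weight_init, fc_bias_init);
  //   spec.add_softmax("probabilities", "fc1");  // typical classifier head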

  /**
   * Appends a batch norm layer.
   *
   * The beta and mean parameters are initialized to 0.f; the gamma and variance
   * parameters are initialized to 1.f.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param num_channels The C dimension of the input and output
   * \param epsilon Added to the variance for each input before normalizing
   */
  void add_batchnorm(const std::string& name, const std::string& input,
                     size_t num_channels, float epsilon);
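
  // Example (illustrative sketch): a common conv -> batchnorm -> relu block,
  // with hypothetical layer names:
  //
  //   spec.add_batchnorm("conv1_bn", "conv1", /*num_channels=*/16, /*epsilon=*/1e-5f);
  //   spec.add_relu("conv1_relu", "conv1_bn");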

  /**
   * Appends an instance norm layer.
   *
   * The beta is initialized to 0.f; the gamma is initialized to 1.f.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param num_channels The C dimension of the input and output
   * \param epsilon Added to the variance for each input before normalizing
   */
  void add_instancenorm(const std::string& name, const std::string& input,
                        size_t num_channels, float epsilon);

  /**
   * Appends a layer that concatenates its inputs along the channel axis.
   *
   * \param name The name of the layer and its output
   * \param inputs The names of the layer's inputs
   */
  void add_channel_concat(const std::string& name,
                          const std::vector<std::string>& inputs);

  /**
   * Appends a layer that performs softmax normalization (along channel axis).
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   */
  void add_softmax(const std::string& name, const std::string& input);

  /**
   * Appends a layer that flattens its input (along the channel axis).
   *
   * Currently only channel-first flattening is supported: if the input shape is
   * ``[C, H, W]``, then the output shape will be ``[C * H * W, 1, 1]``, still in
   * `C-major` order. The underlying array storage is not changed.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   */
  void add_flatten(const std::string& name, const std::string& input);
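
  // Example (illustrative sketch): flattening a hypothetical [16, 4, 4] input
  // produces an output of shape [16 * 4 * 4, 1, 1] = [256, 1, 1]:
  //
  //   spec.add_flatten("flatten1", "pool1");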

  /**
   * Appends a layer that performs elementwise addition.
   *
   * \param name The name of the layer and its output
   * \param inputs The names of the layer's inputs
   */
  void add_addition(const std::string& name,
                    const std::vector<std::string>& inputs);

  /**
   * Appends a layer that performs elementwise multiplication.
   *
   * \param name The name of the layer and its output
   * \param inputs The names of the layer's inputs
   */
  void add_multiplication(const std::string& name,
                          const std::vector<std::string>& inputs);

  /**
   * Appends a layer that applies the unary function f(x) = e^x to its input.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   */
  void add_exp(const std::string& name, const std::string& input);

  /**
   * Appends a layer that performs elementwise multiplication between its input
   * and some fixed weights.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param shape_c_h_w The shape of the input and output
   * \param scale_initializer_fn Callback used to initialize the weights
   */
  void add_scale(const std::string& name, const std::string& input,
                 const std::vector<size_t>& shape_c_h_w,
                 weight_initializer scale_initializer_fn);

  /**
   * Appends a layer with fixed values.
   *
   * \param name The name of the layer and its output
   * \param shape_c_h_w The shape of the output
   * \param weight_initializer_fn Callback used to initialize the weights
   */
  void add_constant(const std::string& name,
                    const std::array<size_t, 3>& shape_c_h_w,
                    weight_initializer weight_initializer_fn);

  /**
   * Appends a layer that reshapes its input.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param seq_c_h_w The shape of the output, as [Seq, C, H, W]
   */
  void add_reshape(const std::string& name, const std::string& input,
                   const std::array<size_t, 4>& seq_c_h_w);

  /**
   * Appends a layer that transposes the dimensions of its input.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param axis_permutation A permutation of [0, 1, 2, 3], describing how to
   *        rearrange the [Seq, C, H, W] input.
   */
  void add_permute(const std::string& name, const std::string& input,
                   const std::array<size_t, 4>& axis_permutation);
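
  // Example (illustrative sketch; layer names are hypothetical): reshape a
  // [1, 256, 1, 1] tensor to [1, 16, 4, 4], then swap the H and W axes
  // (permutation {0, 1, 3, 2} exchanges indices 2 and 3 of [Seq, C, H, W]):
  //
  //   spec.add_reshape("reshaped", "fc1", {{1, 16, 4, 4}});
  //   spec.add_permute("transposed", "reshaped", {{0, 1, 3, 2}});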

  /**
   * Appends a layer that slices the input along the channel axis.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param start_index The first channel to include
   * \param end_index The first channel to stop including. If negative, then the
   *        number of channels is added first (so -1 becomes n - 1).
   * \param stride The interval between channels to include
   */
  void add_channel_slice(const std::string& name, const std::string& input,
                         int start_index, int end_index, size_t stride);
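
  // Example (illustrative sketch): keep every channel except the last one of a
  // hypothetical input "scores" (end_index == -1 resolves to n - 1):
  //
  //   spec.add_channel_slice("scores_but_last", "scores", /*start_index=*/0,
  //                          /*end_index=*/-1, /*stride=*/1);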

  /**
   * Appends an LSTM layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param hidden_input The name of the initial hidden state
   * \param cell_input The name of the initial cell state
   * \param hidden_output The name of the resulting hidden state
   * \param cell_output The name of the resulting cell state
   * \param input_vector_size The size of the input vector
   * \param output_vector_size The size of the output vector (hidden state and
   *        cell state)
   * \param cell_clip_threshold Maximum magnitude of cell state values
   * \param initializers LSTM weights
   */
  void add_lstm(const std::string& name, const std::string& input,
                const std::string& hidden_input, const std::string& cell_input,
                const std::string& hidden_output,
                const std::string& cell_output, size_t input_vector_size,
                size_t output_vector_size, float cell_clip_threshold,
                const lstm_weight_initializers& initializers);

  // TODO: Support additional layers (and further parameterize the above) as
  // needed. If/when we support the full range of NeuralNetworkLayer values,
  // this could be shared in some form with coremltools.

  /**
   * Appends a preprocessing layer. Currently only image-scaling preprocessing
   * is supported.
   *
   * \param feature_name The name of the image feature to preprocess
   * \param image_scale Scale factor applied to each pixel value
   */
  void add_preprocessing(const std::string& feature_name,
                         const float image_scale);
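
  // Example (illustrative sketch): scale 0-255 pixel values into [0, 1] for a
  // hypothetical input feature named "image":
  //
  //   spec.add_preprocessing("image", 1.0f / 255.0f);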

  /**
   * Appends a Transpose layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param axes The ordering of the axes to transpose; for instance, {0, 2, 1, 3}
   *        would swap the channel and height axes
   */
  void add_transpose(const std::string& name, const std::string& input,
                     std::vector<size_t> axes);

  /**
   * Appends a Split layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param axis The axis to split the layer on
   * \param num_splits The number of splits to perform
   * \param split_sizes The size of each split
   */
  void add_split_nd(const std::string& name, const std::string& input,
                    size_t axis, size_t num_splits,
                    const std::vector<size_t>& split_sizes);

  /**
   * Appends a Concat layer.
   *
   * \param name The name of the layer and its output
   * \param inputs The vector of names of the layer's inputs
   * \param axis The axis to concat the layer on
   */
  void add_concat_nd(const std::string& name,
                     const std::vector<std::string>& inputs, size_t axis);

  /**
   * Appends a Reshape Static layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param targetShape The target shape
   */
  void add_reshape_static(const std::string& name, const std::string& input,
                          const std::vector<size_t>& targetShape);

  /**
   * Appends a Reshape Dynamic layer.
   *
   * \param name The name of the layer and its output
   * \param inputs The vector of names of the layer's inputs
   */
  void add_reshape_dynamic(const std::string& name,
                           const std::vector<std::string>& inputs);

  /**
   * Appends an Expand Dims layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param axes The axes to expand the layer on
   */
  void add_expand_dims(const std::string& name, const std::string& input,
                       const std::vector<size_t>& axes,
                       const std::vector<size_t>& inputVector,
                       const std::vector<size_t>& outputVector);

  /**
   * Appends a Squeeze layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   * \param axes The axes to squeeze the layer on
   */
  void add_squeeze(const std::string& name, const std::string& input,
                   const std::vector<size_t>& axes,
                   const std::vector<size_t>& inputVector,
                   const std::vector<size_t>& outputVector);

  /**
   * Appends an Add Broadcastable layer.
   *
   * \param name The name of the layer and its output
   * \param inputs The vector of names of the layer's inputs
   */
  void add_add_broadcastable(const std::string& name,
                             const std::vector<std::string>& inputs);

  /**
   * Appends a Gather layer.
   *
   * \param name The name of the layer and its output
   * \param inputs The vector of names of the layer's inputs
   */
  void add_gather(const std::string& name,
                  const std::vector<std::string>& inputs);

  /**
   * Appends a Constant ND layer.
   *
   * \param name The name of the layer and its output
   * \param shape The shape of the constant layer
   * \param data The data being loaded in the constant layer
   */
  void add_constant_nd(const std::string& name,
                       const std::vector<size_t>& shape,
                       const weight_initializer& data);

  /**
   * Appends a Get Shape layer.
   *
   * \param name The name of the layer and its output
   * \param input The name of the layer's input
   */
  void add_get_shape(const std::string& name, const std::string& input);

  /**
   * Appends a dynamic slicing layer.
   *
   * \param name The name of the layer and its output
   * \param inputs The names of the layer's inputs
   */
  void add_slice_dynamic(const std::string& name, const std::vector<std::string>& inputs);

  /**
   * Appends a non-maximum suppression layer.
   *
   * \param name The name of the layer and its output
   * \param inputs The names of the layer's inputs
   * \param outputs The outputs of the layer
   * \param iou_threshold The default value for the IOU threshold
   * \param confidence_threshold The default value for the confidence threshold
   * \param max_boxes The maximum number of boxes NMS may select
   * \param per_class_suppression When false, suppression happens across all
   *        classes.
   */
  void add_nms_layer(const std::string& name, const std::vector<std::string>& inputs,
                     const std::vector<std::string>& outputs, float iou_threshold,
                     float confidence_threshold, size_t max_boxes, bool per_class_suppression);
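
  // Example (illustrative sketch; the input/output names and threshold values
  // are hypothetical and depend on how the surrounding network is wired):
  //
  //   spec.add_nms_layer("nms", {"raw_boxes", "raw_scores"},
  //                      {"boxes", "scores"}, /*iou_threshold=*/0.45f,
  //                      /*confidence_threshold=*/0.25f, /*max_boxes=*/64,
  //                      /*per_class_suppression=*/false);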

 private:
  std::unique_ptr<CoreML::Specification::NeuralNetwork> impl_;
};

/**
 * Simple wrapper around CoreML::Specification::Pipeline that allows client code
 * to pass around instances without importing full protobuf headers.
 *
 * \todo As needed, elaborate this class and move into its own file.
 */
class pipeline_spec {
 public:
  pipeline_spec(std::unique_ptr<CoreML::Specification::Pipeline> impl);

  pipeline_spec(pipeline_spec&&);
  pipeline_spec& operator=(pipeline_spec&&);

  // Declared here and defined in the .cpp file just to prevent the implicit
  // default destructor from attempting (and failing) to instantiate
  // std::unique_ptr<Pipeline>::~unique_ptr()
  ~pipeline_spec();

  /**
   * Exposes the underlying CoreML proto.
   */
  const CoreML::Specification::Pipeline& get_coreml_spec() const {
    return *impl_;
  }

  /**
   * Transfers ownership of the underlying CoreML proto, invalidating the
   * current instance (leaving it in a "moved-from" state).
   *
   * (Note that this method may only be invoked on a pipeline_spec&&.)
   */
  std::unique_ptr<CoreML::Specification::Pipeline> move_coreml_spec() &&;
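
  // Example (illustrative sketch; `pipeline` is a pipeline_spec built
  // elsewhere): as with model_spec, the proto can only be extracted from an
  // expiring value:
  //
  //   std::unique_ptr<CoreML::Specification::Pipeline> pipeline_proto =
  //       std::move(pipeline).move_coreml_spec();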

 private:
  std::unique_ptr<CoreML::Specification::Pipeline> impl_;
};

} // neural_net
} // turi

#endif // UNITY_TOOLKITS_NEURAL_NET_MODEL_SPEC_HPP_