docs/cpp/image__augmentation_8hpp_source.html

 /* Copyright © 2018 Apple Inc. All rights reserved.
  *
  * Use of this source code is governed by a BSD-3-clause license that can
  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
  */

 #ifndef TURI_NEURAL_NET_IMAGE_AUGMENTATION_HPP_
 #define TURI_NEURAL_NET_IMAGE_AUGMENTATION_HPP_

 #include <memory>
 #include <ostream>
 #include <vector>

 #include <ml/neural_net/Image.hpp>
 #include <ml/neural_net/float_array.hpp>

 namespace turi {
 namespace neural_net {

 /**
  * A rectangular region inside an image.
  *
  * The caller decides which coordinate system is in use. A rect is considered
  * empty (or null) unless both its width and its height are positive.
  */
 struct image_box {
   image_box() = default;
   image_box(float x, float y, float width, float height)
     : x{x}, y{y}, width{width}, height{height}
   {}

   // Reports whether either extent is non-positive.
   bool empty() const {
     if (width <= 0.f) {
       return true;
     }
     return height <= 0.f;
   }

   // Yields width * height for a non-empty rect, and 0 for an empty one.
   float area() const {
     if (empty()) {
       return 0.f;
     }
     return width * height;
   }

   // Divides every coordinate and every length by its corresponding
   // normalizer.
   void normalize(float image_width, float image_height);

   // Replaces this instance with its intersection with clip_box. When the two
   // do not intersect, the result reports an area() of 0.f (and its width or
   // height may be negative).
   void clip(image_box clip_box = image_box(0.f, 0.f, 1.f, 1.f));

   // Minimally enlarges this instance so that its area covers the (non-empty)
   // area of the other image_box.
   void extend(const image_box& other);

   float x{0.f};
   float y{0.f};
   float width{0.f};
   float height{0.f};
 };

 // Equality comparison for two image_box values. Only declared here; the
 // comparison rule lives in the out-of-line definition (presumably field-wise
 // -- confirm against the implementation file).
 bool operator==(const image_box& a, const image_box& b);
 // Stream insertion for image_box. Only declared here; the textual format is
 // determined by the out-of-line definition.
 std::ostream& operator<<(std::ostream& out, const image_box& box);

 /**
  * Describes one entity within an image, either labelled or predicted.
  */
 struct image_annotation {
   int identifier{0};
   image_box bounding_box;
   float confidence{0.f};  // For training data this is typically 1
 };

 // Equality comparison for two image_annotation values. Only declared here;
 // the comparison rule lives in the out-of-line definition.
 bool operator==(const image_annotation& a, const image_annotation& b);

 /**
  * Contains one image and its associated annotations.
  *
  * This is a plain aggregate: it declares no constructors and no default
  * member initializers.
  */
 struct labeled_image {
   // The image, held through a shared pointer.
   std::shared_ptr<Image> image;
   // The annotations associated with the image.
   std::vector<image_annotation> annotations;

   // Used when parsing saved predictions for evaluation.
   std::vector<image_annotation> predictions;
 };

 /**
  * Abstract interface implemented by objects that process, augment, or mutate
  * images along with the annotations attached to them.
  */
 class image_augmenter {
 public:

   /** Settings that control random crops. */
   struct crop_options {

     /** Smallest aspect ratio (width/height) in the uniform sampling range */
     float min_aspect_ratio{0.8f};

     /** Largest aspect ratio (width/height) in the uniform sampling range */
     float max_aspect_ratio{1.25f};

     /**
      * Once an aspect ratio has been sampled, determines the lower bound of
      * the uniformly sampled height.
      */
     float min_area_fraction{0.15f};

     /**
      * Once an aspect ratio has been sampled, determines the upper bound of
      * the uniformly sampled height.
      */
     float max_area_fraction{1.f};

     /**
      * For a sampled crop (aspect ratio, height, and location), the minimum
      * fraction of each bounding box's area that the crop must include in
      * order to be accepted. With a value of 0.f, the crop is not required to
      * touch any object.
      */
     float min_object_covered{0.f};

     /**
      * Upper limit on how many random crops are sampled while trying to
      * produce one that meets the min_object_covered constraint.
      */
     size_t max_attempts{50};

     /**
      * For an accepted crop, the minimum fraction of each bounding box's area
      * that must be included for the (potentially cropped) bounding box to
      * remain in the annotations; otherwise it is discarded.
      */
     float min_eject_coverage{0.5f};
   };

   /** Settings that control random padding. */
   struct pad_options {

     /** Smallest aspect ratio (width/height) in the uniform sampling range */
     float min_aspect_ratio{0.8f};

     /** Largest aspect ratio (width/height) in the uniform sampling range */
     float max_aspect_ratio{1.25f};

     /**
      * Once an aspect ratio has been sampled, determines the lower bound of
      * the uniformly sampled height.
      */
     float min_area_fraction{1.f};

     /**
      * Once an aspect ratio has been sampled, determines the upper bound of
      * the uniformly sampled height.
      */
     float max_area_fraction{2.f};

     /**
      * Upper limit on how many random aspect ratios are sampled while looking
      * for one that satisfies the constraints on area.
      */
     size_t max_attempts{50};
   };

   /**
    * Settings used to construct new image_augmenter instances.
    *
    * A default-constructed value performs no augmentation beyond resizing to
    * the output width and height, both of which must be specified.
    */
   struct options {

     /** N dimension of the resulting float array. */
     size_t batch_size{0};

     /** W dimension of the resulting float array. */
     size_t output_width{0};

     /** H dimension of the resulting float array. */
     size_t output_height{0};

     /** Seed for every pseudo-random number generated during augmentation. */
     int random_seed{0};

     /** Probability that a random crop is applied (attempted). */
     float crop_prob{0.f};
     crop_options crop_opts;

     /** Probability that a random pad is applied (attempted). */
     float pad_prob{0.f};
     pad_options pad_opts;

     /** Probability that the image is flipped horizontally. */
     float horizontal_flip_prob{0.f};

     // TODO: The semantics below are adopted from Core Image.
     // What should a shared interface specify?
     // See also https://developer.apple.com/library/archive/documentation/GraphicsImaging/Reference/CoreImageFilterReference/index.html#//apple_ref/doc/filter/ci/CIColorControls

     /**
      * Largest pixel value that may be added to or subtracted from each
      * channel.
      *
      * For instance, 0.05 adds a random value between -0.05 and 0.05 to each
      * channel of each pixel (represented as a value from 0 to 1).
      */
     float brightness_max_jitter{0.f};

     /**
      * Largest proportion by which contrast may be increased or decreased.
      *
      * For instance, 0.05 multiplies the contrast by a random value between
      * 0.95 and 1.05.
      */
     float contrast_max_jitter{0.f};

     /**
      * Largest proportion by which saturation may be increased or decreased.
      *
      * For instance, 0.05 multiplies the saturation by a random value between
      * 0.95 and 1.05.
      */
     float saturation_max_jitter{0.f};

     /**
      * Largest proportion by which the hues may be rotated.
      *
      * For instance, 0.05 applies a random rotation between -0.05 * pi and
      * 0.05 * pi.
      */
     float hue_max_jitter{0.f};
   };

   /** What an image_augmenter produces. */
   struct result {

     /** The augmented images, stored as one NHWC array (RGB). */
     shared_float_array image_batch;

     /**
      * The transformed annotations, one inner vector per augmented image. The
      * outer vector's size should match the size of the source batch that
      * produced this result, and each inner vector should be as long as the
      * annotations vector of the corresponding input image. */
     std::vector<std::vector<image_annotation>> annotations_batch;
   };

   virtual ~image_augmenter() = default;

   /** Gives access to the options this instance was parameterized with. */
   virtual const options& get_options() const = 0;

   /**
    * Augments a batch of images together with their annotations.
    *
    * When the source batch is smaller than the batch size given in the
    * options, the result is padded with zeroes as needed.
    */
   virtual result prepare_images(std::vector<labeled_image> source_batch) = 0;
 };

 /**
  * An abstract image_augmenter used to convert input images, annotations and
  * predictions to shared_float_arrays for tf_image_augmenter.
  *
  * A concrete subclass must implement the pure virtual method
  * prepare_augmented_images. get_options is implemented here and returns the
  * options copied at construction; prepare_images is overridden here but
  * defined out of line.
  */
 class float_array_image_augmenter : public image_augmenter {
  public:
   /**
    * Stores a copy of the given options.
    *
    * Marked explicit so that an options value is never implicitly converted
    * into an augmenter.
    */
   explicit float_array_image_augmenter(const options& opts) : opts_(opts) {}

   /** Returns the options stored at construction. */
   const options& get_options() const override { return opts_; }

   /** See image_augmenter::prepare_images. Defined out of line. */
   result prepare_images(std::vector<labeled_image> source_batch) override;

  protected:
   /** The output sent from TensorFlow after augmenting the images. */
   struct float_array_result {
     /** The augmented images sent back from TensorFlow. */
     shared_float_array images;

     /** The annotations associated with the augmented images, sent back from
      * TensorFlow.
      */
     std::vector<shared_float_array> annotations;
   };

   /** The input sent to TensorFlow to augment the images. */
   struct labeled_float_image {
     /** The images to be augmented: raw images, decoded and sent to
      * tf_image_augmenter as a vector of shared_float_array.
      */
     std::vector<shared_float_array> images;

     /** The annotations of the images to be augmented, sent to
      * tf_image_augmenter as a vector of shared_float_array.
      */
     std::vector<shared_float_array> annotations;
   };

   /**
    * Subclass hook: receives the data to augment and returns the augmented
    * images together with their annotations.
    */
   virtual float_array_result prepare_augmented_images(
       labeled_float_image data_to_augment) = 0;

  private:
   /** Copy of the options supplied to the constructor. */
   options opts_;
 };

 }  // neural_net
 }  // turi

 #endif  // TURI_NEURAL_NET_IMAGE_AUGMENTATION_HPP_
turi::neural_net::image_annotation
Definition: image_augmentation.hpp:63

turi::neural_net::image_box
Definition: image_augmentation.hpp:26

turi::neural_net::float_array_image_augmenter::float_array_result
Definition: image_augmentation.hpp:272

turi::neural_net::float_array_image_augmenter::float_array_result::annotations
std::vector< shared_float_array > annotations
Definition: image_augmentation.hpp:277

turi::neural_net::image_augmenter::result::annotations_batch
std::vector< std::vector< image_annotation > > annotations_batch
Definition: image_augmentation.hpp:238

turi::neural_net::float_array_image_augmenter::labeled_float_image
Definition: image_augmentation.hpp:281

turi::neural_net::float_array_image_augmenter
Definition: image_augmentation.hpp:262

turi::neural_net::image_augmenter::crop_options
Definition: image_augmentation.hpp:90

turi
SKD.
Definition: capi_initialization.hpp:11

turi::neural_net::image_augmenter::result::image_batch
shared_float_array image_batch
Definition: image_augmentation.hpp:231

turi::neural_net::image_augmenter::options
Definition: image_augmentation.hpp:165

turi::neural_net::float_array_image_augmenter::labeled_float_image::annotations
std::vector< shared_float_array > annotations
Definition: image_augmentation.hpp:290

turi::neural_net::labeled_image
Definition: image_augmentation.hpp:74

turi::neural_net::image_augmenter::result
Definition: image_augmentation.hpp:228

turi::neural_net::image_augmenter
Definition: image_augmentation.hpp:86

turi::neural_net::float_array_image_augmenter::float_array_result::images
shared_float_array images
Definition: image_augmentation.hpp:274

turi::neural_net::float_array_image_augmenter::labeled_float_image::images
std::vector< shared_float_array > images
Definition: image_augmentation.hpp:285

turi::neural_net::image_augmenter::pad_options
Definition: image_augmentation.hpp:132

turi::neural_net::float_array_image_augmenter::get_options
const options & get_options() const override
Definition: image_augmentation.hpp:266