Source code for data.datasets.detection.coco_ssd

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
import math
from typing import List, Mapping, Tuple, Union

import torch
from torch import Tensor

from cvnets.anchor_generator import build_anchor_generator
from cvnets.matcher_det import build_matcher
from data.collate_fns import COLLATE_FN_REGISTRY
from data.datasets import DATASET_REGISTRY
from data.datasets.detection.coco_base import COCODetection
from data.transforms import image_pil as T
from data.transforms.common import Compose
from utils import logger


@DATASET_REGISTRY.register(name="coco_ssd", type="detection")
class COCODetectionSSD(COCODetection):
    """Dataset class for MS COCO object detection using the Single Shot Object Detector (SSD).

    Args:
        opts: Command-line arguments
    """

    def __init__(
        self,
        opts: argparse.Namespace,
        *args,
        **kwargs,
    ) -> None:
        if getattr(opts, "matcher.name") != "ssd":
            logger.error("For SSD, --matcher.name should be set to ssd")

        anchor_gen_name = getattr(opts, "anchor_generator.name")
        if anchor_gen_name is None or anchor_gen_name != "ssd":
            logger.error("For SSD, --anchor-generator.name should be set to ssd")

        super().__init__(opts=opts, *args, **kwargs)

        # We build the anchor generator and matcher inside the dataset so that
        # they can be used with variable batch samplers.
        self.anchor_box_generator = build_anchor_generator(opts=opts, is_numpy=True)
        self.match_prior = build_matcher(opts=opts)

        # output strides at which anchors are generated
        self.output_strides = self.anchor_box_generator.output_strides

        # set the collate functions for the dataset
        setattr(opts, "dataset.collate_fn_name_train", "coco_ssd_collate_fn")
        setattr(opts, "dataset.collate_fn_name_val", "coco_ssd_collate_fn")
        setattr(opts, "dataset.collate_fn_name_test", "coco_ssd_collate_fn")
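    # A minimal construction sketch (illustrative, not from the source): option
    # names contain dots, so they must be set with setattr rather than passed as
    # keyword arguments. The real options object comes from the cvnets config
    # system and carries many more entries.
    #
    #   opts = argparse.Namespace()
    #   setattr(opts, "matcher.name", "ssd")
    #   setattr(opts, "anchor_generator.name", "ssd")
    #   # ... remaining dataset, matcher, and anchor-generator options ...
    #   dataset = COCODetectionSSD(opts=opts)
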
    def _training_transforms(self, size: Tuple[int, int], *args, **kwargs) -> Compose:
        """Data augmentation during training.

        The order of transformations is SSDCroping, PhotometricDistort,
        RandomHorizontalFlip, Resize, BoxPercentCoords, ToTensor.

        Args:
            size: Size for resizing the input image. Expected to be a tuple (height, width).

        Returns:
            An instance of `data.transforms.common.Compose`.
        """
        aug_list = [
            T.SSDCroping(opts=self.opts),
            T.PhotometricDistort(opts=self.opts),
            T.RandomHorizontalFlip(opts=self.opts),
            T.Resize(opts=self.opts, img_size=size),
            T.BoxPercentCoords(opts=self.opts),
            T.ToTensor(opts=self.opts),
        ]
        return Compose(opts=self.opts, img_transforms=aug_list)

    def _validation_transforms(self, size: Tuple[int, int], *args, **kwargs) -> Compose:
        """Data augmentation during validation or evaluation.

        The default order of transformations is Resize, BoxPercentCoords, ToTensor.

        Args:
            size: Size for resizing the input image. Expected to be a tuple (height, width).

        Returns:
            An instance of `data.transforms.common.Compose`.
        """
        aug_list = [
            T.Resize(opts=self.opts),
            T.BoxPercentCoords(opts=self.opts),
            T.ToTensor(opts=self.opts),
        ]
        return Compose(opts=self.opts, img_transforms=aug_list)
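    # Note on ordering (a behavioral summary inferred from the transform names,
    # not stated in this file): Resize rescales the image together with the
    # absolute box coordinates, and BoxPercentCoords then converts the boxes to
    # relative [0, 1] coordinates, so the targets produced by ToTensor are
    # independent of the sampled crop size. See data.transforms.image_pil for
    # the transform implementations.
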
    def generate_anchors(self, height: int, width: int) -> Tensor:
        """Given the height and width of the input to the SSD model, generate anchors.

        Args:
            height: Height of the input image to the SSD model.
            width: Width of the input image to the SSD model.

        Returns:
            Tensor containing anchor locations.
        """
        anchors = []
        for output_stride in self.output_strides:
            if output_stride == -1:
                fm_width = fm_height = 1
            else:
                fm_width = int(math.ceil(width / output_stride))
                fm_height = int(math.ceil(height / output_stride))
            fm_anchor = self.anchor_box_generator(
                fm_height=fm_height,
                fm_width=fm_width,
                fm_output_stride=output_stride,
            )
            anchors.append(fm_anchor)
        anchors = torch.cat(anchors, dim=0)
        return anchors
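    # Illustrative arithmetic (the strides here are assumed for the example, not
    # read from the configuration): with output_strides = [16, 32, -1] and a
    # 320x320 input, the per-level feature maps are ceil(320/16) = 20,
    # ceil(320/32) = 10, and 1x1 (a stride of -1 denotes a global feature map);
    # the returned tensor concatenates the anchors from all levels along dim 0.
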
    def __getitem__(
        self, sample_size_and_index: Tuple[int, int, int], *args, **kwargs
    ) -> Mapping[str, Union[Tensor, Mapping[str, Tensor]]]:
        """Returns the sample corresponding to the input sample index.

        The returned sample is transformed into the size specified by the input.

        Args:
            sample_size_and_index: Tuple of the form (crop_size_h, crop_size_w, sample_index).

        Returns:
            A dictionary with `samples` and `targets` as keys corresponding to the
            input and labels of a sample, respectively.

        Shapes:
            The shapes of the values in the output dictionary, output_data, are as follows:

            output_data["samples"]["image"]: Shape is [Channels, Height, Width]
            output_data["targets"]["box_labels"]: Shape is [Number of matched boxes]
            output_data["targets"]["box_coordinates"]: Shape is [Number of matched boxes, 4]
            output_data["targets"]["image_id"]: Shape is [1]
            output_data["targets"]["image_width"]: Shape is [1]
            output_data["targets"]["image_height"]: Shape is [1]
        """
        crop_size_h, crop_size_w, img_index = sample_size_and_index
        transform_fn = self.get_augmentation_transforms(size=(crop_size_h, crop_size_w))

        image_id = self.ids[img_index]

        image, img_fname = self.get_image(image_id=image_id)
        im_width, im_height = image.size

        boxes, labels, _ = self.get_boxes_and_labels(
            image_id=image_id, image_width=im_width, image_height=im_height
        )

        data = {"image": image, "box_labels": labels, "box_coordinates": boxes}
        data = transform_fn(data)

        # match the ground-truth boxes with the anchors (priors)
        anchors = self.generate_anchors(height=crop_size_h, width=crop_size_w)
        gt_coordinates, gt_labels = self.match_prior(
            gt_boxes=data["box_coordinates"],
            gt_labels=data["box_labels"],
            anchors=anchors,
        )

        output_data = {
            "samples": {"image": data.pop("image")},
            "targets": {
                "box_labels": gt_labels,
                "box_coordinates": gt_coordinates,
                "image_id": torch.tensor(image_id),
                "image_width": torch.tensor(im_width),
                "image_height": torch.tensor(im_height),
            },
        }

        return output_data
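    # A hedged indexing sketch (assumes a constructed `dataset`; the crop size
    # and index values are illustrative, and the shapes follow the docstring):
    #
    #   sample = dataset[(320, 320, 0)]
    #   sample["samples"]["image"].shape            # [3, 320, 320]
    #   sample["targets"]["box_labels"].shape       # [num matched boxes]
    #   sample["targets"]["box_coordinates"].shape  # [num matched boxes, 4]
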
    def extra_repr(self) -> str:
        extra_repr_str = super().extra_repr()
        extra_repr_str += (
            f"\n\tmatcher={self.match_prior}"
            f"\n\tanchor_generator={self.anchor_box_generator}"
        )
        return extra_repr_str

@COLLATE_FN_REGISTRY.register(name="coco_ssd_collate_fn")
def coco_ssd_collate_fn(
    batch: List[Mapping[str, Union[Tensor, Mapping[str, Tensor]]]],
    opts: argparse.Namespace,
) -> Mapping[str, Union[Tensor, Mapping[str, Tensor]]]:
    """Combines a list of dictionaries into a single dictionary by stacking matching fields.

    For the expected keys, see the output of the `__getitem__` function of the
    COCODetectionSSD class.

    Args:
        batch: A list of dictionaries.
        opts: Command-line arguments.

    Returns:
        A dictionary with `samples` and `targets` as keys.
    """
    new_batch = {
        "samples": {"image": []},
        "targets": {
            "box_labels": [],
            "box_coordinates": [],
            "image_id": [],
            "image_width": [],
            "image_height": [],
        },
    }

    for batch_ in batch:
        # prepare inputs
        new_batch["samples"]["image"].append(batch_["samples"]["image"])

        # prepare outputs
        new_batch["targets"]["box_labels"].append(batch_["targets"]["box_labels"])
        new_batch["targets"]["box_coordinates"].append(
            batch_["targets"]["box_coordinates"]
        )
        new_batch["targets"]["image_id"].append(batch_["targets"]["image_id"])
        new_batch["targets"]["image_width"].append(batch_["targets"]["image_width"])
        new_batch["targets"]["image_height"].append(batch_["targets"]["image_height"])

    # stack inputs
    new_batch["samples"]["image"] = torch.stack(new_batch["samples"]["image"], dim=0)

    # stack outputs
    new_batch["targets"]["box_labels"] = torch.stack(
        new_batch["targets"]["box_labels"], dim=0
    )
    new_batch["targets"]["box_coordinates"] = torch.stack(
        new_batch["targets"]["box_coordinates"], dim=0
    )
    new_batch["targets"]["image_id"] = torch.stack(
        new_batch["targets"]["image_id"], dim=0
    )
    new_batch["targets"]["image_width"] = torch.stack(
        new_batch["targets"]["image_width"], dim=0
    )
    new_batch["targets"]["image_height"] = torch.stack(
        new_batch["targets"]["image_height"], dim=0
    )

    return new_batch
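
# A hedged usage sketch (assumes `dataset`, `opts`, and a `batch_sampler` that
# yields (crop_size_h, crop_size_w, sample_index) tuples; in the cvnets training
# pipeline this collate function is selected via the dataset.collate_fn_name_*
# options set in COCODetectionSSD.__init__ rather than wired up manually):
#
#   from functools import partial
#   from torch.utils.data import DataLoader
#
#   loader = DataLoader(
#       dataset,
#       batch_sampler=batch_sampler,
#       collate_fn=partial(coco_ssd_collate_fn, opts=opts),
#   )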