Source code for cvnets.anchor_generator.ssd_anchor_generator

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
from itertools import product
from typing import List, Optional

import numpy as np
import torch
from torch import Tensor

from cvnets.anchor_generator import ANCHOR_GEN_REGISTRY, BaseAnchorGenerator
from utils import logger


[docs]@ANCHOR_GEN_REGISTRY.register(name="ssd") class SSDAnchorGenerator(BaseAnchorGenerator): """ This class generates anchors (or priors) ``on-the-fly`` for the `single shot object detector (SSD) <https://arxiv.org/abs/1512.02325>`_. """
[docs] def __init__(self, opts, *args, **kwargs) -> None: output_strides = getattr( opts, "anchor_generator.ssd.output_strides", [32, 64, 128, 256, -1] ) aspect_ratios = getattr( opts, "anchor_generator.ssd.aspect_ratios", [[2, 3]] * len(output_strides) ) min_ratio = getattr(opts, "anchor_generator.ssd.min_scale_ratio", 0.1) max_ratio = getattr(opts, "anchor_generator.ssd.max_scale_ratio", 1.05) no_clipping = getattr(opts, "anchor_generator.ssd.no_clipping", False) step = getattr(opts, "anchor_generator.ssd.step", [1]) if isinstance(step, int): step = [step] * len(output_strides) elif isinstance(step, List) and len(step) <= len(output_strides): step = step + [1] * (len(output_strides) - len(step)) else: logger.error( "--anchor-generator.ssd.step should be either a list of ints with the same length as " "the output strides OR an integer" ) super().__init__() aspect_ratios = [list(set(ar)) for ar in aspect_ratios] output_strides_aspect_ratio = dict() for k, v in zip(output_strides, aspect_ratios): output_strides_aspect_ratio[k] = v self.output_strides_aspect_ratio = output_strides_aspect_ratio self.output_strides = output_strides self.anchors_dict = dict() self.num_output_strides = len(output_strides) self.num_aspect_ratios = len(aspect_ratios) scales = np.linspace(min_ratio, max_ratio, len(output_strides) + 1) self.sizes = dict() for i, s in enumerate(output_strides): self.sizes[s] = { "min": scales[i], "max": (scales[i] * scales[i + 1]) ** 0.5, "step": step[i], } self.clip = not no_clipping self.min_scale_ratio = min_ratio self.max_scale_ratio = max_ratio self.step = step
def __repr__(self): return "{}(min_scale_ratio={}, max_scale_ratio={}, n_output_strides={}, n_aspect_ratios={}, clipping={})".format( self.__class__.__name__, self.min_scale_ratio, self.max_scale_ratio, self.num_output_strides, self.num_aspect_ratios, self.clip, )
[docs] @classmethod def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser: """ Adds SSD anchor generator-specific arguments to the parser """ group = parser.add_argument_group(title=cls.__name__) group.add_argument( "--anchor-generator.ssd.output-strides", nargs="+", type=int, help="Output strides of the feature maps for which we want to generate anchors", ) group.add_argument( "--anchor-generator.ssd.aspect-ratios", nargs="+", type=float, action="append", help="Aspect ratios at each output stride", ) # prior box arguments # SSD sample priors between min and max box sizes. # for example, if we use feature maps from three spatial levels (or output strides), then we # sample width and height for anchor boxes as: # scales = np.linspace(min_box_size, max_box_size, len(output_strides) + 1) # min_box dimensions for the first feature map is scales[0] * feature_map_dimensions # while the max_box dimensions will be sqrt(scales[0] * scales[1]) * feature_map dimensions. And so on group.add_argument( "--anchor-generator.ssd.min-scale-ratio", type=float, help="Min. scale ratio", ) group.add_argument( "--anchor-generator.ssd.max-scale-ratio", type=float, help="Max. scale ratio", ) group.add_argument( "--anchor-generator.ssd.no-clipping", action="store_true", help="Don't clip the anchors", ) group.add_argument( "--anchor-generator.ssd.step", type=int, default=[1], nargs="+", help="Step between pixels", ) return parser
[docs] def num_anchors_per_os(self) -> List: """ Returns anchors per output stride for SSD """ return [2 + 2 * len(ar) for os, ar in self.output_strides_aspect_ratio.items()]
@torch.no_grad() def _generate_anchors( self, height: int, width: int, output_stride: int, device: Optional[str] = "cpu", *args, **kwargs ) -> Tensor: min_size_h = self.sizes[output_stride]["min"] min_size_w = self.sizes[output_stride]["min"] max_size_h = self.sizes[output_stride]["max"] max_size_w = self.sizes[output_stride]["max"] aspect_ratio = self.output_strides_aspect_ratio[output_stride] step = max(1, self.sizes[output_stride]["step"]) default_anchors_ctr = [] start_step = max(0, step // 2) # Note that feature maps are in NCHW format for y, x in product( range(start_step, height, step), range(start_step, width, step) ): # [x, y, w, h] format cx = (x + 0.5) / width cy = (y + 0.5) / height # small box size default_anchors_ctr.append([cx, cy, min_size_w, min_size_h]) # big box size default_anchors_ctr.append([cx, cy, max_size_w, max_size_h]) # change h/w ratio of the small sized box based on aspect ratios for ratio in aspect_ratio: ratio = ratio**0.5 default_anchors_ctr.extend( [ [cx, cy, min_size_w * ratio, min_size_h / ratio], [cx, cy, min_size_w / ratio, min_size_h * ratio], ] ) default_anchors_ctr = torch.tensor( default_anchors_ctr, dtype=torch.float, device=device ) if self.clip: default_anchors_ctr = torch.clamp(default_anchors_ctr, min=0.0, max=1.0) return default_anchors_ctr