# Source code for cvnets.models.classification.config.regnet

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
from dataclasses import dataclass
from typing import Dict, List, Tuple, Union

import numpy as np

from utils import logger
from utils.math_utils import make_divisible

# Names of the RegNet modes accepted by this module. The "x_*" entries are
# listed first, followed by the "y_*" entries in the same relative order, so
# the two halves of the list line up one-to-one. The list is joined into the
# "supported modes" message emitted when an unknown mode is requested.
supported_modes = [
    "x_200mf",
    "x_400mf",
    "x_600mf",
    "x_800mf",
    "x_1.6gf",
    "x_3.2gf",
    "x_4.0gf",
    "x_6.4gf",
    "x_8.0gf",
    "x_12gf",
    "x_16gf",
    "x_32gf",
    "y_200mf",
    "y_400mf",
    "y_600mf",
    "y_800mf",
    "y_1.6gf",
    "y_3.2gf",
    "y_4.0gf",
    "y_6.4gf",
    "y_8.0gf",
    "y_12gf",
    "y_16gf",
    "y_32gf",
]


@dataclass
class BlockParamsConfig:
    """
    This class stores the quantized linear block params. It is adapted from torchvision.models.regnet:
    https://github.com/pytorch/vision/blob/c06d52b1c5f6aee36802661c3ebc6347b97cc59e/torchvision/models/regnet.py#L203

    The constructor arguments themselves are not stored. They are converted into
    per-stage lists, which are exposed as the attributes ``depths``, ``widths``,
    ``stage_groups``, ``bottleneck_multipliers`` and ``strides`` (one entry per
    stage), plus the scalar ``se_ratio``.

    Two instances compare equal when all of these derived attributes are equal.
    Instances are not hashable.

    Args:
        depth: The total number of XBlocks in the network
        w_0: Initial width
        w_a: Width slope
        w_m: Width slope in the log space
        groups: The number of groups to use in the XBlock.
            NOTE(review): the value is clamped with ``min(groups, width)`` and used as the
            divisor passed to ``make_divisible``, which mirrors how torchvision treats its
            per-group width. Confirm the intended meaning with the model code.
        se_ratio: The squeeze-excitation ratio. The number of channels in the SE module will be the
            input channels scaled by this ratio.
        bottleneck_multiplier: The number of output channels in the intermediate conv layers in
            bottleneck/Xblock block will be scaled by this value.
        quant: Block widths will be divisible by this value
        stride: The stride of the 3x3 conv of the XBlocks

    Raises:
        ValueError: If ``w_a < 0``, ``w_0 <= 0``, ``w_m <= 1`` or ``w_0`` is not a multiple of 8.
    """

    def __init__(
        self,
        depth: int,
        w_0: int,
        w_a: float,
        w_m: float,
        groups: int,
        se_ratio: float = 0.0,
        bottleneck_multiplier: float = 1.0,
        quant: int = 8,
        stride: int = 2,
    ) -> None:
        # NOTE(review): the divisibility check uses a fixed 8 and does not follow `quant`.
        if w_a < 0 or w_0 <= 0 or w_m <= 1 or w_0 % 8 != 0:
            raise ValueError(
                f"Invalid RegNet settings. Need"
                f" w_a >= 0, given w_a={w_a};"
                f" w_m > 1, given w_m={w_m};"
                f" w_0 > 0 and w_0 % 8 == 0, given w_0={w_0}."
            )

        # Continuous widths for each block. Each stage has a unique block width
        block_widths_cont = np.arange(depth) * w_a + w_0  # u_j in eq. (2) of paper
        block_capacity = np.round(
            np.log(block_widths_cont / w_0) / np.log(w_m)
        )  # s_j in eq. (3) of paper

        # Quantized block widths
        block_widths_quant = (
            (np.round(np.divide(w_0 * np.power(w_m, block_capacity), quant)) * quant)
            .astype(int)
            .tolist()
        )
        num_stages = len(set(block_widths_quant))

        # Convert to per stage parameters. A block starts a new stage when its width
        # differs from the previous block's width. The 0 padding on either side makes
        # the first block and the end of the list count as boundaries (assuming the
        # quantized widths are non-zero).
        splits = [
            width != prev_width
            for width, prev_width in zip(
                block_widths_quant + [0], [0] + block_widths_quant
            )
        ]
        stage_widths = [w for w, t in zip(block_widths_quant, splits[:-1]) if t]
        # Distance between consecutive boundaries == number of blocks in that stage.
        stage_depths = (
            np.diff([d for d, t in enumerate(splits) if t]).astype(int).tolist()
        )

        strides = [stride] * num_stages
        bottleneck_multipliers = [bottleneck_multiplier] * num_stages
        stage_groups = [groups] * num_stages

        # Adjust the compatibility of stage widths and group widths
        stage_widths, stage_groups = self._make_widths_compatible(
            stage_widths, stage_groups, bottleneck_multipliers
        )

        self.depths = stage_depths
        self.widths = stage_widths
        self.stage_groups = stage_groups
        self.bottleneck_multipliers = bottleneck_multipliers
        self.strides = strides
        self.se_ratio = se_ratio

    def _make_widths_compatible(
        self,
        stage_widths: List[int],
        stage_groups: List[int],
        bottleneck_multipliers: List[float],
    ) -> Tuple[List[int], List[int]]:
        """
        Scales widths by bottleneck multipliers and adjusts them to be compatible
        with the specified groups.

        Args:
            stage_widths: Width of each stage.
            stage_groups: Groups value of each stage.
            bottleneck_multipliers: Bottleneck multiplier of each stage.

        Returns:
            A tuple ``(stage_widths, stage_groups)`` holding the adjusted widths and the
            groups values after clamping each one to its stage's scaled width.
        """
        # Scale widths according to bottleneck multipliers
        widths = [
            int(width * multiplier)
            for width, multiplier in zip(stage_widths, bottleneck_multipliers)
        ]
        # A groups value can never exceed the (scaled) width of its stage
        group_widths_min = [
            min(groups, width) for groups, width in zip(stage_groups, widths)
        ]

        # Ensure the widths are divisible by groups
        bottleneck_widths = [
            make_divisible(width, groups)
            for width, groups in zip(widths, group_widths_min)
        ]

        # Undo scaling
        stage_widths = [
            int(width / multiplier)
            for width, multiplier in zip(bottleneck_widths, bottleneck_multipliers)
        ]
        return stage_widths, group_widths_min

    def _state(self) -> Tuple:
        """Returns the derived per-stage attributes that define this configuration."""
        return (
            self.depths,
            self.widths,
            self.stage_groups,
            self.bottleneck_multipliers,
            self.strides,
            self.se_ratio,
        )

    def __eq__(self, other: object) -> bool:
        # Defined explicitly because this class declares no dataclass fields, so the
        # auto-generated __eq__ would report every pair of instances as equal.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return self._state() == other._state()

    def extra_repr(self) -> str:
        """Returns one ``\\n\\t<name>=<value>`` entry per stored attribute."""
        entries = (
            ("depths", self.depths),
            ("widths", self.widths),
            ("strides", self.strides),
            ("stage_groups", self.stage_groups),
            ("bottleneck_multipliers", self.bottleneck_multipliers),
            ("se_ratio", self.se_ratio),
        )
        return "".join(f"\n\t{name}={value}" for name, value in entries)

    def __repr__(self) -> str:
        return "{}({}\n)".format(self.__class__.__name__, self.extra_repr())
# Constructor arguments for every RegNet mode, stored as
# (depth, w_0, w_a, w_m, groups, se_ratio) in the order BlockParamsConfig expects.
# Keeping plain tuples here means only the requested mode is ever instantiated.
_REGNET_BLOCK_PARAMS: Dict[str, Tuple[int, int, float, float, int, float]] = {
    "x_200mf": (13, 24, 36.44, 2.49, 8, 0.0),
    "x_400mf": (22, 24, 24.48, 2.54, 16, 0.0),
    "x_600mf": (16, 48, 36.97, 2.24, 24, 0.0),
    "x_800mf": (16, 56, 35.73, 2.28, 16, 0.0),
    "x_1.6gf": (18, 80, 34.01, 2.25, 24, 0.0),
    "x_3.2gf": (25, 88, 26.31, 2.25, 48, 0.0),
    "x_4.0gf": (23, 96, 38.65, 2.43, 40, 0.0),
    "x_6.4gf": (17, 184, 60.83, 2.07, 56, 0.0),
    "x_8.0gf": (23, 80, 49.56, 2.88, 120, 0.0),
    "x_12gf": (19, 168, 73.36, 2.37, 112, 0.0),
    "x_16gf": (22, 216, 55.59, 2.1, 128, 0.0),
    "x_32gf": (23, 320, 69.86, 2.0, 168, 0.0),
    "y_200mf": (13, 24, 36.44, 2.49, 8, 0.25),
    "y_400mf": (16, 48, 27.89, 2.09, 8, 0.25),
    "y_600mf": (15, 48, 32.54, 2.32, 16, 0.25),
    "y_800mf": (14, 56, 38.84, 2.4, 16, 0.25),
    "y_1.6gf": (27, 48, 20.71, 2.65, 24, 0.25),
    "y_3.2gf": (21, 80, 42.63, 2.66, 24, 0.25),
    "y_4.0gf": (22, 96, 31.41, 2.24, 64, 0.25),
    "y_6.4gf": (25, 112, 33.22, 2.27, 72, 0.25),
    "y_8.0gf": (17, 192, 76.82, 2.19, 56, 0.25),
    "y_12gf": (19, 168, 73.36, 2.37, 112, 0.25),
    "y_16gf": (18, 200, 106.23, 2.48, 112, 0.25),
    "y_32gf": (20, 232, 115.89, 2.53, 232, 0.25),
}


def get_configuration(
    opts: argparse.Namespace,
) -> Dict[str, Dict[str, Union[int, float]]]:
    """Gets the RegNet model configuration for the specified RegNet mode.

    The mode is read from the ``model.classification.regnet.mode`` attribute of
    ``opts``. Only the block parameters of the requested mode are constructed.

    Args:
        opts: command-line arguments

    Returns:
        * A dictionary containing the configuration for each layer. Each key is of the form layer<i>
            (for i in 1..4) and the corresponding value is another dictionary with the following keys:
                depth: The depth of the stage at layer<i>
                width: The width of the blocks at this stage
                groups: The convolution groups of each block at this stage
                stride: The stride of the convolutions in each block at this stage
                bottleneck_multiplier: The multiplier for the bottleneck conv in each of this stage's blocks
                se_ratio: The squeeze-excitation ratio for each block in this stage
          The dictionary is empty when the mode is not recognised and ``logger.error`` returns.
    """
    network_mode = getattr(opts, "model.classification.regnet.mode")
    if network_mode is None:
        # NOTE(review): presumably logger.error aborts the program; if it returns,
        # execution continues and the unsupported-mode branch below reports again.
        logger.error(
            "RegNet mode can't be none. Please specify --model.classification.regnet.mode"
        )

    network_config = dict()

    if network_mode in _REGNET_BLOCK_PARAMS:
        depth, w_0, w_a, w_m, groups, se_ratio = _REGNET_BLOCK_PARAMS[network_mode]
        block_params = BlockParamsConfig(
            depth, w_0, w_a, w_m, groups, se_ratio=se_ratio
        )

        # Exactly four layers are emitted; the per-stage lists are indexed directly,
        # so a configuration with fewer than four stages raises IndexError.
        for stage_idx in range(4):
            network_config[f"layer{stage_idx + 1}"] = {
                "depth": block_params.depths[stage_idx],
                "width": block_params.widths[stage_idx],
                "groups": block_params.stage_groups[stage_idx],
                "stride": block_params.strides[stage_idx],
                "bottleneck_multiplier": block_params.bottleneck_multipliers[
                    stage_idx
                ],
                "se_ratio": block_params.se_ratio,
            }
    else:
        logger.error(
            f"Current supported modes for RegNet are {', '.join(supported_modes)}. Got: {network_mode}"
        )
    return network_config