Source code for cvnets.models.classification.config.efficientnet

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import math
from dataclasses import dataclass
from functools import partial
from typing import Dict

from utils import logger
from utils.math_utils import make_divisible


[docs]@dataclass class CompoundScalingConfig: """This class stores the compound scaling configuration""" width_mult: float depth_mult: float train_resolution: int
[docs]@dataclass class EfficientNetBlockConfig: """This class stores the config for each block in EfficientNet i.e. MBConv layers in Table 1 of `EfficientNet paper <https://arxiv.org/abs/1905.11946>`_ Notably, this class takes width_mult and depth_mult as input too and adjusts layers' depth and width, as is required in different modes of EfficientNet. """
[docs] def __init__( self, expand_ratio: float, kernel: int, stride: int, in_channels: int, out_channels: int, num_layers: int, width_mult: float, depth_mult: float, ): self.expand_ratio = expand_ratio self.kernel = kernel self.stride = stride self.in_channels = int(make_divisible(in_channels * width_mult, 8)) self.out_channels = int(make_divisible(out_channels * width_mult, 8)) self.num_layers = int(math.ceil(num_layers * depth_mult))
[docs]def get_configuration(opts) -> Dict: network_mode = getattr(opts, "model.classification.efficientnet.mode") if network_mode is None: logger.error( "EfficientNet mode can't be none. Please specify --model.classification.efficientnet.mode" ) network_mode = network_mode.lower() network_config = dict() # EfficientNet scales depth, width and resolution. # We will make use of resolution in the yaml configuration file, but leave it here for the sake of completeness compound_scaling_cfg = { "b0": CompoundScalingConfig(1.0, 1.0, 224), "b1": CompoundScalingConfig(1.0, 1.1, 240), "b2": CompoundScalingConfig(1.1, 1.2, 260), "b3": CompoundScalingConfig(1.2, 1.4, 300), "b4": CompoundScalingConfig(1.4, 1.8, 380), "b5": CompoundScalingConfig(1.6, 2.2, 456), "b6": CompoundScalingConfig(1.8, 2.6, 528), "b7": CompoundScalingConfig(2.0, 3.1, 600), "b8": CompoundScalingConfig(2.2, 3.6, 672), } if network_mode in compound_scaling_cfg: compound_scaling_cfg_mode_i = compound_scaling_cfg[network_mode] width_mult = compound_scaling_cfg_mode_i.width_mult depth_mult = compound_scaling_cfg_mode_i.depth_mult # pre-feed depth and width multipliers as they are always used and same across layers. block_builder = partial( EfficientNetBlockConfig, width_mult=width_mult, depth_mult=depth_mult ) # Build the configuration at each spatial level. # The format of configuraiton is: (expand_ratio, kernel, stride, in_channels, out_channels, num_layers) # Configuration at output stride of 2 network_config["layer_1"] = [block_builder(1, 3, 1, 32, 16, 1)] # Configuration at output stride of 4 network_config["layer_2"] = [ block_builder(6, 3, 2, 16, 24, 2), ] # Configuration at output stride of 8 network_config["layer_3"] = [ block_builder(6, 5, 2, 24, 40, 2), ] # Configuration at output stride of 16 network_config["layer_4"] = [ block_builder(6, 3, 2, 40, 80, 3), block_builder(6, 5, 1, 80, 112, 3), ] # Configuration at output stride of 32 network_config["layer_5"] = [ block_builder(6, 5, 2, 112, 192, 4), block_builder(6, 3, 1, 192, 320, 1), ] network_config["last_channels"] = 4 * network_config["layer_5"][-1].out_channels else: logger.error( "Current supported modes for EfficientNet are b[0-7]. Got: {}".format( network_mode ) ) # Count the total number of layers throughout all blocks. # This will be used for stochastic depth (if enabled) total_layers = 0 for layer_name in ["layer_1", "layer_2", "layer_3", "layer_4", "layer_5"]: for block_config in network_config[layer_name]: total_layers += block_config.num_layers network_config["total_layers"] = total_layers return network_config