# Source code for cvnets.models.classification.config.efficientnet

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import math
from dataclasses import dataclass
from functools import partial
from typing import Dict

from utils import logger
from utils.math_utils import make_divisible


@dataclass
class CompoundScalingConfig:
    """Compound scaling configuration for a single EfficientNet mode.

    This is a plain value holder: the three numbers are stored as given and
    no scaling is performed by this class.
    """

    # Multiplier for the channel counts (network width).
    width_mult: float
    # Multiplier for the number of layers per block (network depth).
    depth_mult: float
    # Input resolution associated with the mode.
    train_resolution: int


@dataclass
class EfficientNetBlockConfig:
    """Configuration of one EfficientNet block, i.e. the MBConv layers in Table 1
    of the `EfficientNet paper <https://arxiv.org/abs/1905.11946>`_.

    The constructor also takes ``width_mult`` and ``depth_mult`` and uses them to
    adjust the block's channel counts and number of layers, as required by the
    different EfficientNet modes. The multipliers themselves are not stored.

    Args:
        expand_ratio: Expansion ratio of the block; stored unchanged.
        kernel: Kernel size; stored unchanged.
        stride: Stride; stored unchanged.
        in_channels: Base number of input channels, before width scaling.
        out_channels: Base number of output channels, before width scaling.
        num_layers: Base number of layers, before depth scaling.
        width_mult: Multiplier applied to ``in_channels`` and ``out_channels``.
        depth_mult: Multiplier applied to ``num_layers``.
    """

    # Declaring the stored attributes as fields lets the dataclass-generated
    # __repr__ and __eq__ reflect them. The hand-written __init__ below is kept:
    # @dataclass does not replace an __init__ defined in the class body.
    expand_ratio: float
    kernel: int
    stride: int
    in_channels: int
    out_channels: int
    num_layers: int

    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        in_channels: int,
        out_channels: int,
        num_layers: int,
        width_mult: float,
        depth_mult: float,
    ):
        self.expand_ratio = expand_ratio
        self.kernel = kernel
        self.stride = stride
        # Scale the widths, then pass them through make_divisible with divisor 8
        # (presumably rounds to a multiple of 8 -- confirm against
        # utils.math_utils.make_divisible).
        self.in_channels = int(make_divisible(in_channels * width_mult, 8))
        self.out_channels = int(make_divisible(out_channels * width_mult, 8))
        # Scale the depth and round up so a fractional result adds a layer.
        self.num_layers = int(math.ceil(num_layers * depth_mult))


def get_configuration(opts) -> Dict:
    """Build the EfficientNet layer configuration for the mode selected in ``opts``.

    Args:
        opts: Options object. The attribute named
            ``model.classification.efficientnet.mode`` is read via ``getattr``
            and lower-cased before it is looked up.

    Returns:
        A dictionary with the keys ``layer_1`` ... ``layer_5`` (each a list of
        :class:`EfficientNetBlockConfig`), ``last_channels`` (4x the output
        channels of the last block in ``layer_5``) and ``total_layers`` (sum of
        ``num_layers`` over all blocks).
    """
    network_mode = getattr(opts, "model.classification.efficientnet.mode")

    if network_mode is None:
        # NOTE(review): the code below assumes logger.error aborts execution;
        # otherwise `.lower()` fails on None -- confirm against utils.logger.
        logger.error(
            "EfficientNet mode can't be none. Please specify --model.classification.efficientnet.mode"
        )

    network_mode = network_mode.lower()
    network_config = {}

    # EfficientNet scales depth, width and resolution.
    # We will make use of resolution in the yaml configuration file, but leave it here for the sake of completeness

    compound_scaling_cfg = {
        "b0": CompoundScalingConfig(1.0, 1.0, 224),
        "b1": CompoundScalingConfig(1.0, 1.1, 240),
        "b2": CompoundScalingConfig(1.1, 1.2, 260),
        "b3": CompoundScalingConfig(1.2, 1.4, 300),
        "b4": CompoundScalingConfig(1.4, 1.8, 380),
        "b5": CompoundScalingConfig(1.6, 2.2, 456),
        "b6": CompoundScalingConfig(1.8, 2.6, 528),
        "b7": CompoundScalingConfig(2.0, 3.1, 600),
        "b8": CompoundScalingConfig(2.2, 3.6, 672),
    }

    if network_mode in compound_scaling_cfg:
        compound_scaling_cfg_mode_i = compound_scaling_cfg[network_mode]
        width_mult = compound_scaling_cfg_mode_i.width_mult
        depth_mult = compound_scaling_cfg_mode_i.depth_mult

        # pre-feed depth and width multipliers as they are always used and same across layers.
        block_builder = partial(
            EfficientNetBlockConfig, width_mult=width_mult, depth_mult=depth_mult
        )

        # Build the configuration at each spatial level.
        # The format of configuration is: (expand_ratio, kernel, stride, in_channels, out_channels, num_layers)

        # Configuration at output stride of 2
        network_config["layer_1"] = [block_builder(1, 3, 1, 32, 16, 1)]

        # Configuration at output stride of 4
        network_config["layer_2"] = [
            block_builder(6, 3, 2, 16, 24, 2),
        ]

        # Configuration at output stride of 8
        network_config["layer_3"] = [
            block_builder(6, 5, 2, 24, 40, 2),
        ]

        # Configuration at output stride of 16
        network_config["layer_4"] = [
            block_builder(6, 3, 2, 40, 80, 3),
            block_builder(6, 5, 1, 80, 112, 3),
        ]
        # Configuration at output stride of 32
        network_config["layer_5"] = [
            block_builder(6, 5, 2, 112, 192, 4),
            block_builder(6, 3, 1, 192, 320, 1),
        ]
        network_config["last_channels"] = 4 * network_config["layer_5"][-1].out_channels
    else:
        # The table above defines b0 through b8, so the message advertises b[0-8].
        # NOTE(review): the layer count below assumes logger.error aborts here;
        # otherwise "layer_1" is missing from network_config -- confirm.
        logger.error(
            "Current supported modes for EfficientNet are b[0-8]. Got: {}".format(
                network_mode
            )
        )

    # Count the total number of layers throughout all blocks.
    # This will be used for stochastic depth (if enabled)
    network_config["total_layers"] = sum(
        block_config.num_layers
        for layer_name in ("layer_1", "layer_2", "layer_3", "layer_4", "layer_5")
        for block_config in network_config[layer_name]
    )
    return network_config