#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#
import argparse
from typing import Any, Optional, Tuple
from torch import nn
from cvnets.layers import ConvLayer2d, Dropout, GlobalPool, LinearLayer
from cvnets.models import MODEL_REGISTRY
from cvnets.models.classification.base_image_encoder import BaseImageEncoder
from cvnets.models.classification.config.efficientnet import (
EfficientNetBlockConfig,
get_configuration,
)
from cvnets.modules import EfficientNetBlock
@MODEL_REGISTRY.register(name="efficientnet", type="classification")
class EfficientNet(BaseImageEncoder):
    """
    This class defines the `EfficientNet architecture <https://arxiv.org/abs/1905.11946>`_
    """

    def __init__(
        self,
        opts,
        *args,
        **kwargs: Any,
    ) -> None:
        """Build the EfficientNet backbone from the configuration attached to ``opts``.

        Args:
            opts: Command-line / config namespace. Read keys include
                ``model.classification.classifier_dropout``,
                ``model.classification.n_classes``, and
                ``model.classification.efficientnet.stochastic_depth_prob``.
        """
        super().__init__(opts, *args, **kwargs)
        classifier_dropout = getattr(opts, "model.classification.classifier_dropout")
        network_config = get_configuration(opts)
        last_channels = network_config["last_channels"]
        total_layers = network_config["total_layers"]
        num_classes = getattr(opts, "model.classification.n_classes", 1000)
        stochastic_depth_prob = getattr(
            opts, "model.classification.efficientnet.stochastic_depth_prob", 0.2
        )

        # building first layer (stem): 3x3 stride-2 conv on the RGB input
        image_channels = 3
        in_channels = network_config["layer_1"][0].in_channels
        self.conv_1 = ConvLayer2d(
            opts=opts,
            in_channels=image_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=2,
            use_norm=True,
            use_act=True,
        )
        self.model_conf_dict["conv1"] = {"in": image_channels, "out": in_channels}

        # building inverted residual blocks
        prev_layers_cnt = 0  # counts the number of layers added so far
        for layer_name in ["layer_1", "layer_2", "layer_3", "layer_4", "layer_5"]:
            # Layers 4 and 5 may be dilated instead of strided (used by
            # segmentation heads that need a higher-resolution feature map).
            dilation = False
            if layer_name == "layer_4":
                dilation = self.dilate_l4
            elif layer_name == "layer_5":
                dilation = self.dilate_l5
            layer, prev_layers_cnt = self._make_layer(
                opts=opts,
                block_config=network_config[layer_name],
                stochastic_depth_prob=stochastic_depth_prob,
                prev_layers_cnt=prev_layers_cnt,
                total_layers=total_layers,
                dilate=dilation,
            )
            setattr(self, layer_name, layer)
            # we have saved mappings without underscore in layer_name, so removing it
            self.model_conf_dict[layer_name.replace("_", "")] = {
                "in": network_config[layer_name][0].in_channels,
                "out": network_config[layer_name][-1].out_channels,
            }

        # building last several layers: 1x1 expansion conv before the classifier
        in_channels = network_config["layer_5"][-1].out_channels
        out_channels = last_channels
        self.conv_1x1_exp = ConvLayer2d(
            opts=opts,
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            use_act=True,
            use_norm=True,
        )
        self.model_conf_dict["exp_before_cls"] = {
            "in": in_channels,
            "out": out_channels,
        }

        # classifier head: global pool -> (optional dropout) -> linear
        pool_type = getattr(opts, "model.layer.global_pool", "mean")
        self.classifier = nn.Sequential()
        self.classifier.add_module(
            name="global_pool", module=GlobalPool(pool_type=pool_type, keep_dim=False)
        )
        if 0.0 < classifier_dropout < 1.0:
            self.classifier.add_module(
                name="classifier_dropout",
                module=Dropout(p=classifier_dropout, inplace=True),
            )
        self.classifier.add_module(
            name="classifier_fc",
            module=LinearLayer(
                in_features=out_channels, out_features=num_classes, bias=True
            ),
        )

    def _make_layer(
        self,
        opts,
        block_config,
        stochastic_depth_prob: float,
        prev_layers_cnt: int,  # number of layers before calling this function
        total_layers: int,  # Total number of layers in the network
        dilate: Optional[bool] = False,
        *args,
        **kwargs,
    ) -> Tuple[nn.Module, int]:
        """Build one stage of EfficientNet blocks.

        Stochastic-depth drop probability is scaled linearly with the layer's
        position in the whole network (layer index / total_layers).

        Args:
            opts: Config namespace, forwarded to each ``EfficientNetBlock``.
            block_config: Sequence of ``EfficientNetBlockConfig`` entries for
                this stage.
            stochastic_depth_prob: Maximum drop probability (reached at the
                last layer of the network).
            prev_layers_cnt: Number of blocks built before this stage.
            total_layers: Total number of blocks in the network.
            dilate: When True, replace the stage's first stride-2 conv with a
                dilated stride-1 conv.

        Returns:
            Tuple of the stage as an ``nn.Sequential`` and the updated layer count.
        """
        # This is to accommodate segmentation architectures modifying strides of the backbone network.
        prev_dilation = self.dilation
        # For classification, dilation here should always be 1.
        block = []
        count = 0
        for layer_config in block_config:
            assert isinstance(layer_config, EfficientNetBlockConfig)
            in_channels = layer_config.in_channels
            out_channels = layer_config.out_channels
            for layer_idx in range(layer_config.num_layers):
                # Only the first layer in a sub-stage may downsample.
                stride = layer_config.stride if layer_idx == 0 else 1
                if dilate and stride == 2:
                    # Trade the stride for dilation (once per stage).
                    self.dilation *= stride
                    stride = 1
                    dilate = False
                sd_prob = (
                    stochastic_depth_prob
                    * float(prev_layers_cnt + count)
                    / total_layers
                )
                sd_prob = round(sd_prob, 4)
                efficient_net_layer = EfficientNetBlock(
                    stochastic_depth_prob=sd_prob,
                    opts=opts,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=layer_config.kernel,
                    stride=stride,
                    expand_ratio=layer_config.expand_ratio,
                    # First block of the stage keeps the pre-stage dilation so
                    # its (possibly strided) conv is placed correctly.
                    dilation=prev_dilation if count == 0 else self.dilation,
                    use_hs=False,
                    use_se=True,
                    use_input_as_se_dim=True,
                    squeeze_factor=layer_config.expand_ratio * 4,
                    act_fn_name="swish",
                    se_scale_fn_name="sigmoid",
                )
                block.append(efficient_net_layer)
                count += 1
                in_channels = out_channels
        prev_layers_cnt += count
        return nn.Sequential(*block), prev_layers_cnt

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        """Add EfficientNet-specific CLI arguments to ``parser`` and return it."""
        group = parser.add_argument_group(title=cls.__name__)
        group.add_argument(
            "--model.classification.efficientnet.mode",
            type=str,
            choices=[f"b{i}" for i in range(8)],
        )
        group.add_argument(
            "--model.classification.efficientnet.stochastic-depth-prob",
            type=float,
            default=0.0,
        )
        return parser