#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#
import argparse
import copy
import torch.nn as nn
from cvnets.layers import GlobalPool, Identity, LinearLayer
from cvnets.models import MODEL_REGISTRY
from cvnets.models.classification.base_image_encoder import BaseImageEncoder
from cvnets.models.classification.config.mobileone import get_configuration
from cvnets.modules.mobileone_block import MobileOneBlock
@MODEL_REGISTRY.register(name="mobileone", type="classification")
class MobileOne(BaseImageEncoder):
"""
This class implements `MobileOne architecture <https://arxiv.org/pdf/2206.04040.pdf>`_
"""
    def __init__(self, opts, *args, **kwargs) -> None:
cfg = get_configuration(opts=opts)
super().__init__(opts, *args, **kwargs)
self.opts = opts
image_channels = 3
num_blocks_per_stage = cfg["num_blocks_per_stage"]
width_multipliers = cfg["width_multipliers"]
use_se = cfg["use_se"]
self.num_conv_branches = cfg["num_conv_branches"]
num_classes = getattr(opts, "model.classification.n_classes")
self.inference_mode = getattr(
opts, "model.classification.mobileone.inference_mode"
)
assert len(width_multipliers) == 4
self.in_planes = min(64, int(64 * width_multipliers[0]))
self.model_conf_dict = dict()
# First dense conv
self.conv_1 = MobileOneBlock(
opts,
in_channels=image_channels,
out_channels=self.in_planes,
kernel_size=3,
stride=2,
padding=1,
inference_mode=self.inference_mode,
)
self.model_conf_dict["conv1"] = {"in": image_channels, "out": self.in_planes}
self.layer_1 = Identity()
self.model_conf_dict["layer1"] = {"in": self.in_planes, "out": self.in_planes}
# Build stages
self.cur_layer_idx = 1
self.model_conf_dict["layer2"] = {
"in": self.in_planes,
"out": int(64 * width_multipliers[0]),
}
self.layer_2 = self._make_stage(
int(64 * width_multipliers[0]), num_blocks_per_stage[0], num_se_blocks=0
)
self.model_conf_dict["layer3"] = {
"in": self.in_planes,
"out": int(128 * width_multipliers[1]),
}
self.layer_3 = self._make_stage(
int(128 * width_multipliers[1]), num_blocks_per_stage[1], num_se_blocks=0
)
self.model_conf_dict["layer4"] = {
"in": self.in_planes,
"out": int(256 * width_multipliers[2]),
}
self.layer_4 = self._make_stage(
int(256 * width_multipliers[2]),
num_blocks_per_stage[2],
num_se_blocks=int(num_blocks_per_stage[2] // 2) if use_se else 0,
)
self.model_conf_dict["layer5"] = {
"in": self.in_planes,
"out": int(512 * width_multipliers[3]),
}
self.layer_5 = self._make_stage(
int(512 * width_multipliers[3]),
num_blocks_per_stage[3],
num_se_blocks=num_blocks_per_stage[3] if use_se else 0,
)
# No extra 1x1 conv before classifier
self.conv_1x1_exp = Identity()
self.model_conf_dict["exp_before_cls"] = {
"in": int(512 * width_multipliers[3]),
"out": int(512 * width_multipliers[3]),
}
# Build classifier
pool_type = getattr(opts, "model.layer.global_pool")
self.classifier = nn.Sequential()
self.classifier.add_module(
name="global_pool", module=GlobalPool(pool_type=pool_type, keep_dim=False)
)
self.classifier.add_module(
name="classifier_fc",
module=LinearLayer(
in_features=int(512 * width_multipliers[3]),
out_features=num_classes,
bias=True,
),
)
self.model_conf_dict["cls"] = {
"in": int(512 * width_multipliers[3]),
"out": num_classes,
}
# check model
self.check_model()
# weight initialization
self.reset_parameters(opts=opts)
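        # Note: `model_conf_dict` records the in/out channel count of every stage
        # (conv1, layer1..layer5, exp_before_cls, cls). `self.check_model()` above
        # validates this layout; under the usual cvnets conventions, downstream
        # task heads also read these entries to size their inputs.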
    @classmethod
def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
"""Add model specific arguments"""
group = parser.add_argument_group(title=cls.__name__)
group.add_argument(
"--model.classification.mobileone.variant",
type=str,
default="s1",
help="Variant string for MobileOne. Default: s1",
)
        group.add_argument(
            "--model.classification.mobileone.inference-mode",
            # Use a store_true flag rather than type=bool: argparse's bool() treats
            # any non-empty string (including "False") as True.
            action="store_true",
            default=False,
            help="Flag to instantiate inference mode architecture. Default: False",
        )
return parser
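    # Illustrative sketch of how the flags above surface on `opts`. This is an
    # approximation with a bare argparse parser; cvnets normally builds `opts`
    # through its own config/option utilities.
    #
    #   parser = argparse.ArgumentParser()
    #   parser = MobileOne.add_arguments(parser)
    #   opts = parser.parse_args([])
    #   getattr(opts, "model.classification.mobileone.variant")         # -> "s1"
    #   getattr(opts, "model.classification.mobileone.inference_mode")  # -> False
    #
    # argparse maps the trailing dash in "inference-mode" to an underscore, which
    # is why __init__ reads "model.classification.mobileone.inference_mode".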
def _make_stage(
self, planes: int, num_blocks: int, num_se_blocks: int
) -> nn.Sequential:
"""Build a stage of MobileOne model.
Args:
planes: Number of output channels.
num_blocks: Number of blocks in this stage.
num_se_blocks: Number of SE blocks in this stage.
Returns:
A stage of MobileOne model.
"""
# Get strides for all layers
strides = [2] + [1] * (num_blocks - 1)
        if num_se_blocks > num_blocks:
            raise ValueError(
                f"Number of SE blocks ({num_se_blocks}) cannot exceed number of layers ({num_blocks})."
            )
        blocks = []
        for ix, stride in enumerate(strides):
            # Enable SE only in the last `num_se_blocks` blocks of the stage.
            use_se = ix >= (num_blocks - num_se_blocks)
# MobileOne block with depthwise conv
blocks.append(
MobileOneBlock(
self.opts,
in_channels=self.in_planes,
out_channels=self.in_planes,
kernel_size=3,
stride=stride,
padding=1,
groups=self.in_planes,
inference_mode=self.inference_mode,
use_se=use_se,
num_conv_branches=self.num_conv_branches,
)
)
# MobileOne block with pointwise conv
blocks.append(
MobileOneBlock(
self.opts,
in_channels=self.in_planes,
out_channels=planes,
kernel_size=1,
stride=1,
padding=0,
groups=1,
inference_mode=self.inference_mode,
use_se=use_se,
num_conv_branches=self.num_conv_branches,
)
)
self.in_planes = planes
self.cur_layer_idx += 1
return nn.Sequential(*blocks)
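    # Rough layout produced by _make_stage (illustrative; channel counts depend on
    # the width multipliers): each counted block is a depthwise/pointwise pair of
    # MobileOneBlocks, with the stride-2 downsampling only on the first pair, e.g.
    # for num_blocks=2:
    #
    #   dw 3x3, stride 2, groups=in_planes  ->  pw 1x1, stride 1
    #   dw 3x3, stride 1, groups=planes     ->  pw 1x1, stride 1
    #
    # SE is enabled only for the last `num_se_blocks` pairs of the stage.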
    def get_exportable_model(self) -> nn.Module:
"""Method returns a model where a multi-branched structure
used in training is re-parameterized into a single branch
for inference.
Returns:
Reparametrized MobileOne model for faster inference.
"""
# Avoid editing original graph
model = copy.deepcopy(self)
for module in model.modules():
if hasattr(module, "reparameterize"):
module.reparameterize()
return model
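
# A minimal sketch of the intended train/export flow (training and export
# machinery is assumed, not shown; `opts` is whatever namespace the cvnets
# config system produces for this model):
#
#   model = MobileOne(opts)                      # multi-branched training graph
#   ...                                          # train as usual
#   export_model = model.get_exportable_model()  # deep-copies the model and calls
#                                                # reparameterize() on every block
#   export_model.eval()                          # single-branch convolutions,
#                                                # ready for tracing / benchmarking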