Source code for cvnets.models.classification.resnet

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
from functools import partial
from typing import Dict, List, Tuple

import numpy as np
from torch import nn

from cvnets.layers import ConvLayer2d, Dropout, GlobalPool, Identity, LinearLayer
from cvnets.models import MODEL_REGISTRY
from cvnets.models.classification.base_image_encoder import BaseImageEncoder
from cvnets.models.classification.config.resnet import get_configuration
from cvnets.modules import BasicResNetBlock, BottleneckResNetBlock


@MODEL_REGISTRY.register(name="resnet", type="classification")
class ResNet(BaseImageEncoder):
    """
    This class implements the `ResNet architecture <https://arxiv.org/pdf/1512.03385.pdf>`_

    .. note::
        Our ResNet implementation is different from the original implementation in two ways:
        1. First 7x7 strided conv is replaced with 3x3 strided conv
        2. MaxPool operation is replaced with another 3x3 strided depth-wise conv
    """
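
    # Illustrative note (not part of the original source): in plain PyTorch terms,
    # the modified stem built in `__init__` below roughly replaces the standard
    # ResNet stem of a 7x7 stride-2 nn.Conv2d followed by a 3x3 stride-2
    # nn.MaxPool2d with two 3x3 stride-2 convolutions (`conv_1` and `layer_1`),
    # the second of which is depth-wise (groups == channels), so the stem's
    # overall 4x spatial downsampling is preserved.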

    def __init__(self, opts: argparse.Namespace, *args, **kwargs) -> None:
        image_channels = 3
        input_channels = 64

        num_classes = getattr(opts, "model.classification.n_classes")
        classifier_dropout = getattr(opts, "model.classification.classifier_dropout")
        stochastic_depth_prob = getattr(
            opts, "model.classification.resnet.stochastic_depth_prob"
        )

        pool_type = getattr(opts, "model.layer.global_pool")

        cfg = get_configuration(opts=opts)

        super().__init__(opts, *args, **kwargs)

        self.model_conf_dict = dict()

        self.conv_1 = ConvLayer2d(
            opts=opts,
            in_channels=image_channels,
            out_channels=input_channels,
            kernel_size=3,
            stride=2,
            use_norm=True,
            use_act=True,
        )
        self.model_conf_dict["conv1"] = {"in": image_channels, "out": input_channels}

        self.layer_1 = ConvLayer2d(
            opts=opts,
            in_channels=input_channels,
            out_channels=input_channels,
            kernel_size=3,
            stride=2,
            use_norm=True,
            use_act=True,
            groups=input_channels,
        )
        self.model_conf_dict["layer1"] = {"in": input_channels, "out": input_channels}

        # Stochastic depth variables
        block_repeats = [cfg[f"layer{i}"].get("num_blocks", 2) for i in range(2, 6)]
        block_start_indices = np.cumsum([0] + block_repeats[:-1])
        net_num_blocks = sum(block_repeats)
        stochastic_depth_fn = partial(
            self._block_stochastic_depth_prob,
            stochastic_depth_prob=stochastic_depth_prob,
            net_num_blocks=net_num_blocks,
        )

        start_idx = block_start_indices[0]
        num_blocks = cfg["layer2"]["num_blocks"]
        self.layer_2, out_channels = self._make_layer(
            opts=opts,
            in_channels=input_channels,
            layer_config=cfg["layer2"],
            stochastic_depth_probs=[
                stochastic_depth_fn(start_idx=start_idx, idx=idx)
                for idx in range(num_blocks)
            ],
        )
        self.model_conf_dict["layer2"] = {"in": input_channels, "out": out_channels}
        input_channels = out_channels

        start_idx = block_start_indices[1]
        num_blocks = cfg["layer3"]["num_blocks"]
        self.layer_3, out_channels = self._make_layer(
            opts=opts,
            in_channels=input_channels,
            layer_config=cfg["layer3"],
            stochastic_depth_probs=[
                stochastic_depth_fn(start_idx=start_idx, idx=idx)
                for idx in range(num_blocks)
            ],
        )
        self.model_conf_dict["layer3"] = {"in": input_channels, "out": out_channels}
        input_channels = out_channels

        start_idx = block_start_indices[2]
        num_blocks = cfg["layer4"]["num_blocks"]
        self.layer_4, out_channels = self._make_layer(
            opts=opts,
            in_channels=input_channels,
            layer_config=cfg["layer4"],
            stochastic_depth_probs=[
                stochastic_depth_fn(start_idx=start_idx, idx=idx)
                for idx in range(num_blocks)
            ],
            dilate=self.dilate_l4,
        )
        self.model_conf_dict["layer4"] = {"in": input_channels, "out": out_channels}
        input_channels = out_channels

        start_idx = block_start_indices[3]
        num_blocks = cfg["layer5"]["num_blocks"]
        self.layer_5, out_channels = self._make_layer(
            opts=opts,
            in_channels=input_channels,
            layer_config=cfg["layer5"],
            stochastic_depth_probs=[
                stochastic_depth_fn(start_idx=start_idx, idx=idx)
                for idx in range(num_blocks)
            ],
            dilate=self.dilate_l5,
        )
        self.model_conf_dict["layer5"] = {"in": input_channels, "out": out_channels}
        input_channels = out_channels

        self.conv_1x1_exp = Identity()
        self.model_conf_dict["exp_before_cls"] = {
            "in": input_channels,
            "out": input_channels,
        }

        self.classifier = nn.Sequential()
        self.classifier.add_module(
            name="global_pool", module=GlobalPool(pool_type=pool_type, keep_dim=False)
        )
        if 0.0 < classifier_dropout < 1.0:
            self.classifier.add_module(
                name="classifier_dropout", module=Dropout(p=classifier_dropout)
            )
        self.classifier.add_module(
            name="classifier_fc",
            module=LinearLayer(
                in_features=input_channels, out_features=num_classes, bias=True
            ),
        )
        self.model_conf_dict["cls"] = {"in": input_channels, "out": num_classes}

        # check model
        self.check_model()

        # weight initialization
        self.reset_parameters(opts=opts)

    def _block_stochastic_depth_prob(
        self,
        stochastic_depth_prob: float,
        idx: int,
        start_idx: int,
        net_num_blocks: int,
    ):
        """Computes the stochastic depth probability for a particular block in the network"""
        return round(
            stochastic_depth_prob * (idx + start_idx) / (net_num_blocks - 1), 4
        )

    def _make_layer(
        self,
        opts: argparse.Namespace,
        in_channels: int,
        layer_config: Dict,
        stochastic_depth_probs: List[float],
        dilate: bool = False,
        *args,
        **kwargs,
    ) -> Tuple[nn.Sequential, int]:
        block_type = (
            BottleneckResNetBlock
            if layer_config.get("block_type", "bottleneck").lower() == "bottleneck"
            else BasicResNetBlock
        )
        mid_channels = layer_config.get("mid_channels")
        num_blocks = layer_config.get("num_blocks", 2)
        stride = layer_config.get("stride", 1)
        squeeze_channels = layer_config.get("squeeze_channels", None)

        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        out_channels = block_type.expansion * mid_channels
        dropout = getattr(opts, "model.classification.resnet.dropout")

        block = nn.Sequential()
        block.add_module(
            name="block_0",
            module=block_type(
                opts=opts,
                in_channels=in_channels,
                mid_channels=mid_channels,
                out_channels=out_channels,
                stride=stride,
                dilation=previous_dilation,
                dropout=dropout,
                stochastic_depth_prob=stochastic_depth_probs[0],
                squeeze_channels=squeeze_channels,
            ),
        )

        for block_idx in range(1, num_blocks):
            block.add_module(
                name="block_{}".format(block_idx),
                module=block_type(
                    opts=opts,
                    in_channels=out_channels,
                    mid_channels=mid_channels,
                    out_channels=out_channels,
                    stride=1,
                    dilation=self.dilation,
                    dropout=dropout,
                    stochastic_depth_prob=stochastic_depth_probs[block_idx],
                    squeeze_channels=squeeze_channels,
                ),
            )

        return block, out_channels
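
    # Worked example (illustrative, not part of the original source): for ResNet-50
    # the per-stage block counts are [3, 4, 6, 3], so net_num_blocks = 16 and
    # block_start_indices = [0, 3, 7, 13]. With
    # --model.classification.resnet.stochastic-depth-prob 0.1, block k
    # (global index 0..15) is assigned drop probability round(0.1 * k / 15, 4),
    # ramping linearly from 0.0 for the first block to 0.1 for the last.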

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        group = parser.add_argument_group(title=cls.__name__)

        group.add_argument("--model.classification.resnet.depth", type=int, default=50)
        group.add_argument(
            "--model.classification.resnet.dropout",
            type=float,
            default=0.0,
            help="Dropout in Resnet blocks. Defaults to 0.",
        )
        group.add_argument(
            "--model.classification.resnet.stochastic-depth-prob",
            type=float,
            default=0.0,
            help="Stochastic depth drop probability in Resnet blocks. Defaults to 0.",
        )
        group.add_argument(
            "--model.classification.resnet.se-resnet",
            action="store_true",
            default=False,
            help="Whether to use SE block to construct SE-ResNet model. Defaults to False.",
        )
        return parser
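

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): parses the
# ResNet-specific command-line options registered in `add_arguments` above.
# Constructing the model itself additionally requires the options consumed in
# `__init__` (e.g. `model.classification.n_classes`, `model.layer.global_pool`)
# and those of `BaseImageEncoder`, which cvnets normally supplies through its
# configuration system; they are not reproduced here.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    example_parser = argparse.ArgumentParser("resnet-options")
    example_parser = ResNet.add_arguments(example_parser)
    example_opts = example_parser.parse_args(
        ["--model.classification.resnet.depth", "101"]
    )
    # Option names keep their dots in the parsed namespace, so values are read
    # back with getattr, mirroring the calls inside the class.
    print(getattr(example_opts, "model.classification.resnet.depth"))  # 101
    print(getattr(example_opts, "model.classification.resnet.stochastic_depth_prob"))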