Source code for cvnets.layers.conv_layer

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
from typing import Optional, Tuple, Type, Union

from torch import Tensor, nn

from cvnets.layers.activation import build_activation_layer
from cvnets.layers.base_layer import BaseLayer
from cvnets.layers.normalization.layer_norm import LayerNorm, LayerNorm2D_NCHW
from cvnets.layers.normalization_layers import get_normalization_layer
from utils import logger


class Conv2d(nn.Conv2d):
    """
    Applies a 2D convolution over an input.

    Args:
        in_channels: :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`.
        out_channels: :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`.
        kernel_size: Kernel size for convolution.
        stride: Stride for convolution. Default: 1.
        padding: Padding for convolution. Default: 0.
        dilation: Dilation rate for convolution. Default: 1.
        groups: Number of groups in convolution. Default: 1.
        bias: Use bias. Default: ``False``.
        padding_mode: Padding mode ('zeros', 'reflect', 'replicate' or 'circular'). Default: ``zeros``.
        use_norm: Use normalization layer after convolution. Default: ``True``.
        use_act: Use activation layer after convolution (or convolution and normalization). Default: ``True``.
        act_name: Use specific activation function. Overrides the one specified in command line args.

    Shape:
        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C_{out}, H_{out}, W_{out})`.
    """
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Optional[Union[int, Tuple[int, int]]] = 1,
        padding: Optional[Union[int, Tuple[int, int]]] = 0,
        dilation: Optional[Union[int, Tuple[int, int]]] = 1,
        groups: Optional[int] = 1,
        bias: Optional[bool] = False,
        padding_mode: Optional[str] = "zeros",
        *args,
        **kwargs,
    ) -> None:
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=padding_mode,
        )
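
# A minimal usage sketch (illustrative helper, not part of the original module): Conv2d is a
# thin wrapper around torch.nn.Conv2d, so it behaves like the standard layer; the extra
# keyword arguments documented above (use_norm, use_act, act_name) are accepted via **kwargs
# but not acted upon by this class.
def _example_conv2d_usage() -> None:
    import torch

    conv = Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
    out = conv(torch.randn(2, 3, 32, 32))
    assert out.shape == (2, 16, 32, 32)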
class _BaseConvNormActLayer(BaseLayer):
    """
    Applies an N-dimensional convolution over an input.

    Args:
        opts: Command line options.
        in_channels: :math:`C_{in}` from an expected input of size :math:`(bs, C_{in}, X_{1}, ..., X_{N})`.
        out_channels: :math:`C_{out}` from an expected output of size :math:`(bs, C_{out}, Y_{1}, ..., Y_{N})`.
        kernel_size: Kernel size for convolution. An integer, or tuple of length ``N``.
        stride: Stride for convolution. An integer, or tuple of length ``N``. Default: 1.
        dilation: Dilation rate for convolution. An integer, or tuple of length ``N``. Default: ``1``.
        padding: Padding for convolution. An integer, or tuple of length ``N``. If not specified,
            padding is automatically computed based on kernel size and dilation rate. Default: ``None``
            (equivalent to ``[int((kernel_size[i] - 1) / 2) * dilation[i] for i in range(N)]``).
        groups: Number of groups in convolution. Default: ``1``.
        bias: Use bias. Default: ``False``.
        padding_mode: Padding mode ('zeros', 'reflect', 'replicate' or 'circular'). Default: ``zeros``.
        use_norm: Use normalization layer after convolution. Default: ``True``.
        use_act: Use activation layer after convolution (or convolution and normalization). Default: ``True``.
        norm_layer: If not None, the provided normalization layer object will be used. Otherwise, a
            normalization object will be created based on config ``model.normalization.*`` opts.
        act_layer: If not None, the provided activation function will be used. Otherwise, an activation
            function will be created based on config ``model.activation.*`` opts.

    Shape:
        - Input: :math:`(bs, C_{in}, X_{1}, ..., X_{N})`.
        - Output: :math:`(bs, C_{out}, Y_{1}, ..., Y_{N})`.

    .. note::
        For depth-wise convolution, `groups=C_{in}=C_{out}`.
    """

    @property
    def ndim(self) -> int:
        raise NotImplementedError("subclasses should override ndim property")

    @property
    def module_cls(self) -> Type[nn.Module]:
        raise NotImplementedError("subclasses should override module_cls property")

    def __init__(
        self,
        opts: argparse.Namespace,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, ...]],
        stride: Union[int, Tuple[int, ...]] = 1,
        dilation: Union[int, Tuple[int, ...]] = 1,
        padding: Optional[Union[int, Tuple[int, ...]]] = None,
        groups: int = 1,
        bias: bool = False,
        padding_mode: str = "zeros",
        use_norm: bool = True,
        use_act: bool = True,
        norm_layer: Optional[nn.Module] = None,
        act_layer: Optional[nn.Module] = None,
        *args,
        **kwargs,
    ) -> None:
        super().__init__()

        # Build the normalization layer from config, unless one is provided explicitly.
        if norm_layer is None and use_norm:
            norm_type = getattr(opts, "model.normalization.name")
            if norm_type == "batch_norm":
                norm_type = f"batch_norm_{self.ndim}d"
            norm_layer = get_normalization_layer(
                opts=opts, num_features=out_channels, norm_type=norm_type
            )
        elif norm_layer is not None and not use_norm:
            logger.error(
                f"When use_norm is False, norm_layer should be None, but norm_layer={norm_layer} is provided."
            )

        # Build the activation layer from config, unless one is provided explicitly.
        if act_layer is None and use_act:
            act_layer = build_activation_layer(opts, num_parameters=out_channels)
        elif act_layer is not None and not use_act:
            logger.error(
                f"When use_act is False, act_layer should be None, but act_layer={act_layer} is provided."
            )

        if use_norm and any(
            param[0] == "bias" for param in norm_layer.named_parameters()
        ):
            assert (
                not bias
            ), "Do not use bias when using normalization layers with bias."

        # Use a conv bias when the normalization layer is a LayerNorm variant.
        if use_norm and isinstance(norm_layer, (LayerNorm, LayerNorm2D_NCHW)):
            bias = True

        if isinstance(kernel_size, int):
            kernel_size = (kernel_size,) * self.ndim
        if isinstance(stride, int):
            stride = (stride,) * self.ndim
        if isinstance(dilation, int):
            dilation = (dilation,) * self.ndim

        assert isinstance(kernel_size, tuple)
        assert isinstance(stride, tuple)
        assert isinstance(dilation, tuple)

        # If padding is not given, compute it so that odd kernels preserve the spatial size at stride 1.
        if padding is None:
            padding = tuple(
                int((kernel_size[i] - 1) / 2) * dilation[i] for i in range(self.ndim)
            )

        if in_channels % groups != 0:
            logger.error(
                "Input channels are not divisible by groups. {}%{} != 0 ".format(
                    in_channels, groups
                )
            )
        if out_channels % groups != 0:
            logger.error(
                "Output channels are not divisible by groups. {}%{} != 0 ".format(
                    out_channels, groups
                )
            )

        block = nn.Sequential()
        conv_layer = self.module_cls(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,  # type: ignore
            stride=stride,  # type: ignore
            padding=padding,
            dilation=dilation,  # type: ignore
            groups=groups,
            bias=bias,
            padding_mode=padding_mode,
        )
        block.add_module(name="conv", module=conv_layer)

        self.norm_name = None
        if use_norm:
            block.add_module(name="norm", module=norm_layer)
            self.norm_name = norm_layer.__class__.__name__

        self.act_name = None
        if use_act:
            block.add_module(name="act", module=act_layer)
            self.act_name = act_layer.__class__.__name__

        self.block = block
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.groups = groups
        self.kernel_size = conv_layer.kernel_size
        self.bias = bias
        self.dilation = dilation

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser):
        if cls != _BaseConvNormActLayer:
            return parser
        group = parser.add_argument_group(cls.__name__)
        group.add_argument(
            "--model.layer.conv-init",
            type=str,
            default="kaiming_normal",
            help="Init type for conv layers",
        )
        group.add_argument(
            "--model.layer.conv-init-std-dev",
            type=float,
            default=None,
            help="Std deviation for conv layers",
        )
        return parser

    def forward(self, x: Tensor) -> Tensor:
        return self.block(x)

    def __repr__(self):
        repr_str = self.block[0].__repr__()
        repr_str = repr_str[:-1]

        if self.norm_name is not None:
            repr_str += ", normalization={}".format(self.norm_name)

        if self.act_name is not None:
            repr_str += ", activation={}".format(self.act_name)

        repr_str += ")"
        return repr_str
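
# The automatic padding used above (when padding is None) follows
# ((kernel_size[i] - 1) // 2) * dilation[i], which preserves the spatial size for odd kernels
# at stride 1, including dilated ones. A quick self-contained check of that arithmetic
# (illustrative helper, not part of the original module):
def _example_default_padding() -> None:
    kernel_size, dilation = (3, 5), (1, 2)
    padding = tuple(((k - 1) // 2) * d for k, d in zip(kernel_size, dilation))
    # A 3x5 kernel with dilation (1, 2) gets padding (1, 4), keeping H and W at stride 1.
    assert padding == (1, 4)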
class ConvLayer1d(_BaseConvNormActLayer):
    ndim = 1
    module_cls = nn.Conv1d
class ConvLayer2d(_BaseConvNormActLayer):
    ndim = 2
    module_cls = Conv2d
class ConvLayer3d(_BaseConvNormActLayer):
    ndim = 3
    module_cls = nn.Conv3d
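
# A minimal usage sketch for ConvLayer2d (illustrative helper, not part of the original
# module). The two config entries set below are the ones this file reads explicitly
# (model.normalization.name and model.activation.name); in practice opts comes from the
# project's option parser, and the chosen normalization/activation may consult further
# model.normalization.* / model.activation.* options.
def _example_conv_layer_2d_usage() -> None:
    import torch

    opts = argparse.Namespace()
    setattr(opts, "model.normalization.name", "batch_norm")  # assumed config value
    setattr(opts, "model.activation.name", "relu")  # assumed config value

    layer = ConvLayer2d(
        opts=opts, in_channels=3, out_channels=32, kernel_size=3, stride=2
    )
    out = layer(torch.randn(1, 3, 224, 224))
    # Padding is inferred from kernel_size/dilation, so stride 2 halves H and W.
    assert out.shape == (1, 32, 112, 112)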
class TransposeConvLayer2d(BaseLayer):
    """
    Applies a 2D transpose convolution (also known as deconvolution) over an input.

    Args:
        opts: Command line arguments.
        in_channels: :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`.
        out_channels: :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`.
        kernel_size: Kernel size for convolution.
        stride: Stride for convolution. Default: 1.
        dilation: Dilation rate for convolution. Default: 1.
        groups: Number of groups in convolution. Default: 1.
        bias: Use bias. Default: ``False``.
        padding_mode: Padding mode. Default: ``zeros``.
        use_norm: Use normalization layer after convolution. Default: ``True``.
        use_act: Use activation layer after convolution (or convolution and normalization). Default: ``True``.
        padding: Padding will be done on both sides of each dimension in the input.
        output_padding: Additional padding on the output tensor.
        auto_padding: Compute padding automatically. Default: ``True``.

    Shape:
        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C_{out}, H_{out}, W_{out})`.
    """
    def __init__(
        self,
        opts: argparse.Namespace,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple],
        stride: Optional[Union[int, Tuple]] = 1,
        dilation: Optional[Union[int, Tuple]] = 1,
        groups: Optional[int] = 1,
        bias: Optional[bool] = False,
        padding_mode: Optional[str] = "zeros",
        use_norm: Optional[bool] = True,
        use_act: Optional[bool] = True,
        padding: Optional[Union[int, Tuple]] = (0, 0),
        output_padding: Optional[Union[int, Tuple]] = None,
        auto_padding: Optional[bool] = True,
        *args,
        **kwargs,
    ):
        super().__init__()
        if use_norm:
            assert not bias, "Do not use bias when using normalization layers."

        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        if isinstance(stride, int):
            stride = (stride, stride)
        if isinstance(dilation, int):
            dilation = (dilation, dilation)
        if output_padding is None:
            output_padding = (stride[0] - 1, stride[1] - 1)

        assert isinstance(kernel_size, (tuple, list))
        assert isinstance(stride, (tuple, list))
        assert isinstance(dilation, (tuple, list))

        if auto_padding:
            padding = (
                int((kernel_size[0] - 1) / 2) * dilation[0],
                int((kernel_size[1] - 1) / 2) * dilation[1],
            )

        if in_channels % groups != 0:
            logger.error(
                "Input channels are not divisible by groups. {}%{} != 0 ".format(
                    in_channels, groups
                )
            )
        if out_channels % groups != 0:
            logger.error(
                "Output channels are not divisible by groups. {}%{} != 0 ".format(
                    out_channels, groups
                )
            )

        block = nn.Sequential()
        conv_layer = nn.ConvTranspose2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=padding_mode,
            output_padding=output_padding,
        )
        block.add_module(name="conv", module=conv_layer)

        self.norm_name = None
        if use_norm:
            norm_layer = get_normalization_layer(opts=opts, num_features=out_channels)
            block.add_module(name="norm", module=norm_layer)
            self.norm_name = norm_layer.__class__.__name__

        self.act_name = None
        act_type = getattr(opts, "model.activation.name", "relu")
        if act_type is not None and use_act:
            act_layer = build_activation_layer(
                opts,
                act_type=act_type,
                num_parameters=out_channels,
            )
            block.add_module(name="act", module=act_layer)
            self.act_name = act_layer.__class__.__name__

        self.block = block
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.groups = groups
        self.kernel_size = conv_layer.kernel_size
        self.bias = bias
    def forward(self, x: Tensor) -> Tensor:
        return self.block(x)
    def __repr__(self):
        repr_str = self.block[0].__repr__()
        repr_str = repr_str[:-1]

        if self.norm_name is not None:
            repr_str += ", normalization={}".format(self.norm_name)

        if self.act_name is not None:
            repr_str += ", activation={}".format(self.act_name)

        repr_str += ")"
        return repr_str
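
# A sketch of the transpose-conv padding defaults above, checked against plain
# nn.ConvTranspose2d so that no opts object is needed: with kernel_size=3 and stride=2,
# auto_padding gives padding = ((3 - 1) // 2) * 1 = 1 and output_padding defaults to
# stride - 1 = 1, which yields exact 2x spatial upsampling. (Illustrative helper, not part
# of the original module.)
def _example_transpose_conv_defaults() -> None:
    import torch

    deconv = nn.ConvTranspose2d(
        8, 8, kernel_size=3, stride=2, padding=1, output_padding=1
    )
    out = deconv(torch.randn(1, 8, 16, 16))
    assert out.shape == (1, 8, 32, 32)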
class NormActLayer(BaseLayer):
    """
    Applies a normalization layer followed by an activation layer.

    Args:
        opts: Command-line arguments.
        num_features: :math:`C` from an expected input of size :math:`(N, C, H, W)`.

    Shape:
        - Input: :math:`(N, C, H, W)`.
        - Output: :math:`(N, C, H, W)`.
    """
    def __init__(self, opts, num_features, *args, **kwargs):
        super().__init__()

        block = nn.Sequential()

        self.norm_name = None
        norm_layer = get_normalization_layer(opts=opts, num_features=num_features)
        block.add_module(name="norm", module=norm_layer)
        self.norm_name = norm_layer.__class__.__name__

        self.act_name = None
        act_layer = build_activation_layer(
            opts,
            num_parameters=num_features,
        )
        block.add_module(name="act", module=act_layer)
        self.act_name = act_layer.__class__.__name__

        self.block = block
    def forward(self, x: Tensor) -> Tensor:
        return self.block(x)
    def __repr__(self):
        repr_str = "{}(normalization={}, activation={})".format(
            self.__class__.__name__, self.norm_name, self.act_name
        )
        return repr_str
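
# A minimal usage sketch for NormActLayer (illustrative helper, not part of the original
# module). As in the ConvLayer2d example above, only the two config keys read explicitly in
# this file are set here; other model.normalization.* / model.activation.* options may be
# needed depending on the configured layers.
def _example_norm_act_layer_usage() -> None:
    import torch

    opts = argparse.Namespace()
    setattr(opts, "model.normalization.name", "batch_norm")  # assumed config value
    setattr(opts, "model.activation.name", "relu")  # assumed config value

    norm_act = NormActLayer(opts, num_features=64)
    out = norm_act(torch.randn(2, 64, 14, 14))
    # Normalization followed by activation does not change the tensor shape.
    assert out.shape == (2, 64, 14, 14)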
class _BaseSeparableConv(BaseLayer):
    """
    Applies an N-dimensional `depth-wise separable convolution <https://arxiv.org/abs/1610.02357>`_
    over an N-dimensional input tensor.

    Args:
        opts: Command line arguments.
        in_channels: :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, X_{1}, ..., X_{N})`.
        out_channels: :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, Y_{1}, ..., Y_{N})`.
        kernel_size: Kernel size for convolution.
        stride: Stride for convolution. Default: 1.
        dilation: Dilation rate for convolution. Default: 1.
        use_norm: Use normalization layer after convolution. Default: ``True``.
        use_act: Use activation layer after convolution (or convolution and normalization). Default: ``True``.
        use_act_depthwise: Use activation layer after depth-wise convolution (or convolution and
            normalization). Default: ``False``.
            NOTE: We recommend against using activation function in depth-wise convolution.
        bias: Use bias. Default: ``False``.
        padding_mode: Padding mode ('zeros', 'reflect', 'replicate' or 'circular'). Default: ``zeros``.
        act_name: Use specific activation function. Overrides the one specified in command line args.
            Default: ``None``.

    Shape:
        - Input: :math:`(N, C_{in}, X_{1}, ..., X_{N})`.
        - Output: :math:`(N, C_{out}, Y_{1}, ..., Y_{N})`.

    .. note::
        For depth-wise convolution, `groups=C_{in}=C_{out}`.
    """

    def __init__(
        self,
        opts: argparse.Namespace,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, ...]],
        stride: Union[int, Tuple[int, ...]] = 1,
        dilation: Union[int, Tuple[int, ...]] = 1,
        use_norm: bool = True,
        use_act: bool = True,
        use_act_depthwise: bool = False,
        bias: bool = False,
        padding_mode: str = "zeros",
        act_name: Optional[str] = None,
        *args,
        **kwargs,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.dw_conv = self.conv_layer_cls(
            opts=opts,
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            groups=in_channels,
            bias=False,
            padding_mode=padding_mode,
            use_norm=True,
            # NOTE: We recommend against using activation function in depth-wise convolution.
            use_act=use_act_depthwise,
            act_name=act_name,
        )
        self.pw_conv = self.conv_layer_cls(
            opts=opts,
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            dilation=1,
            groups=1,
            bias=bias,
            padding_mode=padding_mode,
            use_norm=use_norm,
            use_act=use_act,
            act_name=act_name,
        )
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.kernel_size = kernel_size
        self.dilation = dilation

    @property
    def conv_layer_cls(self):
        raise NotImplementedError("Subclasses should override conv_layer_cls.")

    def __repr__(self):
        repr_str = "{}(in_channels={}, out_channels={}, kernel_size={}, stride={}, dilation={})".format(
            self.__class__.__name__,
            self.in_channels,
            self.out_channels,
            self.kernel_size,
            self.stride,
            self.dilation,
        )
        return repr_str

    def forward(self, x: Tensor) -> Tensor:
        x = self.dw_conv(x)
        x = self.pw_conv(x)
        return x
class SeparableConv1d(_BaseSeparableConv):
    conv_layer_cls = ConvLayer1d
class SeparableConv2d(_BaseSeparableConv):
    conv_layer_cls = ConvLayer2d
class SeparableConv3d(_BaseSeparableConv):
    conv_layer_cls = ConvLayer3d
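
# A back-of-the-envelope parameter comparison for the factorization used above (a depth-wise
# convolution with groups=C_in followed by a 1x1 point-wise convolution), written with plain
# nn.Conv2d modules so that no opts object is needed; the SeparableConv* classes additionally
# attach normalization/activation layers. (Illustrative helper, not part of the original module.)
def _example_separable_conv_param_savings() -> None:
    def n_params(module: nn.Module) -> int:
        return sum(p.numel() for p in module.parameters())

    dense = nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False)
    dw = nn.Conv2d(64, 64, kernel_size=3, padding=1, groups=64, bias=False)
    pw = nn.Conv2d(64, 128, kernel_size=1, bias=False)

    assert n_params(dense) == 64 * 128 * 9  # 73728 weights for the dense 3x3 conv
    assert n_params(dw) + n_params(pw) == 64 * 9 + 64 * 128  # 576 + 8192 = 8768 weights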