#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#
import argparse
from typing import Optional, Tuple, Type, Union
from torch import Tensor, nn
from cvnets.layers.activation import build_activation_layer
from cvnets.layers.base_layer import BaseLayer
from cvnets.layers.normalization.layer_norm import LayerNorm, LayerNorm2D_NCHW
from cvnets.layers.normalization_layers import get_normalization_layer
from utils import logger
class Conv2d(nn.Conv2d):
"""
Applies a 2D convolution over an input.
Args:
in_channels: :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`.
out_channels: :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`.
kernel_size: Kernel size for convolution.
stride: Stride for convolution. Default: 1.
padding: Padding for convolution. Default: 0.
dilation: Dilation rate for convolution. Default: 1.
groups: Number of groups in convolution. Default: 1.
bias: Use bias. Default: ``False``.
padding_mode: Padding mode ('zeros', 'reflect', 'replicate' or 'circular'). Default: ``zeros``.
use_norm: Use normalization layer after convolution. Default: ``True``.
use_act: Use activation layer after convolution (or convolution and normalization).
Default: ``True``.
act_name: Use specific activation function. Overrides the one specified in command line args.
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`.
- Output: :math:`(N, C_{out}, H_{out}, W_{out})`.
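
    Example (a minimal sketch; this class wraps ``nn.Conv2d``, and the
    ``use_norm``/``use_act``/``act_name`` arguments above are accepted for
    interface compatibility but not applied by this class itself)::

        >>> import torch
        >>> conv = Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        >>> conv(torch.randn(1, 3, 32, 32)).shape
        torch.Size([1, 16, 32, 32])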
"""
    def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, int]],
stride: Optional[Union[int, Tuple[int, int]]] = 1,
padding: Optional[Union[int, Tuple[int, int]]] = 0,
dilation: Optional[Union[int, Tuple[int, int]]] = 1,
groups: Optional[int] = 1,
bias: Optional[bool] = False,
padding_mode: Optional[str] = "zeros",
*args,
**kwargs,
) -> None:
super().__init__(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias,
padding_mode=padding_mode,
)
class _BaseConvNormActLayer(BaseLayer):
"""
Applies an N-dimensional convolution over an input.
Args:
opts: Command line options.
        in_channels: :math:`C_{in}` from an expected input of size
            :math:`(bs, C_{in}, X_{1}, ..., X_{N})`.
out_channels: :math:`C_{out}` from an expected output of size
:math:`(bs, C_{out}, Y_{1}, ..., Y_{N})`.
kernel_size: Kernel size for convolution. An integer, or tuple of length ``N``.
stride: Stride for convolution. An integer, or tuple of length ``N``. Default: 1.
dilation: Dilation rate for convolution. An integer, or tuple of length ``N``.
Default: ``1``.
padding: Padding for convolution. An integer, or tuple of length ``N``.
            If not specified, padding is automatically computed based on kernel size
            and dilation rate. Default: ``None`` (equivalent to ``[
            int((kernel_size[i] - 1) / 2) * dilation[i] for i in range(N)]``).
groups: Number of groups in convolution. Default: ``1``.
bias: Use bias. Default: ``False``.
padding_mode: Padding mode ('zeros', 'reflect', 'replicate' or 'circular').
Default: ``zeros``.
use_norm: Use normalization layer after convolution. Default: ``True``.
use_act: Use activation layer after convolution (or convolution and normalization).
Default: ``True``.
norm_layer: If not None, the provided normalization layer object will be used.
Otherwise, a normalization object will be created based on config
``model.normalization.*`` opts.
act_layer: If not None, the provided activation function will be used.
Otherwise, an activation function will be created based on config
``model.activation.*`` opts.
Shape:
- Input: :math:`(bs, C_{in}, X_{1}, ..., X_{N})`.
- Output: :math:`(bs, C_{out}, Y_{1}, ..., Y_{N})`.
.. note::
For depth-wise convolution, `groups=C_{in}=C_{out}`.
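
    Example (a minimal sketch; assumes ``opts`` is an ``argparse.Namespace``
    produced by the cvnets option parser, with batch norm and ReLU configured
    under ``model.normalization.*`` and ``model.activation.*``)::

        >>> import torch
        >>> layer = ConvLayer2d(
        ...     opts, in_channels=3, out_channels=32, kernel_size=3, stride=2
        ... )
        >>> layer(torch.randn(1, 3, 224, 224)).shape
        torch.Size([1, 32, 112, 112])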
"""
@property
def ndim(self) -> int:
raise NotImplementedError("subclasses should override ndim property")
@property
def module_cls(self) -> Type[nn.Module]:
raise NotImplementedError("subclasses should override module_cls property")
def __init__(
self,
opts: argparse.Namespace,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, ...]],
stride: Union[int, Tuple[int, ...]] = 1,
dilation: Union[int, Tuple[int, ...]] = 1,
padding: Optional[Union[int, Tuple[int, ...]]] = None,
groups: int = 1,
bias: bool = False,
padding_mode: str = "zeros",
use_norm: bool = True,
use_act: bool = True,
norm_layer: Optional[nn.Module] = None,
act_layer: Optional[nn.Module] = None,
*args,
**kwargs,
) -> None:
super().__init__()
if norm_layer is None and use_norm:
norm_type = getattr(opts, "model.normalization.name")
if norm_type == "batch_norm":
norm_type = f"batch_norm_{self.ndim}d"
norm_layer = get_normalization_layer(
opts=opts, num_features=out_channels, norm_type=norm_type
)
        elif norm_layer is not None and not use_norm:
logger.error(
f"When use_norm is False, norm_layer should be None, but norm_layer={norm_layer} is provided."
)
if act_layer is None and use_act:
act_layer = build_activation_layer(opts, num_parameters=out_channels)
        elif act_layer is not None and not use_act:
logger.error(
f"When use_act is False, act_layer should be None, but act_layer={act_layer} is provided."
)
        if (
            use_norm
            and bias
            and any(name == "bias" for name, _ in norm_layer.named_parameters())
        ):
            raise AssertionError(
                "Do not use bias when using normalization layers with bias."
            )
        if use_norm and isinstance(norm_layer, (LayerNorm, LayerNorm2D_NCHW)):
            # Unlike batch norm, layer norm does not absorb a per-channel
            # convolution bias, so a bias term remains useful here.
            bias = True
if isinstance(kernel_size, int):
kernel_size = (kernel_size,) * self.ndim
if isinstance(stride, int):
stride = (stride,) * self.ndim
if isinstance(dilation, int):
dilation = (dilation,) * self.ndim
        assert isinstance(kernel_size, tuple)
        assert isinstance(stride, tuple)
        assert isinstance(dilation, tuple)
        if padding is None:
            # "Same" padding for odd kernels: (k - 1) / 2 per dimension, scaled
            # by the dilation rate. Materialize as a tuple (a bare generator
            # would be consumed on first use).
            padding = tuple(
                int((kernel_size[i] - 1) / 2) * dilation[i] for i in range(self.ndim)
            )
if in_channels % groups != 0:
logger.error(
"Input channels are not divisible by groups. {}%{} != 0 ".format(
in_channels, groups
)
)
if out_channels % groups != 0:
logger.error(
"Output channels are not divisible by groups. {}%{} != 0 ".format(
out_channels, groups
)
)
block = nn.Sequential()
conv_layer = self.module_cls(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size, # type: ignore
stride=stride, # type: ignore
padding=padding,
dilation=dilation, # type: ignore
groups=groups,
bias=bias,
padding_mode=padding_mode,
)
block.add_module(name="conv", module=conv_layer)
self.norm_name = None
if use_norm:
block.add_module(name="norm", module=norm_layer)
self.norm_name = norm_layer.__class__.__name__
self.act_name = None
if use_act:
block.add_module(name="act", module=act_layer)
self.act_name = act_layer.__class__.__name__
self.block = block
self.in_channels = in_channels
self.out_channels = out_channels
self.stride = stride
self.groups = groups
self.kernel_size = conv_layer.kernel_size
self.bias = bias
self.dilation = dilation
@classmethod
def add_arguments(cls, parser: argparse.ArgumentParser):
if cls != _BaseConvNormActLayer:
return parser
group = parser.add_argument_group(cls.__name__)
group.add_argument(
"--model.layer.conv-init",
type=str,
default="kaiming_normal",
help="Init type for conv layers",
)
        group.add_argument(
"--model.layer.conv-init-std-dev",
type=float,
default=None,
help="Std deviation for conv layers",
)
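        # Example (a sketch): the options registered above surface on the
        # command line as, e.g.,
        #   --model.layer.conv-init kaiming_normal --model.layer.conv-init-std-dev 0.02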
return parser
def forward(self, x: Tensor) -> Tensor:
return self.block(x)
def __repr__(self):
repr_str = self.block[0].__repr__()
repr_str = repr_str[:-1]
if self.norm_name is not None:
repr_str += ", normalization={}".format(self.norm_name)
if self.act_name is not None:
repr_str += ", activation={}".format(self.act_name)
repr_str += ")"
return repr_str
class ConvLayer1d(_BaseConvNormActLayer):
    ndim = 1
    module_cls = nn.Conv1d
class ConvLayer2d(_BaseConvNormActLayer):
    ndim = 2
    module_cls = Conv2d
class ConvLayer3d(_BaseConvNormActLayer):
    ndim = 3
    module_cls = nn.Conv3d
class TransposeConvLayer2d(BaseLayer):
    """
    Applies a 2D transpose convolution (also known as deconvolution) over an input.
Args:
opts: Command line arguments.
in_channels: :math:`C_{in}` from an expected input of size
:math:`(N, C_{in}, H_{in}, W_{in})`.
out_channels: :math:`C_{out}` from an expected output of size
:math:`(N, C_{out}, H_{out}, W_{out})`.
kernel_size: Kernel size for convolution.
stride: Stride for convolution. Default: 1.
dilation: Dilation rate for convolution. Default: 1.
groups: Number of groups in convolution. Default: 1.
bias: Use bias. Default: ``False``.
padding_mode: Padding mode. Default: ``zeros``.
use_norm: Use normalization layer after convolution. Default: ``True``.
use_act: Use activation layer after convolution (or convolution and normalization).
Default: ``True``.
        padding: Padding will be done on both sides of each dimension in the input.
        output_padding: Additional size added to one side of each dimension in the
            output. Default: ``None`` (set to ``stride - 1``).
        auto_padding: Compute padding automatically, overriding any value passed for
            ``padding``. Default: ``True``.
Shape:
- Input: :math:`(N, C_{in}, H_{in}, W_{in})`.
- Output: :math:`(N, C_{out}, H_{out}, W_{out})`.
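
    Example (a minimal sketch; assumes ``opts`` comes from the cvnets option
    parser). With ``kernel_size=3`` and ``stride=2``, auto-padding gives
    ``(1, 1)`` and the default ``output_padding`` of ``stride - 1`` doubles
    the spatial resolution::

        >>> import torch
        >>> layer = TransposeConvLayer2d(
        ...     opts, in_channels=32, out_channels=16, kernel_size=3, stride=2
        ... )
        >>> layer(torch.randn(1, 32, 56, 56)).shape
        torch.Size([1, 16, 112, 112])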
"""
    def __init__(
self,
opts: argparse.Namespace,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple],
stride: Optional[Union[int, Tuple]] = 1,
dilation: Optional[Union[int, Tuple]] = 1,
groups: Optional[int] = 1,
bias: Optional[bool] = False,
padding_mode: Optional[str] = "zeros",
use_norm: Optional[bool] = True,
use_act: Optional[bool] = True,
padding: Optional[Union[int, Tuple]] = (0, 0),
output_padding: Optional[Union[int, Tuple]] = None,
auto_padding: Optional[bool] = True,
*args,
**kwargs,
):
super().__init__()
if use_norm:
assert not bias, "Do not use bias when using normalization layers."
if isinstance(kernel_size, int):
kernel_size = (kernel_size, kernel_size)
if isinstance(stride, int):
stride = (stride, stride)
if isinstance(dilation, int):
dilation = (dilation, dilation)
if output_padding is None:
output_padding = (stride[0] - 1, stride[1] - 1)
assert isinstance(kernel_size, (tuple, list))
assert isinstance(stride, (tuple, list))
assert isinstance(dilation, (tuple, list))
        if auto_padding:
            # NOTE: auto_padding (the default) overrides any user-supplied padding.
            padding = (
                int((kernel_size[0] - 1) / 2) * dilation[0],
                int((kernel_size[1] - 1) / 2) * dilation[1],
            )
if in_channels % groups != 0:
logger.error(
"Input channels are not divisible by groups. {}%{} != 0 ".format(
in_channels, groups
)
)
if out_channels % groups != 0:
logger.error(
"Output channels are not divisible by groups. {}%{} != 0 ".format(
out_channels, groups
)
)
block = nn.Sequential()
conv_layer = nn.ConvTranspose2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias,
padding_mode=padding_mode,
output_padding=output_padding,
)
block.add_module(name="conv", module=conv_layer)
self.norm_name = None
if use_norm:
norm_layer = get_normalization_layer(opts=opts, num_features=out_channels)
block.add_module(name="norm", module=norm_layer)
self.norm_name = norm_layer.__class__.__name__
self.act_name = None
act_type = getattr(opts, "model.activation.name", "relu")
if act_type is not None and use_act:
act_layer = build_activation_layer(
opts,
act_type=act_type,
num_parameters=out_channels,
)
block.add_module(name="act", module=act_layer)
self.act_name = act_layer.__class__.__name__
self.block = block
self.in_channels = in_channels
self.out_channels = out_channels
self.stride = stride
self.groups = groups
self.kernel_size = conv_layer.kernel_size
self.bias = bias
    def forward(self, x: Tensor) -> Tensor:
return self.block(x)
def __repr__(self):
repr_str = self.block[0].__repr__()
repr_str = repr_str[:-1]
if self.norm_name is not None:
repr_str += ", normalization={}".format(self.norm_name)
if self.act_name is not None:
repr_str += ", activation={}".format(self.act_name)
repr_str += ")"
return repr_str
class NormActLayer(BaseLayer):
"""
Applies a normalization layer followed by an activation layer.
Args:
opts: Command-line arguments.
num_features: :math:`C` from an expected input of size :math:`(N, C, H, W)`.
Shape:
- Input: :math:`(N, C, H, W)`.
- Output: :math:`(N, C, H, W)`.
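
    Example (a minimal sketch; assumes ``opts`` comes from the cvnets option
    parser; the layer is shape-preserving)::

        >>> import torch
        >>> layer = NormActLayer(opts, num_features=64)
        >>> layer(torch.randn(1, 64, 28, 28)).shape
        torch.Size([1, 64, 28, 28])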
"""
    def __init__(self, opts, num_features, *args, **kwargs):
super().__init__()
block = nn.Sequential()
self.norm_name = None
norm_layer = get_normalization_layer(opts=opts, num_features=num_features)
block.add_module(name="norm", module=norm_layer)
self.norm_name = norm_layer.__class__.__name__
self.act_name = None
act_layer = build_activation_layer(
opts,
num_parameters=num_features,
)
block.add_module(name="act", module=act_layer)
self.act_name = act_layer.__class__.__name__
self.block = block
    def forward(self, x: Tensor) -> Tensor:
return self.block(x)
def __repr__(self):
repr_str = "{}(normalization={}, activation={})".format(
            self.__class__.__name__, self.norm_name, self.act_name
)
return repr_str
class _BaseSeparableConv(BaseLayer):
"""
    Applies an N-dimensional `depth-wise separable convolution
    <https://arxiv.org/abs/1610.02357>`_ over an N-dimensional input tensor.
Args:
opts: Command line arguments.
in_channels: :math:`C_{in}` from an expected input of size
:math:`(N, C_{in}, X_{1}, ..., X_{N})`.
out_channels: :math:`C_{out}` from an expected output of size
:math:`(N, C_{out}, Y_{1}, ..., Y_{N})`.
kernel_size: Kernel size for convolution.
stride: Stride for convolution. Default: 1.
dilation: Dilation rate for convolution. Default: 1.
use_norm: Use normalization layer after convolution. Default: ``True``.
use_act: Use activation layer after convolution (or convolution and normalization).
Default: ``True``.
        use_act_depthwise: Use activation layer after the depth-wise convolution (or
            convolution and normalization). Default: ``False``.
            NOTE: We recommend against using an activation function in depth-wise
            convolution.
bias: Use bias. Default: ``False``.
padding_mode: Padding mode ('zeros', 'reflect', 'replicate' or 'circular').
Default: ``zeros``.
act_name: Use specific activation function. Overrides the one specified in
command line args. Default: ``None``.
Shape:
- Input: :math:`(N, C_{in}, X_{1}, ..., X_{N})`.
- Output: :math:`(N, C_{out}, Y_{1}, ..., Y_{N})`.
.. note::
For depth-wise convolution, `groups=C_{in}=C_{out}`.
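
    Example (a minimal sketch; assumes ``opts`` comes from the cvnets option
    parser). A 3x3 separable convolution factorizes into a 3x3 depth-wise
    convolution (``groups=in_channels``) followed by a 1x1 point-wise
    convolution::

        >>> import torch
        >>> sep = SeparableConv2d(
        ...     opts, in_channels=32, out_channels=64, kernel_size=3
        ... )
        >>> sep(torch.randn(1, 32, 56, 56)).shape
        torch.Size([1, 64, 56, 56])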
"""
def __init__(
self,
opts: argparse.Namespace,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, ...]],
stride: Union[int, Tuple[int, ...]] = 1,
dilation: Union[int, Tuple[int, ...]] = 1,
use_norm: bool = True,
use_act: bool = True,
use_act_depthwise: bool = False,
bias: bool = False,
padding_mode: str = "zeros",
act_name: Optional[str] = None,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.dw_conv = self.conv_layer_cls(
opts=opts,
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
dilation=dilation,
groups=in_channels,
bias=False,
padding_mode=padding_mode,
use_norm=True,
# NOTE: We recommend against using activation function in depth-wise convolution.
use_act=use_act_depthwise,
act_name=act_name,
)
self.pw_conv = self.conv_layer_cls(
opts=opts,
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
dilation=1,
groups=1,
bias=bias,
padding_mode=padding_mode,
use_norm=use_norm,
use_act=use_act,
act_name=act_name,
)
self.in_channels = in_channels
self.out_channels = out_channels
self.stride = stride
self.kernel_size = kernel_size
self.dilation = dilation
@property
def conv_layer_cls(self):
raise NotImplementedError("Subclasses should override conv_layer_cls.")
def __repr__(self):
repr_str = "{}(in_channels={}, out_channels={}, kernel_size={}, stride={}, dilation={})".format(
self.__class__.__name__,
self.in_channels,
self.out_channels,
self.kernel_size,
self.stride,
self.dilation,
)
return repr_str
def forward(self, x: Tensor) -> Tensor:
x = self.dw_conv(x)
x = self.pw_conv(x)
return x
class SeparableConv1d(_BaseSeparableConv):
    conv_layer_cls = ConvLayer1d
class SeparableConv2d(_BaseSeparableConv):
    conv_layer_cls = ConvLayer2d
class SeparableConv3d(_BaseSeparableConv):
    conv_layer_cls = ConvLayer3d