Source code for cvnets.modules.pspnet_module

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

from typing import Optional, Tuple

import torch
import torch.nn.functional as F
from torch import Tensor, nn

from cvnets.layers import AdaptiveAvgPool2d, ConvLayer2d, Dropout2d
from cvnets.modules import BaseModule
from utils import logger


class PSP(BaseModule):
    """
    This class defines the Pyramid Scene Parsing module in the `PSPNet paper <https://arxiv.org/abs/1612.01105>`_

    The input is average-pooled to each size in ``pool_sizes``, projected with a
    1x1 convolution, resized back to the input's spatial size, concatenated with
    the input along the channel dimension, and fused with a 3x3 convolution
    followed by 2D dropout.

    Args:
        opts: command-line arguments
        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H, W)`
        out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H, W)`
        pool_sizes (Optional[Tuple[int, ...]]): List or Tuple of pool sizes. ``None`` selects
            the default. Default: (1, 2, 3, 6)
        dropout (Optional[float]): Apply dropout. ``None`` is treated as 0.0. Default is 0.0
    """

    def __init__(
        self,
        opts,
        in_channels: int,
        out_channels: int,
        pool_sizes: Optional[Tuple[int, ...]] = (1, 2, 3, 6),
        dropout: Optional[float] = 0.0,
        *args,
        **kwargs
    ) -> None:
        # Both arguments are annotated Optional; map None onto the documented
        # defaults so the range check and len() below do not raise TypeError.
        if dropout is None:
            dropout = 0.0
        if pool_sizes is None:
            pool_sizes = (1, 2, 3, 6)

        if not (0.0 <= dropout < 1.0):
            logger.error(
                "Dropout value in {} should be between 0 and 1. Got: {}".format(
                    self.__class__.__name__, dropout
                )
            )

        # Split the input channels evenly across the pyramid branches, then
        # round the per-branch width down to a multiple of 16.
        # NOTE(review): this yields 0 when in_channels // len(pool_sizes) < 16;
        # confirm callers never use such narrow inputs.
        reduction_dim = in_channels // len(pool_sizes)
        reduction_dim = (reduction_dim // 16) * 16
        # forward() concatenates the input with one reduction_dim-channel map
        # per branch, so the fusion layer sees this many channels.
        channels_after_concat = (reduction_dim * len(pool_sizes)) + in_channels

        super().__init__()
        self.psp_branches = nn.ModuleList(
            [
                self._make_psp_layer(
                    opts, o_size=ps, in_channels=in_channels, out_channels=reduction_dim
                )
                for ps in pool_sizes
            ]
        )
        self.fusion = nn.Sequential(
            ConvLayer2d(
                opts=opts,
                in_channels=channels_after_concat,
                out_channels=out_channels,
                kernel_size=3,
                stride=1,
                use_norm=True,
                use_act=True,
            ),
            Dropout2d(p=dropout),
        )

        # Kept for __repr__.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.pool_sizes = pool_sizes
        self.inner_channels = reduction_dim
        self.dropout = dropout

    @staticmethod
    def _make_psp_layer(
        opts, o_size: int, in_channels: int, out_channels: int
    ) -> nn.Module:
        """Build one pyramid branch.

        The branch adaptively average-pools to ``(o_size, o_size)`` and then
        applies a bias-free 1x1 ``ConvLayer2d`` (created with ``use_norm=True``
        and ``use_act=True``) mapping ``in_channels`` to ``out_channels``.
        """
        return nn.Sequential(
            AdaptiveAvgPool2d(output_size=(o_size, o_size)),
            ConvLayer2d(
                opts,
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                bias=False,
                use_norm=True,
                use_act=True,
            ),
        )

    def forward(self, x: Tensor, *args, **kwargs) -> Tensor:
        """Apply the pyramid pooling branches to ``x`` and fuse the results.

        Args:
            x: Input tensor; dimensions from index 2 onward are used as the
                target size for resizing the branch outputs.

        Returns:
            Output of the fusion layers applied to the channel-wise
            concatenation of ``x`` and every resized branch output.
        """
        x_size = x.shape[2:]
        # Keep the input itself as the first entry so the original features
        # are concatenated with the pooled context.
        out = [x] + [
            F.interpolate(
                input=psp_branch(x), size=x_size, mode="bilinear", align_corners=True
            )
            for psp_branch in self.psp_branches
        ]
        out = torch.cat(out, dim=1)
        out = self.fusion(out)
        return out

    def __repr__(self):
        """Return a one-line summary of the module's configuration."""
        return "{}(in_channels={}, out_channels={}, pool_sizes={}, inner_channels={}, dropout_2d={})".format(
            self.__class__.__name__,
            self.in_channels,
            self.out_channels,
            self.pool_sizes,
            self.inner_channels,
            self.dropout,
        )