Source code for cvnets.image_projection_layers.base_image_projection

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
from typing import Dict, Optional

from torch import nn

from cvnets.layers import LinearLayer
from cvnets.misc.common import parameter_list


[docs]class BaseImageProjectionHead(nn.Module): """Base class that projects image representations to the same space as text representations"""
[docs] def __init__(self, opts, *args, **kwargs) -> None: super().__init__() self.lr_mult = getattr(opts, "model.image_projection_head.lr_multiplier", 1.0)
[docs] @classmethod def add_arguments(cls, parser: argparse.ArgumentParser): """Add model specific arguments""" group = parser.add_argument_group(title=cls.__name__) group.add_argument( "--model.image-projection-head.name", type=str, default=None, help="Name of the image projection head", ) group.add_argument( "--model.image-projection-head.lr-multiplier", type=float, default=1.0, help="LR multiplier for image projection head", ) return parser
[docs] def reset_parameters(self) -> None: """Reset weights of a given layer""" raise NotImplementedError
[docs] def get_trainable_parameters( self, weight_decay: Optional[float] = 0.0, no_decay_bn_filter_bias: Optional[bool] = False, *args, **kwargs, ): param_list = parameter_list( named_parameters=self.named_parameters, weight_decay=weight_decay, no_decay_bn_filter_bias=no_decay_bn_filter_bias, ) return param_list, [self.lr_mult] * len(param_list)
[docs] def forward(self, input: Dict, *args, **kwargs) -> Dict: raise NotImplementedError
[docs]def get_in_feature_dimension(image_classifier: nn.Module) -> int: """Return the input feature dimension to the image classification head.""" in_features = None if isinstance(image_classifier, nn.Sequential): # Classifier that uses nn.Sequential usually has global pooling and # multiple linear layers. Find the first linear layer and get its # in_features for layer in image_classifier: if isinstance(layer, (nn.Linear, LinearLayer)): in_features = layer.in_features break elif isinstance(image_classifier, (nn.Linear, LinearLayer)): in_features = image_classifier.in_features if in_features is None: raise NotImplementedError( f"Cannot get input feature dimension of {image_classifier}." ) return in_features