# Source code for cvnets.image_projection_layers.base_image_projection

#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2023 Apple Inc. All Rights Reserved.
#

import argparse
from typing import Dict, Optional

from torch import nn

from cvnets.layers import LinearLayer
from cvnets.misc.common import parameter_list


class BaseImageProjectionHead(nn.Module):
    """Base class that projects image representations to the same space as text representations.

    This class only stores the learning-rate multiplier, registers the
    command-line arguments, and builds the trainable-parameter list.
    ``reset_parameters`` and ``forward`` raise ``NotImplementedError`` here and
    must be overridden by a sub-class to be usable.
    """

    def __init__(self, opts, *args, **kwargs) -> None:
        """Initialize the module and read the LR multiplier from ``opts``.

        Args:
            opts: Options object. The attribute literally named
                ``model.image_projection_head.lr_multiplier`` is looked up on
                it; ``1.0`` is used when the attribute is missing.
            *args: Accepted but not used by this base class.
            **kwargs: Accepted but not used by this base class.
        """
        super().__init__()

        # The option name contains dots, so it cannot be read with normal
        # attribute syntax; ``getattr`` with a default is used instead.
        self.lr_mult = getattr(opts, "model.image_projection_head.lr_multiplier", 1.0)

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser):
        """Add model specific arguments.

        Registers ``--model.image-projection-head.name`` (str, default
        ``None``) and ``--model.image-projection-head.lr-multiplier`` (float,
        default ``1.0``) in an argument group titled with the class name.

        Args:
            parser: Parser that receives the new argument group.

        Returns:
            The same ``parser`` object that was passed in.
        """
        group = parser.add_argument_group(title=cls.__name__)

        group.add_argument(
            "--model.image-projection-head.name",
            type=str,
            default=None,
            help="Name of the image projection head",
        )

        group.add_argument(
            "--model.image-projection-head.lr-multiplier",
            type=float,
            default=1.0,
            help="LR multiplier for image projection head",
        )

        return parser

    def reset_parameters(self) -> None:
        """Reset weights of a given layer.

        Raises:
            NotImplementedError: Always; sub-classes must override this method.
        """
        raise NotImplementedError

    def get_trainable_parameters(
        self,
        weight_decay: Optional[float] = 0.0,
        no_decay_bn_filter_bias: Optional[bool] = False,
        *args,
        **kwargs,
    ):
        """Build the parameter list of this head along with its LR multipliers.

        Args:
            weight_decay: Forwarded unchanged to ``parameter_list``.
            no_decay_bn_filter_bias: Forwarded unchanged to ``parameter_list``.
            *args: Accepted but not used.
            **kwargs: Accepted but not used.

        Returns:
            A 2-tuple ``(param_list, lr_multipliers)`` where ``param_list`` is
            whatever ``parameter_list`` returns and ``lr_multipliers`` is a
            list holding ``self.lr_mult`` once per entry of ``param_list``.
        """
        # NOTE: the bound method ``self.named_parameters`` itself is passed,
        # not the result of calling it.
        param_list = parameter_list(
            named_parameters=self.named_parameters,
            weight_decay=weight_decay,
            no_decay_bn_filter_bias=no_decay_bn_filter_bias,
        )
        return param_list, [self.lr_mult] * len(param_list)

    def forward(self, input: Dict, *args, **kwargs) -> Dict:
        """Run the projection head.

        Args:
            input: Dictionary input; its schema is defined by the sub-class.

        Raises:
            NotImplementedError: Always; sub-classes must override this method.
        """
        raise NotImplementedError


def get_in_feature_dimension(image_classifier: nn.Module) -> int:
    """Return the input feature dimension to the image classification head.

    Args:
        image_classifier: Either a single linear layer (``nn.Linear`` or
            ``LinearLayer``) or an ``nn.Sequential``. For an ``nn.Sequential``
            only its direct children are inspected, and the first linear layer
            found determines the result.

    Returns:
        The ``in_features`` attribute of the selected linear layer.

    Raises:
        NotImplementedError: If ``image_classifier`` is neither a linear layer
            nor an ``nn.Sequential`` that directly contains one.
    """
    in_features = None
    if isinstance(image_classifier, nn.Sequential):
        # Classifier that uses nn.Sequential usually has global pooling and
        # multiple linear layers. Find the first linear layer and get its
        # in_features
        for layer in image_classifier:
            if isinstance(layer, (nn.Linear, LinearLayer)):
                in_features = layer.in_features
                break
    elif isinstance(image_classifier, (nn.Linear, LinearLayer)):
        in_features = image_classifier.in_features

    if in_features is None:
        # No supported linear layer was found, so the dimension is unknown.
        raise NotImplementedError(
            f"Cannot get input feature dimension of {image_classifier}."
        )

    return in_features