Source code for coremltools.converters.mil.mil.ops.defs.iOS15.conv

#  Copyright (c) 2020, Apple Inc. All rights reserved.
#
#  Use of this source code is governed by a BSD-3-clause license that can be
#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

from coremltools.converters.mil.mil import Operation, types
from coremltools.converters.mil.mil.block import curr_opset_version
from coremltools.converters.mil.mil.input_type import (DefaultInputs,
                                                       InputSpec,
                                                       TensorInputType)
from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op
from coremltools.converters.mil.mil.ops.defs._utils import \
    spatial_dimensions_out_shape
from coremltools.converters.mil.mil.ops.defs.iOS15 import _IOS15_TARGET


@register_op
class conv(Operation):
    r"""
    Perform convolution over input. Supports 1-D, 2-D, and 3-D convolution.

    Parameters
    ----------
    x: tensor<[n, C_in, \*d_in], T> (Required)

        * ``d_in`` are (possibly runtime-determined) spatial dimensions. For example,
          ``d_in = [224, 224]`` for 2D convolution.
        * ``1 <= len(d_in) <= 3``.
        * ``C_in`` is the number of input channels or depth dimensions.
        * ``n``  is the batch dimension.

    weight: tensor<[C_out, C_in/groups, \*K], T> (Required)

        * Filter weights.
        * ``C_in`` is the number of input channels.
        * ``C_in`` must be divisible by ``groups``.
        * ``K`` are kernel sizes. For example, ``K = [KH, KW]`` for 2-D convolution.
        * When ``dilations`` is not all ``1``, ``weight`` has to be ``const``
          at compile time.

    strides: const tensor<[S], i32> (Optional)

        * Default to one vector of length equal to the number of spatial dimensions.
        * Strides along each of the spatial dimensions.
        * ``S == len(d_in)``.

    pad_type: const str (Optional. Default to ``valid``)

        Must be one of the following:

            * ``valid``: No padding. This is equivalent to custom pad with
              ``pad[2*i] == pad[2*i+1] == 0, for i=0,...,len(d_in)-1``.
            * ``custom``: Specify custom padding in the parameter ``pad``.
            * ``same``: Input is padded such that out spatial shapes are
              ``d_out[i] = ceil(d_in[i] / strides[i])``.
            * ``same_lower``: Similar to ``same`` but the padding
              will place extra rows/cols on the top/left if the padding amount is odd.
              Rejected with a ``ValueError`` when the current opset version is
              the iOS15 target.

        Specifically, for ``i = 0,...,len(d_in)-1``, the equivalent paddings are
        calculated as follows:

            * ``dilated_kernel = (K[i] - 1) * dilations[i] + 1``
            * If ``dilated_kernel`` is odd,
              ``padding[2*i] = padding[2*i+1] = floor(dilated_kernel / 2)``
            * Otherwise:
              ``padding[2*i] = ceil((dilated_kernel - 1) / 2)``,
              ``padding[2*i+1] = floor((dilated_kernel - 1) / 2)``

    pad: const tensor<[P], i32> (Optional. Default to all zeros)

        * ``len(P) = 2 * len(d_in)``
        * ``pad`` should be specified if and only if ``pad_type == custom``,
          otherwise errors occur.
        * ``pad`` represents the number of elements to pad before and after each
          dimension. Specifically, ``pad[0], pad[1]`` are the pad size before / after
          spatial dimension 0, ``pad[2], pad[3]`` are the pad size before / after
          spatial dimension 1, etc.

    dilations: const tensor<[S], i32> (Optional. Default to all 1s)

        * Dilation value along each spatial dimension in ``d_in``.
          See `visualization <https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md>`_.
        * ``S == len(d_in)``.

    groups: const tensor<[], i32> (Optional, default to 1)

        * Input and output channels are split by ``groups``.
        * ``C_in`` must be divisible by ``groups``.
        * Maximum value for group is ``C_in``, in which case it is a depthwise
          convolution.

        For examples (assuming ``C_in = 16, C_out = 32``):

            * ``groups == 1``, ``weight`` has shape ``[32, 16, KH, KW]``: All input
              channels are convolved with the ``weight`` kernel to produce all output
              channels.
            * ``groups == 2``, ``weight`` has shape ``[32, 8, KH, KW]``: Input
              channels 0~7 are convolved with half of the ``weight`` kernel to produce
              output channels 0~15. Similarly, input channels 8~15 are convolved with
              the other half of ``weight`` to produce output channels 16~31.
            * ``groups == C_in``, ``weight`` has shape ``[32, 1, KH, KW]``: Each input
              channel is convolved with its own set of filters and each produce
              ``C_out / C_in = 2`` channels. This is equivalent to depthwise
              convolution.

    bias: const tensor<[C_out],T> (Optional, default to all 0)
        * Bias along output channels.

    Returns
    -------
    tensor<[n, C_out, \*d_out], T>
        * Output activation has the same rank and spatial dimension as the input.
          That is, ``len(d_out) == len(d_in)``.
        * For ``i=0,..,len(d_in)-1, d_out[i] = floor [(d_in[i] + pad[2*i] +
          pad[2*i+1] - (K[i]-1)*dilations[i] - 1) / strides[i] ] + 1``.

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    conv_transpose
    """

    input_spec = InputSpec(
        x=TensorInputType(type_domain="T"),
        weight=TensorInputType(type_domain="T"),
        bias=TensorInputType(const=True, optional=True, type_domain="T"),
        strides=TensorInputType(const=True, optional=True, type_domain=types.int32),
        pad_type=TensorInputType(const=True, optional=True, type_domain=types.str),
        pad=TensorInputType(const=True, optional=True, type_domain=types.int32),
        dilations=TensorInputType(const=True, optional=True, type_domain=types.int32),
        groups=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "T": (types.fp16, types.fp32),
    }

    def default_inputs(self):
        """
        Return the default values for the optional inputs.

        The lengths of ``strides``, ``pad`` and ``dilations`` are derived from
        the number of spatial dimensions of ``x`` (its rank minus the batch
        and channel dimensions).
        """
        num_spatial_dims = self.x.rank - 2
        return DefaultInputs(
            bias=None,
            strides=[1]*num_spatial_dims,
            pad_type="valid",
            pad=[0]*num_spatial_dims*2,
            dilations=[1]*num_spatial_dims,
            groups=1,
        )

    def type_inference(self):
        """
        Validate the inputs and compute the output tensor type.

        Returns
        -------
        A tensor type with the dtype of ``x`` and shape ``[n, C_out, *d_out]``,
        where the spatial sizes ``d_out`` are computed by
        ``spatial_dimensions_out_shape``.

        Raises
        ------
        ValueError
            If ``bias`` is not a rank-1 tensor with ``C_out`` elements, if
            ``C_in`` is not divisible by ``groups``, if ``C_in / groups`` does
            not match ``weight.shape[1]``, if ``pad_type`` is ``same_lower``
            under the iOS15 opset, or if a non-constant ``weight`` is combined
            with a dilation greater than 1.
        """
        inshape = self.x.shape
        f_shape = self.weight.shape
        kernel_shape = f_shape[2:]
        C_out = f_shape[0]
        C_in = inshape[1]
        groups = self.groups.val

        if self.bias is not None:
            bias_shape = self.bias.shape
            # Check the rank first: a rank-0 bias has an empty shape, so
            # indexing it would raise IndexError instead of a clear ValueError.
            if len(bias_shape) != 1:
                msg = "bias must be a rank-1 tensor of shape [C_out], got shape {}"
                raise ValueError(msg.format(tuple(bias_shape)))
            if bias_shape[0] != C_out:
                msg = "# of bias values {} not equal to # output channels {}"
                raise ValueError(msg.format(bias_shape[0], C_out))
        if C_in % groups != 0:
            msg = "# of input channels {} not divisible by groups {}"
            raise ValueError(msg.format(C_in, groups))
        if C_in // groups != f_shape[1]:
            msg = "C_in / groups = {}/{} != weight[1] ({})"
            raise ValueError(msg.format(C_in, groups, f_shape[1]))

        strides = self.strides.val
        dilations = self.dilations.val
        pad_type = self.pad_type.val

        # The same_lower padding is not supported in iOS15
        if curr_opset_version() == _IOS15_TARGET and pad_type == "same_lower":
            msg = "iOS15 version of conv does not support pad_type = `same_lower`"
            raise ValueError(msg)

        # Ignore self.pad if pad_type != custom
        custom_pad = self.pad.val if pad_type == "custom" else None

        # Dilated convolution requires the weight to be known at compile time.
        is_weight_dynamic = not self.weight.is_descendant_of_const
        if is_weight_dynamic and any(d > 1 for d in dilations):
            raise ValueError("Convolution with dynamic weights does not support dilations!")

        N = inshape[0]
        # spatial dimensions
        d_out_shape = spatial_dimensions_out_shape(
            pad_type=pad_type,
            input_shape=inshape[2:],
            kernel_shape=kernel_shape,
            strides=strides,
            dilations=dilations,
            custom_pad=custom_pad,
        )
        retshape = [N, C_out] + d_out_shape
        return types.tensor(self.x.dtype, tuple(retshape))


@register_op
class conv_quantized(conv):
    """
    Convolution over input whose ``weight`` (and optional ``bias``) are stored
    in quantized form. The float weights are recovered as
    ``W_float = W_quantized * scale + bias``.

    Note: This is experimental and may change in the future.

    Parameters
    ----------
    All parameters of the convolutional layer apply. The parameters below are
    specific to quantization.

    quantization_type: const str (Required)
        * Either ``linear`` or ``lut``.

    nbits: const tensor<[], i32> (Optional. Default to 8)
        * Bit-width of the quantization, with ``1 <= nbits <= 8``.

    quant_scale: tensor<*?, T> (Required)
        * Scale of the quantization.

    quant_bias: tensor<*?, T> (Required)
        * Bias used to quantize/dequantize.

    Returns
    -------
    tensor<[n, C_out, *d_out], T>
        * The output activation keeps the rank and the number of spatial
          dimensions of the input: ``len(d_out) == len(d_in)``.

    Attributes
    ----------
    T: fp16, fp32
    """

    # "T" is the activation/scale dtype domain; "U" is the storage dtype
    # domain of the quantized weight and bias.
    input_spec = InputSpec(
        x=TensorInputType(type_domain="T"),
        weight=TensorInputType(type_domain="U"),
        bias=TensorInputType(const=True, optional=True, type_domain="U"),
        quantization_type=TensorInputType(const=True, type_domain=types.str),
        nbits=TensorInputType(const=True, optional=True, type_domain=types.int32),
        quant_scale=TensorInputType(const=True, type_domain="T"),
        quant_bias=TensorInputType(const=True, type_domain="T"),
        strides=TensorInputType(const=True, optional=True, type_domain=types.int32),
        pad_type=TensorInputType(const=True, optional=True, type_domain=types.str),
        pad=TensorInputType(const=True, optional=True, type_domain=types.int32),
        dilations=TensorInputType(const=True, optional=True, type_domain=types.int32),
        groups=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "T": (types.fp32, types.fp16),
        "U": (types.uint8,),
    }

    def default_inputs(self):
        """Return the defaults inherited from ``conv`` combined with ``nbits=8``."""
        inherited_defaults = super().default_inputs()
        quantization_defaults = DefaultInputs(nbits=8)
        return inherited_defaults + quantization_defaults

@register_op
class conv_transpose(Operation):
    """
    Perform transposed convolution (also known as deconvolution and
    fractionally-strided convolution) over input. ``conv_transpose`` can also
    be used to compute the gradient of conv. Supports 1-D, 2-D, and 3-D
    convolution.

    Parameters
    ----------

    x: tensor<[n,C_in,*D_in],T> (Required)
        * Input data.
        * ``D_in`` are spatial dimensions.
        * ``1 <= len(D_in) <= 3``.
        * ``C_in`` is the number of input channels.

    weight: const tensor<[C_in,C_out/groups,*K], T> (Required)
        * Filter weights. ``C_in, C_out`` are the number of input and output channels
          respectively.
        * ``K`` are kernel sizes along the spatial dimensions. ``1 <= len(K) <= 2``.

    bias: const tensor<[C_out],T> (Optional, default to all 0)
        * Bias added along output channels.

    pad: const tensor<[P],i32> (Optional, default to all 0s)
        * Number of elements to pad before and after each dimension.
        * ``P == 2 * len(D_in)``.
        * ``pad[2*i], pad[2*i+1]`` are pad sizes before and after
          dimension ``i``, where ``0 <= i < len(D_in)``.

    output_shape: const tensor<[P],i32> (Optional, default None)
        * Expected output shape. The first two dimensions must be ``[n, C_out]``.
        * The output shape of ``conv_transpose`` is underdetermined in general,
          because ``conv`` can map multiple input shapes to a single output shape.
          For example, for ``same`` padding mode, ``conv_out = ceil(conv_in/stride)``.
          Hence we need ``output_shape`` when this occurs.

    pad_type: const str (Optional, default valid)
        * One of ``same``, ``valid``, or ``custom``.

    strides: const tensor<[S],i32> (Optional. Default to all 1s)
        * Stride along each of the spatial dimensions.
        * ``S == len(D_in)``.

    dilations: const tensor<[S],i32> (Optional. Default to all 1s)
        * Dilation value along each spatial dimension in ``D_in``. See ``conv``.
        * ``S == len(D_in)``.

    groups: const tensor<[], i32> (Optional. Default to 1)
        * Input and output channels are separated into ``groups``.
        * ``C_in`` and ``C_out`` must be divisible by the number of groups.
          See ``conv`` for examples.

    Returns
    -------
    tensor<[n,C_out,*D_out],T>
        * If ``output_shape`` is not ``None``:

             ``Dout = output_shape``

        * If ``pad_type == "custom"``:

             ``Dout[i] = (D_in[i]-1)*stride[i] + (K[i]-1) * dilation[i] + 1 - pad[2*i] - pad[2*i+1]``

        * If ``pad_type == "valid"``:

             ``Dout[i] = (D_in[i]-1)*stride[i] + (K[i]-1) * dilation[i] + 1``

        * If ``pad_type == "same"``:

             ``Dout[i] = D_in[i] * stride[i]``


    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    conv
    """

    input_spec = InputSpec(
        x=TensorInputType(type_domain="T"),  # [n, C_in, spatial_dims]
        weight=TensorInputType(const=True, type_domain="T"),  # [C_in, C_out/groups, spatial_dims]
        bias=TensorInputType(const=True, optional=True, type_domain="T"),
        pad=TensorInputType(const=True, optional=True, type_domain=types.int32),
        output_shape=TensorInputType(const=True, optional=True, type_domain=types.int32),
        pad_type=TensorInputType(const=True, optional=True, type_domain=types.str),
        strides=TensorInputType(const=True, optional=True, type_domain=types.int32),
        dilations=TensorInputType(const=True, optional=True, type_domain=types.int32),
        groups=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "T": (types.fp16, types.fp32),
    }

    def default_inputs(self):
        """
        Return the default values for the optional inputs.

        The lengths of ``pad``, ``strides`` and ``dilations`` are derived from
        the number of spatial dimensions of ``x`` (its rank minus the batch
        and channel dimensions).
        """
        num_spatial_dims = self.x.rank - 2
        return DefaultInputs(
            bias=None,
            pad=[0]*2*num_spatial_dims,
            output_shape=None,
            pad_type="valid",
            strides=[1]*num_spatial_dims,
            dilations=[1]*num_spatial_dims,
            groups=1,
        )

    def type_inference(self):
        """
        Validate the inputs and compute the output tensor type.

        Returns
        -------
        A tensor type with the dtype of ``x``. Its shape is ``output_shape``
        when that input is given; otherwise it is ``[n, C_out, *D_out]`` with
        ``D_out`` derived from ``pad_type`` as described in the class
        docstring.

        Raises
        ------
        ValueError
            If the number of bias values differs from ``C_out``, if ``C_in``
            is not divisible by ``groups``, if ``pad_type`` is ``custom`` but
            ``pad`` is missing, or if ``pad_type`` is not one of ``same``,
            ``valid`` or ``custom``.
        AssertionError
            If ``output_shape`` is given and its first two entries are not
            ``[n, C_out]``.
        """
        # Input shape is [n, C_in, spatial_dims]
        in_shape = self.x.shape
        # Weight shape is [C_in, C_out/group, spatial_dims]
        f_shape = self.weight.shape
        kernel_shape = f_shape[2:]
        spatial_dim_rank = len(in_shape) - 2
        N = in_shape[0]
        # Channels live on axis 1 of the input; axis 0 is the batch dimension.
        C_in = in_shape[1]
        groups = self.groups.val
        C_out = f_shape[1] * groups

        if self.bias is not None and self.bias.val.shape[0] != C_out:
            msg = "# of bias values {} not equal to # output channels {}"
            raise ValueError(msg.format(self.bias.val.shape[0], C_out))
        # C_out is a multiple of groups by construction, so the divisibility
        # constraint that can actually be violated is the one on C_in.
        if C_in % groups != 0:
            msg = "# of input channels {} not divisible by groups {}"
            raise ValueError(msg.format(C_in, groups))

        # If output shape is given, return it
        if self.output_shape is not None:
            output_shape = self.output_shape.val
            assert output_shape[0] == N
            assert output_shape[1] == C_out
            return types.tensor(
                self.x.dtype, tuple(output_shape)
            )

        strides = self.strides.val
        dilations = self.dilations.val
        # Effective kernel extent once dilation is taken into account.
        dilated_kernel = [
            (kernel_shape[r] - 1) * dilations[r] + 1 for r in range(spatial_dim_rank)
        ]

        D_in = in_shape[2:]  # spatial dimensions
        pad_type = self.pad_type.val

        # Deconv's output shape is underdetermined, we follow TF shape logic here.
        if pad_type == "same":
            d_out_shape = [strides[r] * D_in[r] for r in range(spatial_dim_rank)]
        elif pad_type == "valid":
            d_out_shape = [
                strides[r] * (D_in[r] - 1) + dilated_kernel[r]
                for r in range(spatial_dim_rank)
            ]
        elif pad_type == "custom":
            if self.pad is None:
                raise ValueError("self.pad must exist if pad_type is custom")
            pad = self.pad.val
            d_out_shape = [
                strides[r] * (D_in[r] - 1)
                + dilated_kernel[r]
                - pad[2 * r]
                - pad[2 * r + 1]
                for r in range(spatial_dim_rank)
            ]
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # d_out_shape below.
            msg = "pad_type must be one of `same`, `valid` or `custom`, got `{}`"
            raise ValueError(msg.format(pad_type))

        retshape = [N, C_out] + d_out_shape
        return types.tensor(self.x.dtype, tuple(retshape))