# Source code for coremltools.converters.mil.mil.ops.defs.iOS15.conv

#  Copyright (c) 2020, Apple Inc. All rights reserved.
#
#  Use of this source code is governed by a BSD-3-clause license that can be
#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

from coremltools.converters.mil.mil import Operation, types
from coremltools.converters.mil.mil.block import curr_opset_version
from coremltools.converters.mil.mil.input_type import (DefaultInputs,
                                                       InputSpec,
                                                       TensorInputType)
from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op
from coremltools.converters.mil.mil.ops.defs._utils import \
    spatial_dimensions_out_shape
from coremltools.converters.mil.mil.ops.defs.iOS15 import _IOS15_TARGET


@register_op
class conv(Operation):
    """
    Perform convolution over input. Supports 1-D, 2-D, and 3-D convolution.

    Parameters
    ----------
    x: tensor<[n, C_in, *d_in], T> (Required)
        * ``d_in`` are (possibly runtime-determined) spatial dimensions.
          For example, ``d_in = [224, 224]`` for 2D convolution.
        * ``1 <= len(d_in) <= 3``.
        * ``C_in`` is the number of input channels or depth dimensions.
        * ``n`` is the batch dimension.

    weight: tensor<[C_out, C_in/groups, *K], T> (Required)
        * Filter weights.
        * ``C_in`` is the number of input channels. ``C_in`` must be
          divisible by ``groups``.
        * ``K`` are kernel sizes. For example, ``K = [KH, KW]`` for 2-D conv.
        * When ``dilations`` is not all ``1``, ``weight`` has to be ``const``
          at compile time.

    strides: const tensor<[S], i32> (Optional)
        * Default to one vector of length equal to the number of spatial
          dimensions.
        * Strides along each of the spatial dimensions.
        * ``S == len(d_in)``.

    pad_type: const str (Required)
        Must be one of the following:

        * ``valid``: No padding. This is equivalent to custom pad with
          ``pad[2*i] == pad[2*i+1] == 0, for i=0,...,len(d_in)-1``.
        * ``custom``: Specify custom padding in the parameter ``pad``.
        * ``same``: Input is padded such that out spatial shapes are
          ``d_out[i] = ceil(d_in[i] / strides[i])``.
        * ``same_lower``: Similar to ``same`` but the padding will place
          extra rows/cols on the top/left if the padding amount is odd.
          Specifically, for ``i = 0,..,,len(d_in)-1``, the equivalent
          paddings are calculated as follows:

            * ``dilated_kernel = (K[i] - 1) * dilate[i] + 1``
            * If ``dilated_kernel`` is odd,
              ``padding[2*i] = padding[2*i+1] = floor(dilated_kernel / 2)``
            * Otherwise:
              ``padding[2*i] = ceil((dilated_kernel - 1) / 2)``,
              ``padding[2*i+1] = floor((dilated_kernel - 1) / 2)``

    pad: const tensor<[P], i32> (Optional. Default to all zeros)
        * ``len(P) = 2 * len(d_in)``
        * ``pad`` should be specified if and only if ``pad_type == custom``,
          otherwise errors occur.
        * ``pad`` represents the number of elements to pad before and after
          each dimension: ``pad[0], pad[1]`` are the pad size before / after
          spatial dimension 0, ``pad[2], pad[3]`` are the pad size before /
          after spatial dimension 1, etc.

    dilations: const tensor<[S], i32> (Optional. Default to all 1s)
        * Dilation value along each spatial dimension in ``d_in``.
          See `visualization
          <https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md>`_.
        * ``S == len(d_in)``.

    groups: const tensor<[], i32> (Optional, default to 1)
        * Input and output channels are split by ``groups``.
        * ``C_in`` must be divisible by ``groups``.
        * Maximum value for group is ``C_in``, in which case it is a
          depthwise convolution.

        For examples (assuming ``C_in = 16, C_out = 32``):

        * ``groups == 1``, ``weight`` has shape ``[32, 16, KH, KW]``:
          All input channels are convolved with the ``weight`` kernel to
          produce all output channels.
        * ``groups == 2``, ``weight`` has shape ``[32, 8, KH, KW]``: Input
          channels 0~7 are convolved with half of the ``weight`` kernel to
          produce output channels 0~15. Similarly, input channels 8~15 are
          convolved with the other half of ``weight`` to product output
          channels 16~31.
        * ``groups == C_in``, ``weight`` has shape ``[32, 1, KH, KW]``:
          Each input channel is convolved with its own set of filters and
          each produce ``C_out / C_in = 2`` channels. This is equivalent to
          depthwise convolution.

    bias: const tensor<[C_out], T> (Optional, default to all 0)
        * Bias along output channels.

    Returns
    -------
    tensor<[n, C_out, *d_out], T>
        * Output activation has the same rank and spatial dimension as the
          input. That is, ``len(d_out) == len(d_in)``.
        * For ``i=0,..,len(d_in)-1,
          d_out[i] = floor [(D_in[i] + pad[2*i] + pad[2*i+1] -
          (K[i]-1)*dilations[i] - 1) / strides[i] ] + 1``.

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    conv_transpose
    """

    input_spec = InputSpec(
        x=TensorInputType(type_domain="T"),
        weight=TensorInputType(type_domain="T"),
        bias=TensorInputType(const=True, optional=True, type_domain="T"),
        strides=TensorInputType(const=True, optional=True, type_domain=types.int32),
        pad_type=TensorInputType(const=True, optional=True, type_domain=types.str),
        pad=TensorInputType(const=True, optional=True, type_domain=types.int32),
        dilations=TensorInputType(const=True, optional=True, type_domain=types.int32),
        groups=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "T": (types.fp16, types.fp32),
    }

    def default_inputs(self):
        # Defaults depend on the spatial rank of x (rank minus batch/channel).
        num_spatial_dims = self.x.rank - 2
        return DefaultInputs(
            bias=None,
            strides=[1]*num_spatial_dims,
            pad_type="valid",
            pad=[0]*num_spatial_dims*2,
            dilations=[1]*num_spatial_dims,
            groups=1,
        )

    def type_inference(self):
        """Validate inputs and compute the output tensor type [n, C_out, *d_out]."""
        inshape = self.x.shape
        f_shape = self.weight.shape
        kernel_shape = f_shape[2:]
        C_out = f_shape[0]
        C_in = self.x.shape[1]
        groups = self.groups.val

        # bias must be rank-1 with exactly C_out entries
        if self.bias is not None and (len(self.bias.shape) > 1 or self.bias.shape[0] != C_out):
            msg = "# of bias values {} not equal to # output channels {}"
            raise ValueError(msg.format(self.bias.shape[0], C_out))
        if C_in % groups != 0:
            msg = "# of input channels {} not divisible by groups {}"
            raise ValueError(msg.format(C_in, groups))
        if C_in // groups != self.weight.shape[1]:
            msg = "C_in / groups = {}/{} != weight[1] ({})"
            raise ValueError(msg.format(C_in, groups, self.weight.shape[1]))

        strides = self.strides.val
        dilations = self.dilations.val

        # The same_lower padding is not supported in iOS15
        if curr_opset_version() == _IOS15_TARGET and self.pad_type.val == "same_lower":
            msg = "iOS15 version of conv does not support pad_type = `same_lower`"
            raise ValueError(msg)

        # Ignore self.pad if pad_type != custom
        custom_pad = None if self.pad_type.val != 'custom' else self.pad.val

        # Dynamic (non-const) weights cannot be combined with dilation > 1.
        is_weight_dynamic = not self.weight.is_descendant_of_const
        if is_weight_dynamic and any(d > 1 for d in dilations):
            raise ValueError("Convolution with dynamic weights does not support dilations!")

        N = inshape[0]
        C_out = f_shape[0]
        # spatial dimensions
        d_out_shape = spatial_dimensions_out_shape(
            pad_type=self.pad_type.val,
            input_shape=inshape[2:],
            kernel_shape=kernel_shape,
            strides=strides,
            dilations=dilations,
            custom_pad=custom_pad,
        )
        retshape = [N, C_out] + d_out_shape
        return types.tensor(self.x.dtype, tuple(retshape))
@register_op
class conv_quantized(conv):
    """
    Convolution with weight quantization.

    Note: This is experimental and may change in the future.

    Supports weight quantization for parameters while performing convolution
    over input: ``W_float = W_quantized * scale + bias``.

    Parameters
    ----------
    In addition to convolutional layer parameters, the following additional
    parameters are required.

    quantization_type: const str (Required)
        * One of ``linear``, or ``lut``.

    nbits: const tensor<[], i32> (Optional. Default to 8)
        * Denotes the bit-width of the quantization. ``1 <= nbits <= 8``.

    quant_scale: tensor<*?, T> (Required)
        * Denotes the scale of quantization.

    quant_bias: tensor<*?, T> (Required)
        * Denotes the bias that is used to quantize/dequantize.

    Returns
    -------
    tensor<[n, C_out, *d_out], T>
        * Output activation has the same rank and spatial dimension as the
          input. That is, ``len(d_out) == len(d_in)``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(type_domain="T"),
        # Quantized weights/bias live in the integer domain "U".
        weight=TensorInputType(type_domain="U"),
        bias=TensorInputType(const=True, optional=True, type_domain="U"),
        quantization_type=TensorInputType(const=True, type_domain=types.str),
        nbits=TensorInputType(const=True, optional=True, type_domain=types.int32),
        quant_scale=TensorInputType(const=True, type_domain="T"),
        quant_bias=TensorInputType(const=True, type_domain="T"),
        strides=TensorInputType(const=True, optional=True, type_domain=types.int32),
        pad_type=TensorInputType(const=True, optional=True, type_domain=types.str),
        pad=TensorInputType(const=True, optional=True, type_domain=types.int32),
        dilations=TensorInputType(const=True, optional=True, type_domain=types.int32),
        groups=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "T": (types.fp32, types.fp16),
        "U": (types.uint8,),
    }

    def default_inputs(self):
        # Extend the base conv defaults with the quantization bit-width.
        base_defaults = super().default_inputs()
        return base_defaults + DefaultInputs(nbits=8)
@register_op
class conv_transpose(Operation):
    """
    Perform transposed convolution (also known as deconvolution and
    fractionally stride convolution) over input. ``conv_transpose`` can also
    be used to compute the gradient of conv. Supports 1-D, 2-D, and 3-D
    convolution.

    Parameters
    ----------
    x: tensor<[n,C_in,*D_in],T> (Required)
        * Input data.
        * ``D_in`` are spatial dimensions.
        * ``1 <= len(D_in) <= 3``.
        * ``C_in`` is the number of input channels.

    weight: const tensor<[C_in,C_out/groups,*D_in], T> (Required)
        * Filter weights. ``C_in, C_out`` are the number of input and output
          channels respectively.
        * ``D_in`` are spatial dimensions. ``1 <= len(D_in) <= 2``.

    bias: const tensor<[C_out],T> (Optional, default to all 0)
        * Bias added along output channels.

    pad: const tensor<[P],i32> (Optional, default to all 0s)
        * Number of elements to pad before and after each dimension.
        * ``P == 2 * len(D_in)``.
        * ``pad[2*i], pad[2*i+1]`` are pad sizes before and after dimension
          ``i``, where ``0 <= i < len(D_in)``.

    output_shape: const tensor<[P],i32> (Optional, default None)
        * Expected output shape. The first two dimensions must be
          ``[n, C_out]``.
        * The output shape of ``conv_transpose`` is underdetermined in
          general, because ``conv`` can map multiple input shapes to a single
          output shape. For example, for ``same`` padding mode,
          ``conv_out = ceil(conv_in/stride)``. Hence we need ``output_shape``
          when this occurs.

    pad_type: const tensor<[P],i32> (Optional, default valid)
        * One of ``same``, ``valid``, or ``custom``.

    strides: const tensor<[S],i32> (Optional. Default to all 1s)
        * Stride along each of the spatial dimensions.
        * ``S == len(D_in)``.

    dilations: const tensor<[S],i32> (Optional. Default to all 1s)
        * Dilation value along each spatial dimension in ``d_in``.
          See ``conv``.
        * ``S == len(D_in)``.

    groups: const tensor<[], i32> (Optional. Default to 1)
        * Input and output channels are separated into ``groups``.
        * ``C_in`` and ``C_out`` must be divisible by the number of groups.
          See ``conv`` for examples.

    Returns
    -------
    tensor<[n,C_out,*D_out],T>
        * If ``output_shape`` is not ``None``: ``Dout = output_shape``
        * If ``pad_type == "custom"``:
          ``Dout[i] = (D_in[i]-1)*stride[i] + (K[i]-1) * dilation[i] + 1
          - pad[2*i] - pad[2*i-1]``
        * If ``pad_type == "valid"``:
          ``Dout[i] = (D_in[i]-1)*stride[i] + (K[i]-1) * dilation[i] + 1``
        * If ``pad_type == "same"``: ``Dout[i] = D_in[i] * stride[i]``

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    conv
    """

    input_spec = InputSpec(
        x=TensorInputType(type_domain="T"),  # [n, C_in, spatial_dims]
        weight=TensorInputType(const=True, type_domain="T"),  # [C_in, C_out/groups, spatial_dims]
        bias=TensorInputType(const=True, optional=True, type_domain="T"),
        pad=TensorInputType(const=True, optional=True, type_domain=types.int32),
        output_shape=TensorInputType(const=True, optional=True, type_domain=types.int32),
        pad_type=TensorInputType(const=True, optional=True, type_domain=types.str),
        strides=TensorInputType(const=True, optional=True, type_domain=types.int32),
        dilations=TensorInputType(const=True, optional=True, type_domain=types.int32),
        groups=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "T": (types.fp16, types.fp32),
    }

    def default_inputs(self):
        # Defaults depend on the spatial rank of x (rank minus batch/channel).
        num_spatial_dims = self.x.rank - 2
        return DefaultInputs(
            bias=None,
            pad=[0]*2*num_spatial_dims,
            output_shape=None,
            pad_type="valid",
            strides=[1]*num_spatial_dims,
            dilations=[1]*num_spatial_dims,
            groups=1,
        )

    def type_inference(self):
        """Validate inputs and compute the output tensor type [n, C_out, *D_out]."""
        # Input shape is [n, C_in, spatial_dims]
        in_shape = self.x.shape
        # Weight shape is [C_in, C_out/group, spatial_dims]
        f_shape = self.weight.shape
        kernel_shape = f_shape[2:]
        spatial_dim_rank = len(in_shape) - 2
        N = in_shape[0]
        # Channels are axis 1 (axis 0 is the batch dimension).
        C_in = self.x.shape[1]
        groups = self.groups.val
        C_out = f_shape[1] * groups

        if self.bias is not None and self.bias.val.shape[0] != C_out:
            msg = "# of bias values {} not equal to # output channels {}"
            raise ValueError(msg.format(self.bias.val.shape[0], C_out))
        # C_out is f_shape[1] * groups by construction, so divisibility must
        # be checked on the input channels.
        if C_in % groups != 0:
            msg = "# of input channels {} not divisible by groups {}"
            raise ValueError(msg.format(C_in, groups))

        # If output shape is given, return it
        if self.output_shape is not None:
            output_shape = self.output_shape.val
            assert output_shape[0] == N
            assert output_shape[1] == C_out
            return types.tensor(self.x.dtype, tuple(output_shape))

        strides = self.strides.val
        dilations = self.dilations.val
        # Effective (dilated) kernel extent per spatial dim.
        kernel_shape = [
            (kernel_shape[r] - 1) * dilations[r] + 1 for r in range(spatial_dim_rank)
        ]

        D_in = in_shape[2:]  # spatial dimensions

        # Deconv's output shape is non-deterministic, we follow TF shape logic here.
        if self.pad_type.val == "same":
            d_out_shape = [strides[r] * D_in[r] for r in range(spatial_dim_rank)]
        elif self.pad_type.val == "valid":
            d_out_shape = [
                strides[r] * (D_in[r] - 1) + kernel_shape[r]
                for r in range(spatial_dim_rank)
            ]
        elif self.pad_type.val == "custom":
            if self.pad is None:
                raise ValueError("self.pad must exist if pad_type is custom")
            pad = self.pad.val
            d_out_shape = [
                strides[r] * (D_in[r] - 1) + kernel_shape[r] - pad[2 * r] - pad[2 * r + 1]
                for r in range(spatial_dim_rank)
            ]
        else:
            # Avoid an opaque NameError on d_out_shape below.
            raise ValueError("Unsupported pad_type: {}".format(self.pad_type.val))

        retshape = [N, C_out] + d_out_shape
        return types.tensor(self.x.dtype, tuple(retshape))