# Source code for coremltools.converters.mil.mil.ops.defs.iOS17.quantization_ops

# Copyright (c) 2022, Apple Inc. All rights reserved.
#
#  Use of this source code is governed by a BSD-3-clause license that can be
#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

import numpy as np

from coremltools.converters.mil.mil import types
from coremltools.converters.mil.mil.input_type import InputSpec, TensorInputType
from coremltools.converters.mil.mil.operation import VALUE, Operation, precondition
from coremltools.converters.mil.mil.ops.defs._op_reqs import register_op
from coremltools.converters.mil.mil.ops.defs.iOS17 import _IOS17_TARGET


def _rank_promoted_to_same_as_data(data, axis, param):
    """
    Promote `param` to the same rank as `data` so the two broadcast together.

    A rank-0 `param` is reshaped to an all-ones shape of `data`'s rank.
    Any other `param` gets a singleton dimension inserted at every position
    of `data`'s rank except `axis` (a negative `axis` counts from the end).
    """
    data_rank = len(data.shape)

    # Normalize a negative axis to its non-negative equivalent.
    if axis is not None and axis < 0:
        axis = axis + data_rank

    if len(param.shape) == 0:
        return np.reshape(param, (1,) * data_rank)

    singleton_axes = tuple(dim for dim in range(data_rank) if dim != axis)
    return np.expand_dims(param, axis=singleton_axes)


def _check_scale_zp_shapes(input_data, scale, zero_point, axis):
    """
    Validate that ``scale``, ``zero_point`` and ``axis`` are mutually consistent.

    ``scale`` must have rank 0 or 1:

    * Rank 0: ``axis`` must be ``None`` and ``zero_point``, when given, must
      also have rank 0.
    * Rank 1: ``axis`` and ``axis.val`` must be given, with ``axis.val`` in
      ``[-input_data.rank, input_data.rank)``. ``scale`` and, when given,
      ``zero_point`` must be rank 1 with length ``input_data.shape[axis.val]``.

    Raises
    ------
    ValueError
        If any of the constraints above is violated.
    """

    def _require_axis_length(param, expected_size, name):
        # Only rank-1 params are compared against the axis dimension.
        if param.rank != 1:
            return
        if param.shape[0] != expected_size:
            raise ValueError(
                "Parameter {}, if vector, needs to have same size as the dimension size along the parameter input".format(
                    name
                )
            )

    scale_rank = scale.rank
    if scale_rank != 0 and scale_rank != 1:
        raise ValueError("Params scale & zero_point should both be scalars or vectors")

    has_zero_point = zero_point is not None

    if scale_rank == 0:
        # ios17.dequantize doesn't want axis defined for scalar quant params.
        if axis is not None:
            raise ValueError("axis should not be provided to quantize if scale/zp are scalars")
        if has_zero_point and zero_point.rank != 0:
            raise ValueError("zero_point should be a scalar if scale is a scalar")
        return

    # From here on, scale is a vector and an axis is mandatory.
    if axis is None or axis.val is None:
        raise ValueError("axis should be provided to quantize if scale/zp are not scalars")

    axis_val = axis.val
    data_rank = input_data.rank
    if axis_val < -data_rank or axis_val >= data_rank:
        raise ValueError(
            "Parameter axis needs to be in the range -input.rank <= axis < input.rank"
        )

    axis_dim_size = input_data.shape[axis_val]
    _require_axis_length(scale, axis_dim_size, "scale")

    if not has_zero_point:
        return
    if zero_point.rank != 1:
        raise ValueError("zero_point should be a vector if scale is a vector")
    _require_axis_length(zero_point, axis_dim_size, "zero_point")


@register_op(opset_version=_IOS17_TARGET)
class quantize(Operation):
    """
    Performs affine/linear quantization on an input tensor.

    The original data comes from the first "input".
    The other parameters -- ``scale``, ``zero_point``, and ``axis`` -- describe how
    quantization should occur::

        quantized_data = clip(round(input / scale) + zero_point)

    where ``clip`` limits the result to the representable range of the output
    integer dtype.

    Parameters
    ----------
    input: tensor<SrcT, [1..]> (Required)

    zero_point: const tensor<DstT, [0..1]> (Optional)
        * The ``zero_point`` can be either a scalar or a vector. If not provided, it is
          assumed to be ``0``.
        * The ``zero_point`` follows similar broadcasting rules and size constraints as ``scale``.

    scale: const tensor<SrcT, [0..1]> (Required)
        * The ``scale`` can be either a scalar or a vector.
        * If ``scale`` is a vector, for implementation, it is broadcasted to the following shape:
            - The rank of ``scale`` becomes the same as the rank of the input.
            - Constraint: ``size(scale-vector) == input.shape[axis]``.
            - For ``i == axis``, ``scale.shape[i] == input.shape[i]``.
            - For ``i != axis``, ``scale.shape[i] == 1``.
            - For example:
                - Assume ``input.shape = (2, 3, 4, 5)`` and ``axis = 1``.
                - If ``scale`` is a vector, then ``scale.size`` needs to be equal to
                  ``input.shape[axis]``; that is, equal to ``3``.
                - This is broadcasted to ``(1, 3, 1, 1)``.

    axis: const tensor<int32, []> (Optional)
        * Must be provided when ``scale``/``zero_point`` are vectors, and must not be
          provided when they are scalars.
        * Must satisfy ``-input.rank <= axis < input.rank``.

    output_dtype: const tensor<string, []> (Required)
        * This parameter can take ``"uint8"``, ``"int8"`` as values.
        * The ``output_dtype`` value must match the ``zero_point`` dtype.

    Returns
    -------
    tensor<DstT, [1..]>

    Attributes
    ----------
    SrcT: fp16, fp32
    DstT: uint8, int8
    """

    input_spec = InputSpec(
        input=TensorInputType(type_domain="SrcT"),
        zero_point=TensorInputType(const=True, optional=True, type_domain="DstT"),
        scale=TensorInputType(const=True, type_domain="SrcT"),
        axis=TensorInputType(const=True, optional=True, type_domain=types.int32),
        output_dtype=TensorInputType(const=True, type_domain=types.str),
    )

    type_domains = {
        "SrcT": (types.fp16, types.fp32),
        "DstT": (types.uint8, types.int8),
    }

    def type_inference(self):
        """
        Validate the op's parameters and return the output tensor type.

        The output has the dtype named by ``output_dtype`` and the same shape
        as ``input``.

        Raises
        ------
        ValueError
            If ``output_dtype`` is neither int8 nor uint8, if it differs from the
            ``zero_point`` dtype, or if ``scale``/``zero_point``/``axis`` fail the
            checks in ``_check_scale_zp_shapes``.
        """
        out_dtype = types.string_to_builtin(self.output_dtype.val)
        if out_dtype not in {types.int8, types.uint8}:
            raise ValueError(
                '"quantize" op: unrecognized output dtype "{}"'.format(self.output_dtype.val)
            )

        if self.zero_point is not None:
            if out_dtype != self.zero_point.dtype:
                raise ValueError(
                    "output_dtype & zero_point dtype mismatch: {}, {}".format(
                        self.output_dtype.val, types.builtin_to_string(self.zero_point.dtype)
                    )
                )

        _check_scale_zp_shapes(self.input, self.scale, self.zero_point, self.axis)

        return types.tensor(out_dtype, self.input.shape)

    # NOTE(review): presumably `precondition(allow=VALUE)` restricts this method to
    # the case where the input values are known -- confirm against `precondition`.
    @precondition(allow=VALUE)
    def value_inference(self):
        """
        Compute ``clip(round(input / scale) + zero_point)`` with numpy.

        The result is clipped to the integer range of the ``zero_point`` dtype
        and cast to that dtype.
        """
        original_data = self.input.val
        if self.zero_point is not None:
            zero_point = self.zero_point.val
        else:
            # No zero point given: use 0 in the requested output dtype.
            zero_point = np.int8(0) if self.output_dtype.val == "int8" else np.uint8(0)
        scale = self.scale.val
        axis = None
        if self.axis is not None:
            axis = self.axis.val
        dtype_info = np.iinfo(zero_point.dtype)

        # Bring scale and zero_point to the rank of the data so they broadcast along `axis`.
        sc = _rank_promoted_to_same_as_data(original_data, axis, scale)
        zp = _rank_promoted_to_same_as_data(original_data, axis, zero_point)
        # The zero point is converted to float32 before being added to the rounded quotient.
        val = np.clip(
            np.around(original_data / sc) + zp.astype(np.float32), dtype_info.min, dtype_info.max
        )
        return val.astype(zero_point.dtype)


@register_op(opset_version=_IOS17_TARGET)
class dequantize(Operation):
    """
    Performs dequantization on an input tensor with affine/linear quantization.

    The quantized data comes from the first "input".
    The other parameters -- ``scale``, ``zero_point``, and ``axis`` -- describe how
    unquantized values can be extracted from it,
    using the following equation for affine/linear quantization::

        unquantized_data = scale * (input - zero_point)

    Parameters
    ----------
    input: tensor<SrcT, [1..]> (Required)

    zero_point: const tensor<SrcT, [0..1]> (Optional)
        * The ``zero_point`` can be either a scalar or a vector. If not provided,
          it is assumed to be ``0``.
        * The ``zero_point`` follows similar broadcasting rules and size constraints as ``scale``.

    scale: const tensor<DstT, [0..1]> (Required)
        * The ``scale`` can be either a scalar or a vector.
        * If ``scale`` is a vector, for implementation, it is broadcasted to the following shape:
            - The rank of ``scale`` becomes the same as the rank of the input.
            - Constraint: ``size(scale-vector) == input.shape[axis]``.
            - For ``i == axis``, ``scale.shape[i] == input.shape[i]``.
            - For ``i != axis``, ``scale.shape[i] == 1``.
            - For example:
                - Assume ``input.shape = (2, 3, 4, 5)`` and ``axis = 1``.
                - If ``scale`` is a vector, then ``scale.size`` needs to be equal to
                  ``input.shape[axis]``; that is, equal to ``3``.
                - This is broadcasted to ``(1, 3, 1, 1)``.

    axis: const tensor<int32, []> (Optional)
        * Must be provided when ``scale``/``zero_point`` are vectors, and must not be
          provided when they are scalars.
        * Must satisfy ``-input.rank <= axis < input.rank``.

    Returns
    -------
    tensor<DstT, [1..]>

    Attributes
    ----------
    SrcT: uint8, int8
    DstT: fp16, fp32
    """

    input_spec = InputSpec(
        input=TensorInputType(type_domain="SrcT"),
        zero_point=TensorInputType(const=True, optional=True, type_domain="SrcT"),
        scale=TensorInputType(const=True, type_domain="DstT"),
        axis=TensorInputType(const=True, optional=True, type_domain=types.int32),
    )

    type_domains = {
        "DstT": (types.fp16, types.fp32),
        "SrcT": (types.uint8, types.int8),
    }

    def type_inference(self):
        """
        Validate ``scale``/``zero_point``/``axis`` and return the output tensor type.

        The output has the dtype of ``scale`` and the same shape as ``input``.

        Raises
        ------
        ValueError
            If the parameters fail the checks in ``_check_scale_zp_shapes``.
        """
        _check_scale_zp_shapes(self.input, self.scale, self.zero_point, self.axis)
        return types.tensor(self.scale.dtype, self.input.shape)

    # NOTE(review): presumably `precondition(allow=VALUE)` restricts this method to
    # the case where the input values are known -- confirm against `precondition`.
    @precondition(allow=VALUE)
    def value_inference(self):
        """
        Compute ``scale * (input - zero_point)`` with numpy.

        The result is cast to the dtype of ``scale``.
        """
        quantized_data = self.input.val
        if self.zero_point is not None:
            zero_point = self.zero_point.val
        else:
            # No zero point given: use 0 in the dtype of the quantized input.
            zero_point = np.int8(0) if self.input.dtype == types.int8 else np.uint8(0)
        scale = self.scale.val
        axis = None
        if self.axis is not None:
            axis = self.axis.val

        # Bring scale and zero_point to the rank of the data so they broadcast along `axis`.
        sc = _rank_promoted_to_same_as_data(quantized_data, axis, scale)
        zp = _rank_promoted_to_same_as_data(quantized_data, axis, zero_point)
        # Both operands are converted to float32 first, so the subtraction is carried
        # out in floating point rather than in the 8-bit integer type.
        val = sc * (quantized_data.astype(np.float32) - zp.astype(np.float32))
        return val.astype(scale.dtype)