Source code for coremltools.optimize.torch.palettization.palettization_config

#  Copyright (c) 2024, Apple Inc. All rights reserved.
#
#  Use of this source code is governed by a BSD-3-clause license that can be
#  found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

from collections import OrderedDict as _OrderedDict
from typing import Any as _Any
from typing import Callable as _Callable
from typing import Dict as _Dict
from typing import List as _List
from typing import NewType as _NewType
from typing import Optional as _Optional
from typing import Union as _Union

import cattrs as _cattrs
import torch as _torch
import torch.nn as _nn
from attr import define as _define
from attr import field as _field
from attrs import validators as _validators

from coremltools.optimize.torch._utils.torch_utils import (
    maybe_convert_str_to_dtype as _maybe_convert_str_to_dtype,
)
from coremltools.optimize.torch.optimization_config import (
    ModuleOptimizationConfig as _ModuleOptimizationConfig,
)
from coremltools.optimize.torch.optimization_config import OptimizationConfig as _OptimizationConfig
from coremltools.optimize.torch.optimization_config import (
    PalettizationGranularity,
    _deprecated_field,
    _validate_module_type_keys_factory,
)

# Default advanced options for palettization
DEFAULT_PALETTIZATION_ADVANCED_OPTIONS = {
    "cluster_permute": None,
    "palett_max_mem": 1.0,
    "kmeans_max_iter": 3,
    "prune_threshold": 1e-7,
    "kmeans_init": "auto",
    "kmeans_opt1d_threshold": 1024,
    "enforce_zero": False,
    "palett_mode": "dkm",
    "palett_tau": 0.0001,
    "palett_epsilon": 0.0001,
    "palett_lambda": 0.0,
    "add_extra_centroid": False,
    "palett_cluster_tol": 0.0,
    "palett_min_tsize": 64 * 1024,
    "palett_unique": False,
    "palett_shard": False,
    "palett_batch_mode": False,
    "palett_dist": False,
    "per_channel_scaling_factor_scheme": "min_max",
    "percentage_palett_enable": 1.0,
    "kmeans_batch_threshold": 4,
    "kmeans_n_init": 100,
    "zero_threshold": 1e-7,
    "kmeans_error_bnd": 0.0,
}
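
# Illustrative sketch (not part of the library): any of the advanced options above can be
# overridden per module through ``ModuleDKMPalettizerConfig`` (defined later in this file),
# for example raising the number of differentiable k-means iterations. The values shown are
# arbitrary examples.
#
#     config = ModuleDKMPalettizerConfig(n_bits=4, kmeans_max_iter=5)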


DEFAULT_PALETTIZATION_OPTIONS = {
    "quant_min": -128,
    "quant_max": 127,
    "dtype": _torch.qint8,
    "cluster_dtype": "f32",
    "weight_threshold": 2048,
    "milestone": 0,
    "quantize_activations": False,
    "enable_per_channel_scale": False,
    "granularity": "per_tensor",
    "group_size": None,
}


_default_palettization_scheme = {
    **DEFAULT_PALETTIZATION_OPTIONS,
    **DEFAULT_PALETTIZATION_ADVANCED_OPTIONS,
}


# Default scheme for palettization
DEFAULT_PALETTIZATION_SCHEME = {
    _nn.Linear: {"n_bits": 4, "cluster_dim": 1, **_default_palettization_scheme},
    _nn.Conv1d: {"n_bits": 2, "cluster_dim": 1, **_default_palettization_scheme},
    _nn.Conv2d: {"n_bits": 2, "cluster_dim": 1, **_default_palettization_scheme},
    _nn.Conv3d: {"n_bits": 2, "cluster_dim": 1, **_default_palettization_scheme},
    _nn.LayerNorm: {"n_bits": 2, "cluster_dim": 1, **_default_palettization_scheme},
    _nn.MultiheadAttention: {"n_bits": 2, "cluster_dim": 1, **_default_palettization_scheme},
    _nn.Embedding: {"n_bits": 2, "cluster_dim": 1, **_default_palettization_scheme},
}
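
# Illustrative sketch (for reference only, not executed by the library): the per-module-type
# entries above become the fallback configs when no user config is supplied. For example, a
# ``torch.nn.Linear`` layer falls back to 4-bit palettization with ``cluster_dim=1`` plus the
# default and advanced options:
#
#     linear_defaults = DEFAULT_PALETTIZATION_SCHEME[_nn.Linear]
#     assert linear_defaults["n_bits"] == 4 and linear_defaults["weight_threshold"] == 2048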


# PyTorch modules from torch.ao.quantization.quantization_mappings.DEFAULT_QAT_MODULE_MAPPINGS that are supported
# for palettization
SUPPORTED_PYTORCH_QAT_MODULES = (_nn.Linear, _nn.Conv2d, _nn.Conv3d)


@_define
class ModuleDKMPalettizerConfig(_ModuleOptimizationConfig):
    r"""
    Configuration class for specifying global and module-level options for the palettization
    algorithm implemented in :py:class:`DKMPalettizer`. The parameters specified in this config
    control the DKM algorithm, described in
    `DKM: Differentiable K-Means Clustering Layer for Neural Network Compression
    <https://arxiv.org/abs/2108.12659>`_.

    For most use cases, the only parameters you need to specify are ``n_bits``,
    ``weight_threshold``, and ``milestone``.

    .. note::
        Most of the parameters in this class are meant for advanced use cases and for further
        fine-tuning the DKM algorithm. The default values usually work for a majority of tasks.

    .. note::
        Change the following parameters only when you use activation quantization in conjunction
        with DKM weight palettization: ``quant_min``, ``quant_max``, ``dtype``, and
        ``quantize_activations``.

    Args:
        n_bits (:obj:`int`): Number of clusters. The number of clusters used is
            :math:`2^{n\_bits}`. Defaults to ``4`` for linear layers and ``2`` for all other
            layers.
        weight_threshold (:obj:`int`): A module is only palettized if the number of elements in
            its weight matrix exceeds ``weight_threshold``. If there are multiple weights in a
            module (such as :py:class:`torch.nn.MultiheadAttention`), all of them must have more
            elements than the ``weight_threshold`` for the module to be palettized.
            Defaults to ``2048``.
        granularity (:py:class:`PalettizationGranularity`): Granularity for palettization.
            One of ``per_tensor`` or ``per_grouped_channel``. Defaults to ``per_tensor``.
        group_size (:obj:`int`): Specifies the number of channels in a group. Only effective when
            granularity is ``per_grouped_channel``.
        enable_per_channel_scale (:obj:`bool`): When set to ``True``, per-channel scaling is used
            along the channel dimension.
        milestone (:obj:`int`): Step or epoch at which palettization begins. Defaults to ``0``.
        cluster_dim (:obj:`int`, ``optional``): The dimension of each cluster. Defaults to ``1``.
        quant_min (:obj:`int`, ``optional``): The minimum value for each element in the weight
            clusters if they are quantized. Defaults to ``-128``.
        quant_max (:obj:`int`, ``optional``): The maximum value for each element in the weight
            clusters if they are quantized. Defaults to ``127``.
        dtype (:py:class:`torch.dtype`, ``optional``): The ``dtype`` to use for quantizing the
            activations. Only applies when ``quantize_activations`` is ``True``.
            Defaults to :py:class:`torch.qint8`.
        cluster_dtype (:obj:`str`, ``optional``): ``dtype`` to use for quantizing the clusters.
            Allowed options are ``'i8'``, ``'u8'``, ``'f16'``, ``'bf16'``, and ``'f32'``.
            Defaults to ``'f32'``, i.e., by default, the clusters aren't quantized.
        quantize_activations (:obj:`bool`, ``optional``): When ``True``, the activations are
            quantized. Defaults to ``False``.
        cluster_permute (:obj:`tuple`, ``optional``): Permutation order to apply to weight
            partitions. Defaults to ``None``.
        palett_max_mem (:obj:`float`, ``optional``): Proportion of available GPU memory that
            should be used for palettization. Defaults to ``1.0``.
        kmeans_max_iter (:obj:`int`, ``optional``): Maximum number of differentiable ``k-means``
            iterations. Defaults to ``3``.
        prune_threshold (:obj:`float`, ``optional``): Hard-shrinks weights between
            [``-prune_threshold``, ``prune_threshold``] to zero. Useful for joint pruning and
            palettization. Defaults to ``1e-7``.
        kmeans_init (:obj:`str`, ``optional``): ``k-means`` algorithm to use. Allowed options are
            ``opt1d``, ``cpu.kmeans++``, and ``kmeans++``. Defaults to ``auto``.
        kmeans_opt1d_threshold (:obj:`int`, ``optional``): Channel threshold to decide if
            ``opt1d kmeans`` should be used. Defaults to ``1024``.
        enforce_zero (:obj:`bool`, ``optional``): If ``True``, enforces the LUT centroid which is
            closest to the origin to be fixed to zero. Defaults to ``False``.
        palett_mode (:obj:`str`, ``optional``): Criteria to calculate attention during
            ``k-means``. Allowed options are ``gsm``, ``dkm``, and ``hard``. Defaults to ``dkm``.
        palett_tau (:obj:`float`, ``optional``): Temperature factor for the softmax used in the
            DKM algorithm. Defaults to ``0.0001``.
        palett_epsilon (:obj:`float`, ``optional``): Distance threshold for clusters between
            ``k-means`` iterations. Defaults to ``0.0001``.
        palett_lambda (:obj:`float`, ``optional``): Reduces effects of outliers during centroid
            calculation. Defaults to ``0.0``.
        add_extra_centroid (:obj:`bool`, ``optional``): If ``True``, adds an extra centroid to
            the LUT. Defaults to ``False``.
        palett_cluster_tol (:obj:`float`, ``optional``): Tolerance for non-unique centroids in
            the LUT. The higher the number, the more tolerance for non-unique centroids.
            Defaults to ``0.0``.
        palett_min_tsize (:obj:`int`, ``optional``): Weight threshold beyond which to use a
            custom packing and unpacking hook for autograd. Defaults to ``64*1024``.
        palett_unique (:obj:`bool`, ``optional``): If ``True``, reduces the attention map by
            leveraging the fact that FP16 only has ``2^16`` unique values. Useful for large
            models like LLMs where attention maps can be huge. Defaults to ``False``. More
            details can be found in `eDKM: An Efficient and Accurate Train-time Weight Clustering
            for Large Language Models <https://arxiv.org/pdf/2309.00964.pdf>`_.
        palett_shard (:obj:`bool`, ``optional``): If ``True``, the index list is sharded across
            GPUs. Defaults to ``False``. More details can be found in `eDKM: An Efficient and
            Accurate Train-time Weight Clustering for Large Language Models
            <https://arxiv.org/pdf/2309.00964.pdf>`_.
        palett_batch_mode (:obj:`bool`, ``optional``): If ``True``, performs batch DKM across the
            different partitions created for different blocks. Defaults to ``False``. More
            details can be found in `eDKM: An Efficient and Accurate Train-time Weight Clustering
            for Large Language Models <https://arxiv.org/pdf/2309.00964.pdf>`_.
        palett_dist (:obj:`bool`, ``optional``): If ``True``, performs distributed distance
            calculation in batch mode if distributed torch is available. Defaults to ``False``.
        per_channel_scaling_factor_scheme (:obj:`str`, ``optional``): Criteria to calculate the
            ``per_channel_scaling_factor``. Allowed options are ``min_max`` and ``abs``.
            Defaults to ``min_max``.
        percentage_palett_enable (:obj:`float`, ``optional``): Percentage of partitions to enable
            for DKM. Defaults to ``1.0``.
        kmeans_batch_threshold (:obj:`int`, ``optional``): Threshold on ``num_partitions`` used
            to decide whether to go through with the sharded centroids list. ``num_partitions``
            is calculated by dividing the channel size by the provided ``group_size``. If
            ``num_partitions`` is below ``kmeans_batch_threshold``, the algorithm resorts to
            performing distributed k-means for the lower partition numbers, given that
            ``num_partitions`` GPUs are available. Defaults to ``4``.
        kmeans_n_init (:obj:`int`, ``optional``): Number of times the k-means algorithm will be
            run with different centroid seeds. The final result will be the best output of
            ``kmeans_n_init`` consecutive runs in terms of inertia. Defaults to ``100``.
        zero_threshold (:obj:`float`, ``optional``): Zero threshold used to decide the minimum
            value of clamp for softmax. Defaults to ``1e-7``.
        kmeans_error_bnd (:obj:`float`, ``optional``): Distance threshold to decide at what
            distance between parameters and clusters to stop the k-means operation.
            Defaults to ``0.0``.

    This class supports a few different configurations to structure the palettization:

    1. **Per-tensor palettization**: This is the default configuration, where the whole tensor
       shares a single look-up table. The ``granularity`` is set to ``per_tensor`` and
       ``group_size`` is ``None``.

    2. **Per-grouped-channel palettization**: In this configuration, ``group_size`` number of
       channels along ``channel_axis`` share the same look-up table. For example, for a weight
       matrix of shape ``(16, 25)``, if we provide ``group_size = 8``, the shape of the look-up
       table would be ``(2, 2^n_bits)``.

    NOTE: Currently, grouping is only supported along the output channel axis.
    """

    n_bits: _Optional[int] = _field(
        default=None, validator=_validators.optional(_validators.instance_of(int))
    )
    weight_threshold: int = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["weight_threshold"],
        validator=_validators.instance_of(int),
    )
    granularity: PalettizationGranularity = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["granularity"],
        converter=PalettizationGranularity,
        validator=_validators.in_(PalettizationGranularity),
    )
    group_size: _Optional[int] = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["group_size"],
        validator=_validators.optional(_validators.instance_of(int)),
    )
    enable_per_channel_scale: bool = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["enable_per_channel_scale"],
        validator=_validators.instance_of(bool),
    )
    milestone: int = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["milestone"],
        validator=_validators.instance_of(int),
    )
    cluster_dim: _Optional[int] = _field(
        default=None, validator=_validators.optional(_validators.instance_of(int))
    )
    quant_min: int = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["quant_min"],
        validator=_validators.instance_of(int),
    )
    quant_max: int = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["quant_max"],
        validator=_validators.instance_of(int),
    )
    dtype: _torch.dtype = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["dtype"],
        converter=_maybe_convert_str_to_dtype,
        validator=[
            _validators.instance_of(_torch.dtype),
            _validators.in_([_torch.qint8, _torch.quint8, _torch.float32]),
        ],
    )
    cluster_dtype: str = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["cluster_dtype"],
        validator=_validators.instance_of(str),
    )
    quantize_activations: bool = _field(
        default=DEFAULT_PALETTIZATION_OPTIONS["quantize_activations"],
        validator=_validators.instance_of(bool),
    )
    cluster_permute: _Optional[tuple] = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["cluster_permute"],
        validator=_validators.optional(_validators.instance_of(tuple)),
    )
    palett_max_mem: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_max_mem"],
        validator=_validators.instance_of(float),
    )
    kmeans_max_iter: int = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["kmeans_max_iter"],
        validator=_validators.instance_of(int),
    )
    prune_threshold: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["prune_threshold"],
        validator=_validators.instance_of(float),
    )
    kmeans_init: str = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["kmeans_init"],
        validator=_validators.instance_of(str),
    )
    kmeans_opt1d_threshold: int = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["kmeans_opt1d_threshold"],
        validator=_validators.instance_of(int),
    )
    enforce_zero: bool = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["enforce_zero"],
        validator=_validators.instance_of(bool),
    )
    palett_mode: str = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_mode"],
        validator=_validators.instance_of(str),
    )
    palett_tau: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_tau"],
        validator=_validators.instance_of(float),
    )
    palett_epsilon: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_epsilon"],
        validator=_validators.instance_of(float),
    )
    palett_lambda: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_lambda"],
        validator=_validators.instance_of(float),
    )
    add_extra_centroid: bool = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["add_extra_centroid"],
        validator=_validators.instance_of(bool),
    )
    palett_cluster_tol: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_cluster_tol"],
        validator=_validators.instance_of(float),
    )
    palett_min_tsize: int = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_min_tsize"],
        validator=_validators.instance_of(int),
    )
    palett_unique: bool = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_unique"],
        validator=_validators.instance_of(bool),
    )
    palett_shard: bool = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_shard"],
        validator=_validators.instance_of(bool),
    )
    palett_batch_mode: bool = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_batch_mode"],
        validator=_validators.instance_of(bool),
    )
    palett_dist: bool = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["palett_dist"],
        validator=_validators.instance_of(bool),
    )
    per_channel_scaling_factor_scheme: str = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["per_channel_scaling_factor_scheme"],
        validator=_validators.and_(
            _validators.instance_of(str), _validators.in_(["min_max", "abs"])
        ),
    )
    percentage_palett_enable: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["percentage_palett_enable"],
        validator=_validators.instance_of(float),
    )
    kmeans_batch_threshold: int = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["kmeans_batch_threshold"],
        validator=_validators.instance_of(int),
    )
    kmeans_n_init: int = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["kmeans_n_init"],
        validator=_validators.instance_of(int),
    )
    zero_threshold: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["zero_threshold"],
        validator=_validators.instance_of(float),
    )
    kmeans_error_bnd: float = _field(
        default=DEFAULT_PALETTIZATION_ADVANCED_OPTIONS["kmeans_error_bnd"],
        validator=_validators.instance_of(float),
    )
    partition_size: int = _deprecated_field(
        message=(
            "partition_size is being deprecated and will be removed in "
            "future versions. Please use group_size parameter instead."
        )
    )

    @group_size.validator
    def per_grouped_channel_granularity(self, attribute, value):
        if self.granularity == PalettizationGranularity.per_grouped_channel:
            assert (
                value is not None
            ), "group_size has to be specified along with per_grouped_channel granularity."
            assert value > 0, "group_size should be greater than zero"
        else:
            assert value is None, "group_size can't be specified along with per_tensor granularity."
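
# Illustrative usage sketch (not part of the library): constructing a module-level config.
# With ``granularity="per_grouped_channel"``, the ``group_size`` validator above requires a
# positive ``group_size``; with the default ``per_tensor`` granularity it must stay ``None``.
# The values below are arbitrary examples.
#
#     per_group_config = ModuleDKMPalettizerConfig(
#         n_bits=4, granularity="per_grouped_channel", group_size=8
#     )
#     per_tensor_config = ModuleDKMPalettizerConfig(n_bits=2, weight_threshold=1024)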
_default_module_type_configs = _OrderedDict(
    {
        key: ModuleDKMPalettizerConfig.from_dict(val)
        for key, val in DEFAULT_PALETTIZATION_SCHEME.items()
    }
)


_GlobalConfigType = _NewType(
    "GlobalConfigType",
    _Union[
        _Optional[ModuleDKMPalettizerConfig],
        _List[_Optional[ModuleDKMPalettizerConfig]],
    ],
)
_ModuleTypeConfigType = _NewType(
    "ModuleTypeConfigType", _Dict[_Union[_Callable, str], _GlobalConfigType]
)
_ModuleNameConfigType = _NewType(
    "ModuleNameConfigType", _Dict[str, _Optional[ModuleDKMPalettizerConfig]]
)


def _validate_dkm_config_type(instance, attribute, value):
    if value is not None:
        if isinstance(value, list):
            return _validators.deep_iterable(
                member_validator=_validators.optional(
                    _validators.instance_of(ModuleDKMPalettizerConfig)
                ),
                iterable_validator=_validators.instance_of(list),
            )(instance, attribute, value)
        else:
            return _validators.optional(_validators.instance_of(ModuleDKMPalettizerConfig))(
                instance, attribute, value
            )
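
# Illustrative sketch (not part of the library): ``_GlobalConfigType`` admits either a single
# ``ModuleDKMPalettizerConfig`` or a list of them (used to vary options by weight size), and
# ``_validate_dkm_config_type`` accepts both shapes. The values below are arbitrary examples.
#
#     single = ModuleDKMPalettizerConfig(n_bits=2)
#     by_size = [
#         ModuleDKMPalettizerConfig(n_bits=4, weight_threshold=1000),
#         ModuleDKMPalettizerConfig(n_bits=2, weight_threshold=300),
#     ]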
@_define
class DKMPalettizerConfig(_OptimizationConfig):
    """
    Configuration for specifying how different submodules of a model are palettized by
    :py:class:`DKMPalettizer`.

    The ``module_type_configs`` parameter can accept a list of :py:class:`ModuleDKMPalettizerConfig`
    as values for a given module type. The list can specify different parameters for different
    ``weight_threshold`` values. This is useful if you want to apply different configs to layers of
    the same type with weights of different sizes.

    For example, to use ``4``-bit palettization for weights with more than ``1000`` elements and
    ``2``-bit palettization for weights with more than ``300`` but less than ``1000`` elements,
    create a config as follows:

    .. code-block:: python

        custom_config = {
            nn.Conv2d: [
                {"n_bits": 4, "cluster_dim": 4, "weight_threshold": 1000},
                {"n_bits": 2, "cluster_dim": 2, "weight_threshold": 300},
            ]
        }
        config = DKMPalettizerConfig.from_dict({"module_type_configs": custom_config})

    Args:
        global_config (:py:class:`ModuleDKMPalettizerConfig`): Config to be applied globally to all
            supported modules. Missing values are chosen from the default config.
        module_type_configs (:obj:`dict` of :obj:`str` to :py:class:`ModuleDKMPalettizerConfig`):
            Module type level configs applied to a specific module class, such as
            :py:class:`torch.nn.Linear`. The keys can be either strings or module classes. When
            ``module_type_config`` is set to ``None`` for a module type, it is not palettized.
        module_name_configs (:obj:`dict` of :obj:`str` to :py:class:`ModuleDKMPalettizerConfig`):
            Module level configs applied to specific modules. The name of the module must be a
            fully qualified name that can be used to fetch it from the top level module using the
            ``module.get_submodule(target)`` method. When ``module_name_config`` is set to ``None``
            for a module, it is not palettized.
    """

    global_config: _GlobalConfigType = _field(default=None, validator=_validate_dkm_config_type)
    module_type_configs: _ModuleTypeConfigType = _field(
        factory=_OrderedDict,
        validator=_validators.deep_mapping(
            key_validator=_validators.and_(
                _validators.instance_of((str, _Callable)),
                _validate_module_type_keys_factory(list(DEFAULT_PALETTIZATION_SCHEME.keys())),
            ),
            value_validator=_validate_dkm_config_type,
            mapping_validator=_validators.instance_of(dict),
        ),
    )
    module_name_configs: _ModuleNameConfigType = _field(
        factory=_OrderedDict,
        validator=_validators.deep_mapping(
            key_validator=_validators.instance_of(str),
            value_validator=_validate_dkm_config_type,
            mapping_validator=_validators.instance_of(dict),
        ),
    )

    def __attrs_post_init__(self):
        if (
            self.global_config is None
            and len(self.module_type_configs) == 0
            and len(self.module_name_configs) == 0
        ):
            self.module_type_configs = _default_module_type_configs
        self._sort_configs_by_weight_threshold(self.global_config)
        for ctype, config in self.module_type_configs.items():
            self.set_module_type(ctype, self._sort_configs_by_weight_threshold(config))
        for name, config in self.module_name_configs.items():
            self.set_module_name(name, self._sort_configs_by_weight_threshold(config))
    @classmethod
    def from_dict(cls, config_dict: _Dict[str, _Any]) -> "DKMPalettizerConfig":
        super().from_dict(config_dict)
        converter = _cattrs.Converter(forbid_extra_keys=True)
        converter.register_structure_hook(_ModuleTypeConfigType, _structure_from_dict_hook)
        converter.register_structure_hook(_ModuleNameConfigType, _structure_from_dict_hook)
        converter.register_structure_hook(_GlobalConfigType, _structure_dkm_config_hook)
        return converter.structure_attrs_fromdict(config_dict, cls)
    @staticmethod
    def _sort_configs_by_weight_threshold(config: _GlobalConfigType):
        if isinstance(config, list):
            return sorted(config, key=lambda x: x.weight_threshold)
        return config
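
# Illustrative usage sketch (not part of the library): building a ``DKMPalettizerConfig`` from a
# dictionary. Module-type keys may be classes or strings; the submodule name below is a
# hypothetical fully qualified name chosen for illustration.
#
#     config = DKMPalettizerConfig.from_dict(
#         {
#             "module_type_configs": {
#                 _nn.Linear: {"n_bits": 4, "weight_threshold": 1000},
#             },
#             "module_name_configs": {"encoder.fc1": {"n_bits": 2}},
#         }
#     )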
def _structure_dkm_config_hook(
    config_dict: _Union[_List[_Dict[str, _Any]], _Dict[str, _Any]], type: _Any
):
    if isinstance(config_dict, list):
        return [ModuleDKMPalettizerConfig.from_dict(cd) for cd in config_dict]
    return ModuleDKMPalettizerConfig.from_dict(config_dict)


def _structure_from_dict_hook(module_type_dict: _Dict[_Union[_Callable, str], _Any], type: _Any):
    return_dict = _OrderedDict()
    for key, value in module_type_dict.items():
        if value is None:
            return_dict[key] = None
        else:
            return_dict[key] = _structure_dkm_config_hook(value, type)
    return return_dict
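
# Illustrative sketch (not part of the library): the hooks above turn plain dictionaries, or lists
# of dictionaries, into ``ModuleDKMPalettizerConfig`` instances during ``from_dict`` conversion.
# The ``None`` passed for the ``type`` argument is only for illustration; the hook ignores it here.
#
#     cfg = _structure_dkm_config_hook({"n_bits": 2}, None)       # -> ModuleDKMPalettizerConfig
#     cfgs = _structure_dkm_config_hook(
#         [{"n_bits": 4}, {"n_bits": 2}], None
#     )                                                            # -> list of configs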