Source code for apple_fm_sdk.generation_options

# For licensing see accompanying LICENSE file.
# Copyright (C) 2026 Apple Inc. All Rights Reserved.

import math
from dataclasses import dataclass
from enum import Enum
from typing import Optional



[docs]
class SamplingModeType(str, Enum):
    """Enumeration of available sampling mode types.

    Members mix in ``str``, so each member compares equal to its plain string
    value (for example ``SamplingModeType.GREEDY == "greedy"``).

    :cvar GREEDY: Always select the most likely token
    :cvar RANDOM: Randomly select from high-probability tokens
    """

    # The string values are what ends up under the "mode" key when
    # generation options are converted to a dictionary.
    GREEDY = "greedy"
    RANDOM = "random"




[docs]
@dataclass
class SamplingMode:
    """A type that defines how values are sampled from a probability distribution.

    This class represents different sampling strategies that control how the model
    picks tokens when generating a response. The model builds its response in a loop,
    and at each iteration it produces a probability distribution for all tokens in
    its vocabulary. The sampling mode determines how to select the next token from
    this distribution.

    Instances are normally created through the :meth:`greedy` and :meth:`random`
    factory methods. Constructing the dataclass directly performs no validation
    of the field values; only :meth:`random` checks its arguments.

    :ivar mode_type: The type of sampling mode
    :vartype mode_type: SamplingModeType
    :ivar top: For random sampling with fixed top-k, the number of high-probability
        tokens to consider
    :vartype top: Optional[int]
    :ivar probability_threshold: For random sampling with variable threshold, the
        cumulative probability threshold
    :vartype probability_threshold: Optional[float]
    :ivar seed: Random seed for reproducible random sampling
    :vartype seed: Optional[int]
    """

    # Required: selects greedy vs. random sampling.
    mode_type: SamplingModeType
    # The remaining fields default to None, meaning "not specified".
    top: Optional[int] = None
    probability_threshold: Optional[float] = None
    seed: Optional[int] = None


[docs]
    @classmethod
    def greedy(cls) -> "SamplingMode":
        """Build a sampling mode that always picks the most likely token.

        With greedy sampling the highest-probability token is selected at
        every step, which gives deterministic, focused responses. The
        ``top``, ``probability_threshold`` and ``seed`` fields are left at
        their ``None`` defaults.

        :return: A SamplingMode whose ``mode_type`` is ``SamplingModeType.GREEDY``
        :rtype: SamplingMode

        Example::

            import apple_fm_sdk as fm

            sampling = fm.SamplingMode.greedy()
            options = fm.GenerationOptions(sampling=sampling)
        """
        greedy_kind = SamplingModeType.GREEDY
        return cls(mode_type=greedy_kind)



[docs]
    @classmethod
    def random(
        cls,
        top: Optional[int] = None,
        probability_threshold: Optional[float] = None,
        seed: Optional[int] = None,
    ) -> "SamplingMode":
        """Create a random sampling mode with optional constraints.

        Random sampling introduces variability in responses by randomly selecting
        from high-probability tokens. You can constrain the selection using either:

        - **top**: Consider only the top-k most likely tokens (fixed number)
        - **probability_threshold**: Consider tokens until cumulative probability
          reaches the threshold (variable number)

        :param top: Number of high-probability tokens to consider. If specified,
            only the top-k most likely tokens are candidates for selection.
        :type top: Optional[int]
        :param probability_threshold: Cumulative probability threshold (0.0 to 1.0).
            If specified, tokens are considered until their cumulative probability
            reaches this threshold.
        :type probability_threshold: Optional[float]
        :param seed: Random seed for reproducible sampling. Using the same seed
            with the same inputs will produce the same outputs.
        :type seed: Optional[int]
        :return: A SamplingMode configured for random sampling
        :rtype: SamplingMode
        :raises ValueError: If both top and probability_threshold are specified,
            or if values are out of valid ranges

        Examples:
            Random sampling with top-k::

                import apple_fm_sdk as fm

                # Consider only top 50 most likely tokens
                sampling = fm.SamplingMode.random(top=50, seed=42)
                options = fm.GenerationOptions(sampling=sampling)

            Random sampling with probability threshold::

                import apple_fm_sdk as fm

                # Consider tokens until 90% cumulative probability
                sampling = fm.SamplingMode.random(
                    probability_threshold=0.9,
                    seed=42
                )
                options = fm.GenerationOptions(sampling=sampling)

            Random sampling with seed only::

                import apple_fm_sdk as fm

                # Reproducible random sampling without constraints
                sampling = fm.SamplingMode.random(seed=42)
                options = fm.GenerationOptions(sampling=sampling)

        Note:
            - Only one of ``top`` or ``probability_threshold`` can be specified
            - If neither is specified, all tokens are considered
            - The ``seed`` parameter enables reproducible generation
        """
        if top is not None and probability_threshold is not None:
            raise ValueError(
                "Cannot specify both 'top' and 'probability_threshold'. "
                "Choose one sampling constraint."
            )

        if top is not None and (not isinstance(top, int) or top <= 0):
            raise ValueError("'top' must be a positive integer")

        if probability_threshold is not None and (
            not isinstance(probability_threshold, (int, float))
            or not 0.0 <= probability_threshold <= 1.0
        ):
            raise ValueError("'probability_threshold' must be between 0.0 and 1.0")

        if seed is not None and not isinstance(seed, int):
            raise ValueError("'seed' must be an integer")

        return cls(
            mode_type=SamplingModeType.RANDOM,
            top=top,
            probability_threshold=probability_threshold,
            seed=seed,
        )





[docs]
@dataclass
class GenerationOptions:
    """Options that control how the model generates its response to a prompt.

    Generation options determine the decoding strategy the framework uses to adjust
    the way the model chooses output tokens. When you interact with the model, it
    converts your input to a token sequence and uses it to generate the response.

    Field values are validated when an instance is created: ``temperature`` must
    be a non-negative number, ``maximum_response_tokens`` must be a positive
    integer, and ``sampling`` must be a :class:`SamplingMode`. A ``ValueError``
    is raised otherwise. Fields left as ``None`` are not validated.

    **Important Considerations:**

    - Only use ``maximum_response_tokens`` when you need to protect against
      unexpectedly verbose responses. Enforcing a strict token response limit can
      lead to the model producing malformed results or grammatically incorrect
      responses.

    - All input to the model contributes tokens to the context window, including
      the Instructions, Prompt, Tool definitions, and Generable types, as well as
      the model's responses. If your session exceeds the available context size,
      it throws an ExceededContextWindowSizeError.

    :ivar sampling: A sampling strategy for how the model picks tokens when
        generating a response. Defaults to None (uses model default).
    :vartype sampling: Optional[SamplingMode]
    :ivar temperature: Temperature influences the confidence of the model's response.
        Higher values (e.g., 1.0) make output more random and creative, while lower
        values (e.g., 0.1) make it more focused and deterministic. Valid range is
        typically 0.0 to 1.0; this class only enforces that the value is
        non-negative. Defaults to None (uses model default).
    :vartype temperature: Optional[float]
    :ivar maximum_response_tokens: The maximum number of tokens the model is allowed
        to produce in its response. Use this to prevent unexpectedly verbose responses,
        but be aware that strict limits may result in incomplete or malformed output.
        Defaults to None (no explicit limit).
    :vartype maximum_response_tokens: Optional[int]

    Examples:
        Default options::

            import apple_fm_sdk as fm

            options = fm.GenerationOptions()

        Custom temperature and token limit::

            import apple_fm_sdk as fm

            options = fm.GenerationOptions(
                temperature=0.7,
                maximum_response_tokens=500
            )

        Greedy sampling with temperature::

            import apple_fm_sdk as fm

            options = fm.GenerationOptions(
                sampling=fm.SamplingMode.greedy(),
                temperature=0.3
            )

        Random sampling with constraints::

            import apple_fm_sdk as fm

            options = fm.GenerationOptions(
                sampling=fm.SamplingMode.random(top=50, seed=42),
                temperature=0.8,
                maximum_response_tokens=1000
            )

    See Also:
        - :class:`SamplingMode`: For configuring sampling strategies
        - :class:`~apple_fm_sdk.session.LanguageModelSession`: For using options in sessions
    """

    # Every field is optional; None means "not specified".
    sampling: Optional[SamplingMode] = None
    temperature: Optional[float] = None
    maximum_response_tokens: Optional[int] = None


[docs]
    def __post_init__(self):
        """Validate generation options after initialization.

        :raises ValueError: If any option values are invalid
        """
        if self.temperature is not None:
            if not isinstance(self.temperature, (int, float)):
                raise ValueError("'temperature' must be a number")
            if self.temperature < 0.0:
                raise ValueError("'temperature' must be non-negative")

        if self.maximum_response_tokens is not None:
            if not isinstance(self.maximum_response_tokens, int):
                raise ValueError("'maximum_response_tokens' must be an integer")
            if self.maximum_response_tokens <= 0:
                raise ValueError("'maximum_response_tokens' must be positive")

        if self.sampling is not None and not isinstance(self.sampling, SamplingMode):
            raise ValueError("'sampling' must be a SamplingMode instance")


    def to_dict(self) -> dict:
        """Convert GenerationOptions to a dictionary for JSON serialization.

        This method converts the GenerationOptions instance into a dictionary
        format suitable for passing to the C bindings layer as JSON.

        :return: Dictionary representation of the generation options
        :rtype: dict

        Example::

            import apple_fm_sdk as fm

            options = fm.GenerationOptions(
                temperature=0.7,
                sampling=fm.SamplingMode.random(top=50),
                maximum_response_tokens=500
            )
            options_dict = options.to_dict()
            # {'temperature': 0.7, 'sampling': {'mode': 'random', 'top_k': 50}, 'maximum_response_tokens': 500}
        """
        result = {}

        if self.sampling is not None:
            sampling_dict = {"mode": self.sampling.mode_type.value}
            if self.sampling.mode_type == SamplingModeType.RANDOM:
                if self.sampling.top is not None:
                    sampling_dict["top_k"] = str(self.sampling.top)
                if self.sampling.probability_threshold is not None:
                    sampling_dict["top_p"] = str(self.sampling.probability_threshold)
                if self.sampling.seed is not None:
                    sampling_dict["seed"] = str(self.sampling.seed)
            result["sampling"] = sampling_dict

        if self.temperature is not None:
            result["temperature"] = self.temperature

        if self.maximum_response_tokens is not None:
            result["maximum_response_tokens"] = self.maximum_response_tokens

        return result