# llm-gguf-tools/helpers/models/quantisation.py
"""Pydantic models for quantisation operations.
Contains data models specific to the quantisation workflow including
quantisation types, configurations, and results. Uses UK English spelling
conventions throughout (quantisation, not quantization).
"""

from __future__ import annotations

from enum import StrEnum

# Path must be importable at runtime: pydantic resolves the "Path | None"
# field annotations when the models below are built, so a TYPE_CHECKING-only
# import would fail with an undefined-annotation error.
from pathlib import Path

from pydantic import BaseModel, ConfigDict, Field, field_validator


class QuantisationType(StrEnum):
    """Available quantisation types for Bartowski-method GGUF model conversion.

    Defines the quantisation strategies supported by this tool, ranging from
    the Q4_K_M baseline to the maximum-precision Q4_K_XXL variant. Each type
    represents a different trade-off between model size and quality
    preservation for embeddings, attention layers, and feed-forward networks.
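
    Example (illustrative; StrEnum members compare equal to their string
    values, which is what makes these safe to pass to external tools):

        >>> QuantisationType.Q4_K_M == "Q4_K_M"
        True
        >>> QuantisationType("Q4_K_L").name
        'Q4_K_L'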
"""
Q4_K_M = "Q4_K_M"
Q4_K_L = "Q4_K_L"
Q4_K_XL = "Q4_K_XL"
Q4_K_XXL = "Q4_K_XXL"


class URLType(StrEnum):
    """Supported URL formats for model source specification.

    Categorises input URL formats to enable appropriate handling strategies.
    HuggingFace URLs require a full model download and conversion, whilst
    Ollama GGUF URLs allow direct GGUF file downloads with pattern matching
    for efficient processing of pre-quantised models.
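
    Example URL shapes (illustrative assumptions only; the authoritative
    parsing rules live in the URL-handling code elsewhere in this project):

        https://huggingface.co/<author>/<model>       -> URLType.HUGGINGFACE
        https://huggingface.co/<author>/<model>-GGUF  -> URLType.OLLAMA_GGUF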
"""
HUGGINGFACE = "huggingface"
OLLAMA_GGUF = "ollama_gguf"


class QuantisationConfig(BaseModel):
    """Configuration for a specific quantisation method with tensor-level precision control.

    Defines quantisation parameters, including tensor type mappings and
    fallback methods for handling different model architectures. Enables
    fine-grained control over which layers receive higher-precision treatment
    whilst maintaining compatibility across diverse model structures.
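
    Example (a minimal sketch; the tensor name and method strings shown here
    are illustrative assumptions, not a documented schema):

        >>> config = QuantisationConfig(
        ...     name="Q4_K_L",
        ...     description="Q4_K_M base with Q8_0 embeddings",
        ...     tensor_types={"token_embd.weight": "Q8_0"},
        ... )
        >>> config.fallback_methods
        []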
"""
model_config = ConfigDict(use_enum_values=True)
name: str
description: str
tensor_types: dict[str, str] = Field(default_factory=dict)
fallback_methods: list[dict[str, str]] = Field(default_factory=list)


class ModelSource(BaseModel):
    """Represents a model source with information parsed from URL analysis.

    Contains metadata extracted from model URLs, including source repository
    details, author information, and GGUF file patterns. Enables
    differentiation between regular HuggingFace repositories, which require
    conversion, and GGUF repositories, which allow direct file downloads.
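
    Example (illustrative values for a regular HuggingFace repository):

        >>> source = ModelSource(
        ...     url="https://huggingface.co/meta-llama/Llama-3.1-8B",
        ...     url_type=URLType.HUGGINGFACE,
        ...     source_model="meta-llama/Llama-3.1-8B",
        ...     original_author="meta-llama",
        ...     model_name="Llama-3.1-8B",
        ... )
        >>> source.is_gguf_repo
        False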
"""
model_config = ConfigDict(use_enum_values=True, protected_namespaces=())
url: str
url_type: URLType
source_model: str
original_author: str
model_name: str
gguf_file_pattern: str | None = None
is_gguf_repo: bool = False
@field_validator("url")
@classmethod
def validate_url(cls, v: str) -> str:
"""Validate that URL is not empty.
Ensures the provided URL string is not empty or None,
as this is required for model source identification.
Returns:
The validated URL string.
Raises:
ValueError: If URL is empty or None.
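
        Example (illustrative; an empty URL surfaces as a pydantic
        ValidationError when the model is constructed):

            >>> ModelSource(url="", url_type=URLType.HUGGINGFACE,
            ...     source_model="s", original_author="a",
            ...     model_name="m")  # doctest: +IGNORE_EXCEPTION_DETAIL
            Traceback (most recent call last):
                ...
            ValidationError: ...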
"""
if not v:
msg = "URL cannot be empty"
raise ValueError(msg)
return v


class QuantisationResult(BaseModel):
    """Result of a quantisation operation with comprehensive status tracking.

    Captures the outcome of an individual quantisation attempt, including
    success status, file path, size, and error details. Supports workflow
    status tracking from planning through processing to completion, enabling
    real-time progress reporting and parallel upload coordination.
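
    Example (a minimal sketch of a freshly planned result):

        >>> result = QuantisationResult(
        ...     quantisation_type=QuantisationType.Q4_K_M,
        ...     success=False,
        ... )
        >>> result.status
        'pending'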
"""
model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)
quantisation_type: QuantisationType
success: bool
file_path: Path | None = None
file_size: str | None = None
method_used: str | None = None
error_message: str | None = None
status: str = "pending" # planned, processing, uploading, completed, failed


class LlamaCppEnvironment(BaseModel):
    """Represents a llama.cpp environment with binary and script locations.

    Encapsulates the runtime environment for llama.cpp tools, including paths
    to the quantisation binary, CLI binary, and conversion script. Handles
    both local binary installations and repository-based setups to provide
    flexible deployment options across different system configurations.
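
    Example (illustrative paths for a local binary installation; the binary
    and script names are assumptions based on llama.cpp's tool naming):

        >>> env = LlamaCppEnvironment(
        ...     quantise_binary=Path("./llama-quantize"),
        ...     cli_binary=Path("./llama-cli"),
        ...     convert_script="convert_hf_to_gguf.py",
        ... )
        >>> env.use_repo
        False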
"""
model_config = ConfigDict(arbitrary_types_allowed=True)
quantise_binary: Path # UK spelling
cli_binary: Path
convert_script: str
use_repo: bool = False


class QuantisationContext(BaseModel):
    """Context object containing all parameters needed for quantisation execution.

    Encapsulates quantisation parameters to reduce method argument counts and
    improve maintainability, following the parameter-object pattern.
    """

    model_config = ConfigDict(frozen=True)

    f16_model_path: Path
    model_source: ModelSource
    config: QuantisationConfig
    llama_env: LlamaCppEnvironment
    models_dir: Path
    imatrix_path: Path | None = None
    base_quant: str = "Q4_K_M"

    def get_output_path(self) -> Path:
        """Generate the output path for the quantised model.

        Returns:
            Path to the output GGUF file, built as
            {original_author}-{model_name}-{config.name}.gguf inside the
            model's subdirectory of models_dir.
        """
        output_filename = (
            f"{self.model_source.original_author}-"
            f"{self.model_source.model_name}-"
            f"{self.config.name}.gguf"
        )
        return self.models_dir / self.model_source.model_name / output_filename
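

# A minimal end-to-end sketch (illustrative only: the model names, paths, and
# binary locations below are assumptions; the real workflow builds these
# objects from parsed URLs and a detected llama.cpp installation).
if __name__ == "__main__":
    source = ModelSource(
        url="https://huggingface.co/meta-llama/Llama-3.1-8B",
        url_type=URLType.HUGGINGFACE,
        source_model="meta-llama/Llama-3.1-8B",
        original_author="meta-llama",
        model_name="Llama-3.1-8B",
    )
    context = QuantisationContext(
        f16_model_path=Path("models/Llama-3.1-8B/f16.gguf"),
        model_source=source,
        config=QuantisationConfig(name="Q4_K_M", description="Baseline quantisation"),
        llama_env=LlamaCppEnvironment(
            quantise_binary=Path("./llama-quantize"),
            cli_binary=Path("./llama-cli"),
            convert_script="convert_hf_to_gguf.py",
        ),
        models_dir=Path("models"),
    )
    # Prints: models/Llama-3.1-8B/meta-llama-Llama-3.1-8B-Q4_K_M.gguf
    print(context.get_output_path())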