"""Pydantic models for quantisation operations. Contains data models specific to the quantisation workflow including quantisation types, configurations, and results. Uses UK English spelling conventions throughout (quantisation, not quantization). """ from __future__ import annotations from enum import StrEnum from typing import TYPE_CHECKING from pydantic import BaseModel, ConfigDict, Field, field_validator if TYPE_CHECKING: from pathlib import Path class QuantisationType(StrEnum): """Available quantisation types for Bartowski-method GGUF model conversion. Defines the specific quantisation strategies supported by this tool, ranging from Q4_K_M baseline to Q4_K_XXL maximum precision variants. Each type represents different trade-offs between model size and quality preservation for embeddings, attention layers, and feed-forward networks. """ Q4_K_M = "Q4_K_M" Q4_K_L = "Q4_K_L" Q4_K_XL = "Q4_K_XL" Q4_K_XXL = "Q4_K_XXL" class URLType(StrEnum): """Supported URL formats for model source specification. Categorises input URL formats to enable appropriate handling strategies. HuggingFace URLs require full model download and conversion, whilst Ollama GGUF URLs allow direct GGUF file downloads with pattern matching for efficient processing of pre-quantised models. """ HUGGINGFACE = "huggingface" OLLAMA_GGUF = "ollama_gguf" class QuantisationConfig(BaseModel): """Configuration for a specific quantisation method with tensor-level precision control. Defines quantisation parameters including tensor type mappings and fallback methods for handling different model architectures. Enables fine-grained control over which layers receive higher precision treatment whilst maintaining compatibility across diverse model structures. """ model_config = ConfigDict(use_enum_values=True) name: str description: str tensor_types: dict[str, str] = Field(default_factory=dict) fallback_methods: list[dict[str, str]] = Field(default_factory=list) class ModelSource(BaseModel): """Represents a model source with parsed information from URL analysis. Contains comprehensive metadata extracted from model URLs including source repository details, author information, and GGUF file patterns. Enables differentiation between regular HuggingFace repositories requiring conversion and GGUF repositories allowing direct file downloads. """ model_config = ConfigDict(use_enum_values=True, protected_namespaces=()) url: str url_type: URLType source_model: str original_author: str model_name: str gguf_file_pattern: str | None = None is_gguf_repo: bool = False @field_validator("url") @classmethod def validate_url(cls, v: str) -> str: """Validate that URL is not empty. Ensures the provided URL string is not empty or None, as this is required for model source identification. Returns: The validated URL string. Raises: ValueError: If URL is empty or None. """ if not v: msg = "URL cannot be empty" raise ValueError(msg) return v class QuantisationResult(BaseModel): """Result of a quantisation operation with comprehensive status tracking. Captures the outcome of individual quantisation attempts including success status, file paths, sizes, and error details. Supports workflow status tracking from planning through processing to completion, enabling real-time progress reporting and parallel upload coordination. """ model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True) quantisation_type: QuantisationType success: bool file_path: Path | None = None file_size: str | None = None method_used: str | None = None error_message: str | None = None status: str = "pending" # planned, processing, uploading, completed, failed class LlamaCppEnvironment(BaseModel): """Represents llama.cpp environment setup with binary and script locations. Encapsulates the runtime environment for llama.cpp tools including paths to quantisation binaries, CLI tools, and conversion scripts. Handles both local binary installations and repository-based setups to provide flexible deployment options across different system configurations. """ model_config = ConfigDict(arbitrary_types_allowed=True) quantise_binary: Path # UK spelling cli_binary: Path convert_script: str use_repo: bool = False class QuantisationContext(BaseModel): """Context object containing all parameters needed for quantisation execution. Encapsulates quantisation parameters to reduce method argument counts and improve code maintainability following parameter object pattern. """ model_config = ConfigDict(frozen=True) f16_model_path: Path model_source: ModelSource config: QuantisationConfig llama_env: LlamaCppEnvironment models_dir: Path imatrix_path: Path | None = None base_quant: str = "Q4_K_M" def get_output_path(self) -> Path: """Generate output path for quantised model. Returns: Path to the output GGUF file. """ output_filename = ( f"{self.model_source.original_author}-" f"{self.model_source.model_name}-" f"{self.config.name}.gguf" ) return self.models_dir / self.model_source.model_name / output_filename