# llm-gguf-tools/helpers/models/quantisation.py
"""Pydantic models for quantisation operations.
Contains data models specific to the quantisation workflow including
quantisation types, configurations, and results. Uses UK English spelling
conventions throughout (quantisation, not quantization).
"""

from __future__ import annotations

from enum import StrEnum

# Path must be importable at runtime: pydantic resolves the "Path | None"
# field annotations when the models below are built, so a TYPE_CHECKING-only
# import would fail with an undefined-annotation error.
from pathlib import Path

from pydantic import BaseModel, ConfigDict, Field, field_validator


class QuantisationType(StrEnum):
    """Available quantisation types for Bartowski-method GGUF model conversion.

    Defines the quantisation strategies supported by this tool, ranging from
    the Q4_K_M baseline to the maximum-precision Q4_K_XXL variant. Each type
    represents a different trade-off between model size and quality
    preservation for embeddings, attention layers, and feed-forward networks.
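
    Example (illustrative; StrEnum members compare equal to their string
    values, which is what makes these safe to pass to external tools):

        >>> QuantisationType.Q4_K_M == "Q4_K_M"
        True
        >>> QuantisationType("Q4_K_L").name
        'Q4_K_L'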
"""
Q4_K_M = "Q4_K_M"
Q4_K_L = "Q4_K_L"
Q4_K_XL = "Q4_K_XL"
Q4_K_XXL = "Q4_K_XXL"


class URLType(StrEnum):
    """Supported URL formats for model source specification.

    Categorises input URL formats to enable appropriate handling strategies.
    HuggingFace URLs require a full model download and conversion, whilst
    Ollama GGUF URLs allow direct GGUF file downloads with pattern matching
    for efficient processing of pre-quantised models.
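
    Example URL shapes (illustrative assumptions only; the authoritative
    parsing rules live in the URL-handling code elsewhere in this project):

        https://huggingface.co/<author>/<model>       -> URLType.HUGGINGFACE
        https://huggingface.co/<author>/<model>-GGUF  -> URLType.OLLAMA_GGUF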
"""
HUGGINGFACE = "huggingface"
OLLAMA_GGUF = "ollama_gguf"


class QuantisationConfig(BaseModel):
    """Configuration for a specific quantisation method with tensor-level precision control.

    Defines quantisation parameters, including tensor type mappings and
    fallback methods for handling different model architectures. Enables
    fine-grained control over which layers receive higher-precision treatment
    whilst maintaining compatibility across diverse model structures.
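
    Example (a minimal sketch; the tensor name and method strings shown here
    are illustrative assumptions, not a documented schema):

        >>> config = QuantisationConfig(
        ...     name="Q4_K_L",
        ...     description="Q4_K_M base with Q8_0 embeddings",
        ...     tensor_types={"token_embd.weight": "Q8_0"},
        ... )
        >>> config.fallback_methods
        []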
"""
model_config = ConfigDict(use_enum_values=True)
name: str
description: str
tensor_types: dict[str, str] = Field(default_factory=dict)
fallback_methods: list[dict[str, str]] = Field(default_factory=list)


class ModelSource(BaseModel):
    """Represents a model source with information parsed from URL analysis.

    Contains metadata extracted from model URLs, including source repository
    details, author information, and GGUF file patterns. Enables
    differentiation between regular HuggingFace repositories, which require
    conversion, and GGUF repositories, which allow direct file downloads.
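
    Example (illustrative values for a regular HuggingFace repository):

        >>> source = ModelSource(
        ...     url="https://huggingface.co/meta-llama/Llama-3.1-8B",
        ...     url_type=URLType.HUGGINGFACE,
        ...     source_model="meta-llama/Llama-3.1-8B",
        ...     original_author="meta-llama",
        ...     model_name="Llama-3.1-8B",
        ... )
        >>> source.is_gguf_repo
        False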
"""
model_config = ConfigDict(use_enum_values=True, protected_namespaces=())
url: str
url_type: URLType
source_model: str
original_author: str
model_name: str
gguf_file_pattern: str | None = None
is_gguf_repo: bool = False
@field_validator("url")
@classmethod
def validate_url(cls, v: str) -> str:
"""Validate that URL is not empty.
Ensures the provided URL string is not empty or None,
as this is required for model source identification.
Returns:
The validated URL string.
Raises:
ValueError: If URL is empty or None.
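
        Example (illustrative; an empty URL surfaces as a pydantic
        ValidationError when the model is constructed):

            >>> ModelSource(url="", url_type=URLType.HUGGINGFACE,
            ...     source_model="s", original_author="a",
            ...     model_name="m")  # doctest: +IGNORE_EXCEPTION_DETAIL
            Traceback (most recent call last):
                ...
            ValidationError: ...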
"""
if not v:
msg = "URL cannot be empty"
raise ValueError(msg)
return v


class QuantisationResult(BaseModel):
    """Result of a quantisation operation with comprehensive status tracking.

    Captures the outcome of an individual quantisation attempt, including
    success status, file path, size, and error details. Supports workflow
    status tracking from planning through processing to completion, enabling
    real-time progress reporting and parallel upload coordination.
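
    Example (a minimal sketch of a freshly planned result):

        >>> result = QuantisationResult(
        ...     quantisation_type=QuantisationType.Q4_K_M,
        ...     success=False,
        ... )
        >>> result.status
        'pending'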
"""
model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)
quantisation_type: QuantisationType
success: bool
file_path: Path | None = None
file_size: str | None = None
method_used: str | None = None
error_message: str | None = None
status: str = "pending" # planned, processing, uploading, completed, failed


class LlamaCppEnvironment(BaseModel):
    """Represents a llama.cpp environment with binary and script locations.

    Encapsulates the runtime environment for llama.cpp tools, including paths
    to the quantisation binary, CLI binary, and conversion script. Handles
    both local binary installations and repository-based setups to provide
    flexible deployment options across different system configurations.
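
    Example (illustrative paths for a local binary installation; the binary
    and script names are assumptions based on llama.cpp's tool naming):

        >>> env = LlamaCppEnvironment(
        ...     quantise_binary=Path("./llama-quantize"),
        ...     cli_binary=Path("./llama-cli"),
        ...     convert_script="convert_hf_to_gguf.py",
        ... )
        >>> env.use_repo
        False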
"""
model_config = ConfigDict(arbitrary_types_allowed=True)
quantise_binary: Path # UK spelling
cli_binary: Path
convert_script: str
use_repo: bool = False


class QuantisationContext(BaseModel):
    """Context object containing all parameters needed for quantisation execution.

    Encapsulates quantisation parameters to reduce method argument counts and
    improve maintainability, following the parameter-object pattern.
    """

    model_config = ConfigDict(frozen=True)

    f16_model_path: Path
    model_source: ModelSource
    config: QuantisationConfig
    llama_env: LlamaCppEnvironment
    models_dir: Path
    imatrix_path: Path | None = None
    base_quant: str = "Q4_K_M"

    def get_output_path(self) -> Path:
        """Generate the output path for the quantised model.

        Returns:
            Path to the output GGUF file, built as
            {original_author}-{model_name}-{config.name}.gguf inside the
            model's subdirectory of models_dir.
        """
        output_filename = (
            f"{self.model_source.original_author}-"
            f"{self.model_source.model_name}-"
            f"{self.config.name}.gguf"
        )
        return self.models_dir / self.model_source.model_name / output_filename
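

# A minimal end-to-end sketch (illustrative only: the model names, paths, and
# binary locations below are assumptions; the real workflow builds these
# objects from parsed URLs and a detected llama.cpp installation).
if __name__ == "__main__":
    source = ModelSource(
        url="https://huggingface.co/meta-llama/Llama-3.1-8B",
        url_type=URLType.HUGGINGFACE,
        source_model="meta-llama/Llama-3.1-8B",
        original_author="meta-llama",
        model_name="Llama-3.1-8B",
    )
    context = QuantisationContext(
        f16_model_path=Path("models/Llama-3.1-8B/f16.gguf"),
        model_source=source,
        config=QuantisationConfig(name="Q4_K_M", description="Baseline quantisation"),
        llama_env=LlamaCppEnvironment(
            quantise_binary=Path("./llama-quantize"),
            cli_binary=Path("./llama-cli"),
            convert_script="convert_hf_to_gguf.py",
        ),
        models_dir=Path("models"),
    )
    # Prints: models/Llama-3.1-8B/meta-llama-Llama-3.1-8B-Q4_K_M.gguf
    print(context.get_output_path())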