Initial commit
This commit is contained in:
commit
ef7df1a8c3
28 changed files with 6829 additions and 0 deletions
168
helpers/models/quantisation.py
Normal file
168
helpers/models/quantisation.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
"""Pydantic models for quantisation operations.
|
||||
|
||||
Contains data models specific to the quantisation workflow including
|
||||
quantisation types, configurations, and results. Uses UK English spelling
|
||||
conventions throughout (quantisation, not quantization).
|
||||
"""
|
||||
|
||||
from __future__ import annotations

from enum import StrEnum
# NOTE: Path must be imported at runtime (not only under TYPE_CHECKING):
# pydantic resolves the `Path` annotations on model fields when the model
# classes below are built, so a typing-only import leaves those models
# unresolvable at instantiation time.
from pathlib import Path
from typing import TYPE_CHECKING

from pydantic import BaseModel, ConfigDict, Field, field_validator

if TYPE_CHECKING:
    from pathlib import Path
|
||||
|
||||
|
||||
class QuantisationType(StrEnum):
|
||||
"""Available quantisation types for Bartowski-method GGUF model conversion.
|
||||
|
||||
Defines the specific quantisation strategies supported by this tool, ranging
|
||||
from Q4_K_M baseline to Q4_K_XXL maximum precision variants. Each type
|
||||
represents different trade-offs between model size and quality preservation
|
||||
for embeddings, attention layers, and feed-forward networks.
|
||||
"""
|
||||
|
||||
Q4_K_M = "Q4_K_M"
|
||||
Q4_K_L = "Q4_K_L"
|
||||
Q4_K_XL = "Q4_K_XL"
|
||||
Q4_K_XXL = "Q4_K_XXL"
|
||||
|
||||
|
||||
class URLType(StrEnum):
|
||||
"""Supported URL formats for model source specification.
|
||||
|
||||
Categorises input URL formats to enable appropriate handling strategies.
|
||||
HuggingFace URLs require full model download and conversion, whilst Ollama
|
||||
GGUF URLs allow direct GGUF file downloads with pattern matching for
|
||||
efficient processing of pre-quantised models.
|
||||
"""
|
||||
|
||||
HUGGINGFACE = "huggingface"
|
||||
OLLAMA_GGUF = "ollama_gguf"
|
||||
|
||||
|
||||
class QuantisationConfig(BaseModel):
    """Configuration for a specific quantisation method with tensor-level precision control.

    Defines quantisation parameters including tensor type mappings and fallback
    methods for handling different model architectures. Enables fine-grained
    control over which layers receive higher precision treatment whilst
    maintaining compatibility across diverse model structures.
    """

    # Serialise enum-valued fields by their underlying string values.
    model_config = ConfigDict(use_enum_values=True)

    # Identifier for this quantisation method; used in output filenames
    # (see QuantisationContext.get_output_path).
    name: str
    # Human-readable description of the method.
    description: str
    # Mapping of tensor identifiers to quantisation type strings; empty
    # dict means no per-tensor overrides.
    tensor_types: dict[str, str] = Field(default_factory=dict)
    # Ordered list of alternative parameter sets — NOTE(review): presumably
    # tried in sequence when the primary tensor mapping is unsupported by a
    # given architecture; confirm against the quantisation workflow code.
    fallback_methods: list[dict[str, str]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ModelSource(BaseModel):
    """Represents a model source with parsed information from URL analysis.

    Contains comprehensive metadata extracted from model URLs including source
    repository details, author information, and GGUF file patterns. Enables
    differentiation between regular HuggingFace repositories requiring conversion
    and GGUF repositories allowing direct file downloads.
    """

    # Serialise enums by value; disable pydantic's protected "model_"
    # namespace so the model_name field is accepted without warnings.
    model_config = ConfigDict(use_enum_values=True, protected_namespaces=())

    # Original URL as supplied by the caller.
    url: str
    # Which URL category the source was classified as.
    url_type: URLType
    # Repository identifier of the source model.
    source_model: str
    # Author/organisation name extracted from the URL.
    original_author: str
    # Bare model name (used for output directory and filename construction).
    model_name: str
    # Pattern for matching GGUF files in a GGUF repo; None for regular repos.
    gguf_file_pattern: str | None = None
    # True when the repository hosts pre-quantised GGUF files directly.
    is_gguf_repo: bool = False

    @field_validator("url")
    @classmethod
    def validate_url(cls, v: str) -> str:
        """Validate that URL is not empty.

        Ensures the provided URL string is not empty or None,
        as this is required for model source identification.

        Returns:
            The validated URL string.

        Raises:
            ValueError: If URL is empty or None.

        """
        if v:
            return v
        msg = "URL cannot be empty"
        raise ValueError(msg)
|
||||
|
||||
|
||||
class QuantisationResult(BaseModel):
    """Result of a quantisation operation with comprehensive status tracking.

    Captures the outcome of individual quantisation attempts including success
    status, file paths, sizes, and error details. Supports workflow status
    tracking from planning through processing to completion, enabling real-time
    progress reporting and parallel upload coordination.
    """

    # Serialise enums by value; allow arbitrary field types (e.g. Path).
    model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)

    # Which quantisation variant this result describes.
    quantisation_type: QuantisationType
    # Whether the quantisation attempt succeeded.
    success: bool
    # Location of the produced GGUF file; None when no file was written.
    file_path: Path | None = None
    # Size of the output as a display string; None when unknown.
    file_size: str | None = None
    # Quantisation method actually applied; None when not recorded.
    method_used: str | None = None
    # Error detail — NOTE(review): presumably populated only on failure.
    error_message: str | None = None
    status: str = "pending"  # planned, processing, uploading, completed, failed
|
||||
|
||||
|
||||
class LlamaCppEnvironment(BaseModel):
    """Represents llama.cpp environment setup with binary and script locations.

    Encapsulates the runtime environment for llama.cpp tools including paths
    to quantisation binaries, CLI tools, and conversion scripts. Handles both
    local binary installations and repository-based setups to provide flexible
    deployment options across different system configurations.
    """

    # Allow arbitrary field types such as pathlib.Path.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Path to the quantisation executable.  # UK spelling
    quantise_binary: Path
    # Path to the llama.cpp CLI executable.
    cli_binary: Path
    # Conversion script reference — a str rather than a Path; NOTE(review):
    # presumably because it may be a full command line, not just a file path.
    convert_script: str
    # True when tools come from a cloned llama.cpp repository rather than
    # locally installed binaries.
    use_repo: bool = False
|
||||
|
||||
|
||||
class QuantisationContext(BaseModel):
    """Context object containing all parameters needed for quantisation execution.

    Encapsulates quantisation parameters to reduce method argument counts
    and improve code maintainability following parameter object pattern.
    """

    # Immutable once constructed.
    model_config = ConfigDict(frozen=True)

    # Path to the F16 GGUF produced by the conversion step.
    f16_model_path: Path
    # Parsed source metadata for the model being quantised.
    model_source: ModelSource
    # Quantisation method configuration to apply.
    config: QuantisationConfig
    # llama.cpp tool locations.
    llama_env: LlamaCppEnvironment
    # Root directory under which per-model output directories live.
    models_dir: Path
    # Optional importance-matrix file used during quantisation.
    imatrix_path: Path | None = None
    # Base quantisation level fallback.
    base_quant: str = "Q4_K_M"

    def get_output_path(self) -> Path:
        """Generate output path for quantised model.

        Builds ``<models_dir>/<model_name>/<author>-<model_name>-<config>.gguf``.

        Returns:
            Path to the output GGUF file.

        """
        source = self.model_source
        stem = "-".join((source.original_author, source.model_name, self.config.name))
        return self.models_dir / source.model_name / f"{stem}.gguf"
|
Loading…
Add table
Add a link
Reference in a new issue