"""Pydantic models for quantisation operations.
|
|
|
|
Contains data models specific to the quantisation workflow including
|
|
quantisation types, configurations, and results. Uses UK English spelling
|
|
conventions throughout (quantisation, not quantization).
|
|
"""

from __future__ import annotations

from enum import StrEnum
from typing import TYPE_CHECKING

from pydantic import BaseModel, ConfigDict, Field, field_validator

if TYPE_CHECKING:
    from pathlib import Path

class QuantisationType(StrEnum):
|
|
"""Available quantisation types for Bartowski-method GGUF model conversion.
|
|
|
|
Defines the specific quantisation strategies supported by this tool, ranging
|
|
from Q4_K_M baseline to Q4_K_XXL maximum precision variants. Each type
|
|
represents different trade-offs between model size and quality preservation
|
|
for embeddings, attention layers, and feed-forward networks.
|
|
"""
|
|
|
|
Q4_K_M = "Q4_K_M"
|
|
Q4_K_L = "Q4_K_L"
|
|
Q4_K_XL = "Q4_K_XL"
|
|
Q4_K_XXL = "Q4_K_XXL"
|
|
|
|
|
|
class URLType(StrEnum):
|
|
"""Supported URL formats for model source specification.
|
|
|
|
Categorises input URL formats to enable appropriate handling strategies.
|
|
HuggingFace URLs require full model download and conversion, whilst Ollama
|
|
GGUF URLs allow direct GGUF file downloads with pattern matching for
|
|
efficient processing of pre-quantised models.
|
|
"""
|
|
|
|
HUGGINGFACE = "huggingface"
|
|
OLLAMA_GGUF = "ollama_gguf"
|
|
|
|
|
|
class QuantisationConfig(BaseModel):
    """Configuration for a specific quantisation method with tensor-level precision control.

    Defines quantisation parameters including tensor type mappings and fallback
    methods for handling different model architectures. Enables fine-grained
    control over which layers receive higher precision treatment whilst
    maintaining compatibility across diverse model structures.
    """

    # Serialise any enum-valued fields by their plain string value on dump.
    model_config = ConfigDict(use_enum_values=True)

    name: str  # method identifier (e.g. a QuantisationType value); used in output filenames
    description: str  # human-readable summary of this method's trade-offs
    # Per-tensor quantisation overrides; empty dict means defaults apply everywhere.
    # NOTE(review): key semantics (literal tensor names vs patterns) not visible here — confirm at call sites.
    tensor_types: dict[str, str] = Field(default_factory=dict)
    # Ordered alternative parameter sets to try when the primary mapping fails.
    fallback_methods: list[dict[str, str]] = Field(default_factory=list)
class ModelSource(BaseModel):
    """Represents a model source with parsed information from URL analysis.

    Contains comprehensive metadata extracted from model URLs including source
    repository details, author information, and GGUF file patterns. Enables
    differentiation between regular HuggingFace repositories requiring conversion
    and GGUF repositories allowing direct file downloads.
    """

    # use_enum_values: url_type serialises as its string value.
    # protected_namespaces=(): allow field names starting with "model_" below.
    model_config = ConfigDict(use_enum_values=True, protected_namespaces=())

    url: str  # original URL as supplied; validated non-empty by validate_url below
    url_type: URLType  # which handling strategy applies (HuggingFace vs Ollama GGUF)
    source_model: str  # repository identifier parsed from the URL
    original_author: str  # upstream author/organisation name
    model_name: str  # bare model name
    gguf_file_pattern: str | None = None  # pattern for selecting files in GGUF repos
    is_gguf_repo: bool = False  # True when the source already hosts GGUF files

    @field_validator("url")
    @classmethod
    def validate_url(cls, v: str) -> str:
        """Validate that URL is not empty.

        Ensures the provided URL string is not empty or None,
        as this is required for model source identification.

        Returns:
            The validated URL string.

        Raises:
            ValueError: If URL is empty or None.
        """
        if not v:
            msg = "URL cannot be empty"
            raise ValueError(msg)
        return v
class QuantisationResult(BaseModel):
    """Result of a quantisation operation with comprehensive status tracking.

    Captures the outcome of individual quantisation attempts including success
    status, file paths, sizes, and error details. Supports workflow status
    tracking from planning through processing to completion, enabling real-time
    progress reporting and parallel upload coordination.
    """

    # arbitrary_types_allowed: required for the Path field.
    # use_enum_values: quantisation_type serialises as its string value.
    model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)

    quantisation_type: QuantisationType  # which variant this result describes
    success: bool  # True when quantisation completed successfully
    file_path: Path | None = None  # location of the produced GGUF file, if any
    file_size: str | None = None  # human-readable size string, when known
    method_used: str | None = None  # actual method applied (may differ from the plan)
    error_message: str | None = None  # failure detail when success is False
    # Workflow state; starts at "pending", then moves through
    # planned, processing, uploading, completed, failed.
    status: str = "pending"
class LlamaCppEnvironment(BaseModel):
    """Represents llama.cpp environment setup with binary and script locations.

    Encapsulates the runtime environment for llama.cpp tools including paths
    to quantisation binaries, CLI tools, and conversion scripts. Handles both
    local binary installations and repository-based setups to provide flexible
    deployment options across different system configurations.
    """

    # arbitrary_types_allowed: required for the Path fields.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    quantise_binary: Path  # UK spelling; path to the quantisation executable
    cli_binary: Path  # path to the llama.cpp CLI tool
    convert_script: str  # command or path for the model conversion script
    use_repo: bool = False  # True for a repository-based (cloned) setup
class QuantisationContext(BaseModel):
    """Context object containing all parameters needed for quantisation execution.

    Encapsulates quantisation parameters to reduce method argument counts
    and improve code maintainability following parameter object pattern.
    """

    # frozen=True: immutable once constructed, safe to pass between workflow steps.
    model_config = ConfigDict(frozen=True)

    f16_model_path: Path  # path to the F16 source model to quantise
    model_source: ModelSource  # parsed source metadata (author, model name, ...)
    config: QuantisationConfig  # quantisation method to apply
    llama_env: LlamaCppEnvironment  # resolved llama.cpp binaries and scripts
    models_dir: Path  # root directory holding per-model output folders
    imatrix_path: Path | None = None  # optional importance-matrix file
    base_quant: str = "Q4_K_M"  # base quantisation level identifier

    def get_output_path(self) -> Path:
        """Generate output path for quantised model.

        Returns:
            Path to the output GGUF file.
        """
        # Layout: <models_dir>/<model_name>/<author>-<model_name>-<config_name>.gguf
        output_filename = (
            f"{self.model_source.original_author}-"
            f"{self.model_source.model_name}-"
            f"{self.config.name}.gguf"
        )
        return self.models_dir / self.model_source.model_name / output_filename