Initial commit
This commit is contained in:
commit
ef7df1a8c3
28 changed files with 6829 additions and 0 deletions
168
helpers/models/quantisation.py
Normal file
168
helpers/models/quantisation.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
"""Pydantic models for quantisation operations.
|
||||
|
||||
Contains data models specific to the quantisation workflow including
|
||||
quantisation types, configurations, and results. Uses UK English spelling
|
||||
conventions throughout (quantisation, not quantization).
|
||||
"""
|
||||
|
||||
from __future__ import annotations

from enum import StrEnum
# NOTE: Path must be imported at runtime (not only under TYPE_CHECKING):
# pydantic resolves the `Path` annotations on model fields when the model
# classes below are built, so a typing-only import leaves those models
# unresolvable at instantiation time.
from pathlib import Path
from typing import TYPE_CHECKING

from pydantic import BaseModel, ConfigDict, Field, field_validator

if TYPE_CHECKING:
    from pathlib import Path
|
||||
|
||||
|
||||
class QuantisationType(StrEnum):
|
||||
"""Available quantisation types for Bartowski-method GGUF model conversion.
|
||||
|
||||
Defines the specific quantisation strategies supported by this tool, ranging
|
||||
from Q4_K_M baseline to Q4_K_XXL maximum precision variants. Each type
|
||||
represents different trade-offs between model size and quality preservation
|
||||
for embeddings, attention layers, and feed-forward networks.
|
||||
"""
|
||||
|
||||
Q4_K_M = "Q4_K_M"
|
||||
Q4_K_L = "Q4_K_L"
|
||||
Q4_K_XL = "Q4_K_XL"
|
||||
Q4_K_XXL = "Q4_K_XXL"
|
||||
|
||||
|
||||
class URLType(StrEnum):
|
||||
"""Supported URL formats for model source specification.
|
||||
|
||||
Categorises input URL formats to enable appropriate handling strategies.
|
||||
HuggingFace URLs require full model download and conversion, whilst Ollama
|
||||
GGUF URLs allow direct GGUF file downloads with pattern matching for
|
||||
efficient processing of pre-quantised models.
|
||||
"""
|
||||
|
||||
HUGGINGFACE = "huggingface"
|
||||
OLLAMA_GGUF = "ollama_gguf"
|
||||
|
||||
|
||||
class QuantisationConfig(BaseModel):
    """Configuration for a specific quantisation method with tensor-level precision control.

    Defines quantisation parameters including tensor type mappings and fallback
    methods for handling different model architectures. Enables fine-grained
    control over which layers receive higher precision treatment whilst
    maintaining compatibility across diverse model structures.
    """

    # Serialise enum-valued fields by their underlying string values.
    model_config = ConfigDict(use_enum_values=True)

    # Identifier for this quantisation method; used in output filenames
    # (see QuantisationContext.get_output_path).
    name: str
    # Human-readable description of the method.
    description: str
    # Mapping of tensor identifiers to quantisation type strings; empty
    # dict means no per-tensor overrides.
    tensor_types: dict[str, str] = Field(default_factory=dict)
    # Ordered list of alternative parameter sets — NOTE(review): presumably
    # tried in sequence when the primary tensor mapping is unsupported by a
    # given architecture; confirm against the quantisation workflow code.
    fallback_methods: list[dict[str, str]] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ModelSource(BaseModel):
    """Represents a model source with parsed information from URL analysis.

    Contains comprehensive metadata extracted from model URLs including source
    repository details, author information, and GGUF file patterns. Enables
    differentiation between regular HuggingFace repositories requiring conversion
    and GGUF repositories allowing direct file downloads.
    """

    # Serialise enums by value; disable pydantic's protected "model_"
    # namespace so the model_name field is accepted without warnings.
    model_config = ConfigDict(use_enum_values=True, protected_namespaces=())

    # Original URL as supplied by the caller.
    url: str
    # Which URL category the source was classified as.
    url_type: URLType
    # Repository identifier of the source model.
    source_model: str
    # Author/organisation name extracted from the URL.
    original_author: str
    # Bare model name (used for output directory and filename construction).
    model_name: str
    # Pattern for matching GGUF files in a GGUF repo; None for regular repos.
    gguf_file_pattern: str | None = None
    # True when the repository hosts pre-quantised GGUF files directly.
    is_gguf_repo: bool = False

    @field_validator("url")
    @classmethod
    def validate_url(cls, v: str) -> str:
        """Validate that URL is not empty.

        Ensures the provided URL string is not empty or None,
        as this is required for model source identification.

        Returns:
            The validated URL string.

        Raises:
            ValueError: If URL is empty or None.

        """
        if v:
            return v
        msg = "URL cannot be empty"
        raise ValueError(msg)
|
||||
|
||||
|
||||
class QuantisationResult(BaseModel):
    """Result of a quantisation operation with comprehensive status tracking.

    Captures the outcome of individual quantisation attempts including success
    status, file paths, sizes, and error details. Supports workflow status
    tracking from planning through processing to completion, enabling real-time
    progress reporting and parallel upload coordination.
    """

    # Serialise enums by value; allow arbitrary field types (e.g. Path).
    model_config = ConfigDict(use_enum_values=True, arbitrary_types_allowed=True)

    # Which quantisation variant this result describes.
    quantisation_type: QuantisationType
    # Whether the quantisation attempt succeeded.
    success: bool
    # Location of the produced GGUF file; None when no file was written.
    file_path: Path | None = None
    # Size of the output as a display string; None when unknown.
    file_size: str | None = None
    # Quantisation method actually applied; None when not recorded.
    method_used: str | None = None
    # Error detail — NOTE(review): presumably populated only on failure.
    error_message: str | None = None
    status: str = "pending"  # planned, processing, uploading, completed, failed
|
||||
|
||||
|
||||
class LlamaCppEnvironment(BaseModel):
    """Represents llama.cpp environment setup with binary and script locations.

    Encapsulates the runtime environment for llama.cpp tools including paths
    to quantisation binaries, CLI tools, and conversion scripts. Handles both
    local binary installations and repository-based setups to provide flexible
    deployment options across different system configurations.
    """

    # Allow arbitrary field types such as pathlib.Path.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Path to the quantisation executable.  # UK spelling
    quantise_binary: Path
    # Path to the llama.cpp CLI executable.
    cli_binary: Path
    # Conversion script reference — a str rather than a Path; NOTE(review):
    # presumably because it may be a full command line, not just a file path.
    convert_script: str
    # True when tools come from a cloned llama.cpp repository rather than
    # locally installed binaries.
    use_repo: bool = False
|
||||
|
||||
|
||||
class QuantisationContext(BaseModel):
    """Context object containing all parameters needed for quantisation execution.

    Encapsulates quantisation parameters to reduce method argument counts
    and improve code maintainability following parameter object pattern.
    """

    # Immutable once constructed.
    model_config = ConfigDict(frozen=True)

    # Path to the F16 GGUF produced by the conversion step.
    f16_model_path: Path
    # Parsed source metadata for the model being quantised.
    model_source: ModelSource
    # Quantisation method configuration to apply.
    config: QuantisationConfig
    # llama.cpp tool locations.
    llama_env: LlamaCppEnvironment
    # Root directory under which per-model output directories live.
    models_dir: Path
    # Optional importance-matrix file used during quantisation.
    imatrix_path: Path | None = None
    # Base quantisation level fallback.
    base_quant: str = "Q4_K_M"

    def get_output_path(self) -> Path:
        """Generate output path for quantised model.

        Builds ``<models_dir>/<model_name>/<author>-<model_name>-<config>.gguf``.

        Returns:
            Path to the output GGUF file.

        """
        source = self.model_source
        stem = "-".join((source.original_author, source.model_name, self.config.name))
        return self.models_dir / source.model_name / f"{stem}.gguf"
|
Loading…
Add table
Add a link
Reference in a new issue