llm-gguf-tools/helpers/models/conversion.py
"""Pydantic models for GGUF conversion operations.
Contains data models for SafeTensors to GGUF conversion including
model configurations, parameter mappings, and tensor specifications.
Uses UK English spelling conventions throughout.
"""
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, ConfigDict, Field


class ModelConfig(BaseModel):
    """Parsed model configuration from HuggingFace config.json.

    Represents the standard configuration metadata extracted from HuggingFace
    models, providing structured access to architecture details, hyperparameters,
    and quantisation settings required for GGUF conversion.
    """

    model_config = ConfigDict(extra="allow")

    architectures: list[str] = Field(default_factory=lambda: ["Unknown"])
    model_type: str = "unknown"
    vocab_size: int = 32000
    max_position_embeddings: int = 2048
    hidden_size: int = 4096
    num_hidden_layers: int = 32
    intermediate_size: int = 11008
    num_attention_heads: int = 32
    num_key_value_heads: int | None = None
    rope_theta: float = 10000.0
    rope_scaling: dict[str, Any] | None = None
    rms_norm_eps: float = 1e-5
    vision_config: VisionConfig | None = None

    def to_gguf_params(self) -> GGUFParameters:
        """Convert model configuration to GGUF parameters.

        Translates HuggingFace model configuration values to GGUF-specific
        parameter format, handling defaults and calculating derived values
        like RoPE dimension count from head dimensions.

        Returns:
            GGUFParameters instance with converted values.
        """
        params = {
            "vocab_size": self.vocab_size,
            "context_length": self.max_position_embeddings,
            "embedding_length": self.hidden_size,
            "block_count": self.num_hidden_layers,
            "feed_forward_length": self.intermediate_size,
            "attention.head_count": self.num_attention_heads,
            "attention.head_count_kv": self.num_key_value_heads or self.num_attention_heads,
            "attention.layer_norm_rms_epsilon": self.rms_norm_eps,
            "rope.freq_base": self.rope_theta,
            "rope.dimension_count": self.hidden_size // self.num_attention_heads,
        }
        return GGUFParameters(**params)  # type: ignore[arg-type]
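
# Illustrative usage (a sketch, not part of the original module): given a trimmed
# config.json payload, ModelConfig fills in defaults and to_gguf_params() derives
# the GGUF keys. The numeric values below are hypothetical.
#
#     >>> cfg = ModelConfig(hidden_size=2048, num_hidden_layers=16, num_attention_heads=16)
#     >>> gguf = cfg.to_gguf_params()
#     >>> gguf.rope_dimension_count
#     128
#     >>> gguf.attention_head_count_kv  # falls back to num_attention_heads
#     16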


class VisionConfig(BaseModel):
    """Vision model configuration for multimodal models.

    Contains parameters specific to vision components in multimodal architectures,
    including patch sizes, embedding dimensions, and spatial merge configurations
    for proper GGUF metadata generation.
    """

    model_config = ConfigDict(extra="allow")

    hidden_size: int = 1536
    num_hidden_layers: int = 42
    num_attention_heads: int = 12
    intermediate_size: int = 4224
    patch_size: int = 14
    spatial_merge_size: int = 2
    rms_norm_eps: float | None = None
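
# Illustrative nesting (hypothetical values): multimodal configs carry the vision
# block under "vision_config", which ModelConfig parses into this model.
#
#     >>> cfg = ModelConfig(**{"model_type": "qwen2_vl", "vision_config": {"patch_size": 14}})
#     >>> cfg.vision_config.patch_size
#     14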


class GGUFParameters(BaseModel):
    """GGUF-specific parameters inferred from model configuration.

    Translates HuggingFace configuration values to GGUF parameter names and
    formats, providing a standardised interface for GGUF writer configuration
    across different model architectures and quantisation strategies.
    """

    model_config = ConfigDict(extra="allow")

    # Basic parameters
    vocab_size: int
    context_length: int
    embedding_length: int
    block_count: int
    feed_forward_length: int

    # Attention parameters
    attention_head_count: int = Field(alias="attention.head_count")
    attention_head_count_kv: int = Field(alias="attention.head_count_kv")
    attention_layer_norm_rms_epsilon: float = Field(alias="attention.layer_norm_rms_epsilon")

    # RoPE parameters
    rope_freq_base: float = Field(alias="rope.freq_base")
    rope_dimension_count: int = Field(alias="rope.dimension_count")
    rope_scaling_type: str | None = Field(default=None, alias="rope.scaling.type")
    rope_scaling_factor: float | None = Field(default=None, alias="rope.scaling.factor")
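
# Note (illustrative, assuming Pydantic v2 alias semantics): construction expects
# the dotted GGUF-style keys, attribute access uses the underscore names, and
# model_dump(by_alias=True) restores the dotted form. Values below are examples.
#
#     >>> p = GGUFParameters(**{"vocab_size": 32000, "context_length": 2048,
#     ...                       "embedding_length": 4096, "block_count": 32,
#     ...                       "feed_forward_length": 11008,
#     ...                       "attention.head_count": 32, "attention.head_count_kv": 32,
#     ...                       "attention.layer_norm_rms_epsilon": 1e-5,
#     ...                       "rope.freq_base": 10000.0, "rope.dimension_count": 128})
#     >>> p.attention_head_count
#     32
#     >>> p.model_dump(by_alias=True)["attention.head_count"]
#     32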


class TensorMapping(BaseModel):
    """Mapping configuration for tensor name conversion.

    Defines rules for translating between HuggingFace tensor naming conventions
    and GGUF tensor names, supporting both direct mappings and pattern-based
    transformations for layer-specific tensors.
    """

    model_config = ConfigDict(frozen=True)

    # Direct mappings (exact name matches)
    direct_mappings: dict[str, str] = Field(
        default_factory=lambda: {
            "model.embed_tokens.weight": "token_embd.weight",
            "model.norm.weight": "output_norm.weight",
            "lm_head.weight": "output.weight",
        }
    )

    # Layer component patterns (for .layers.N. tensors)
    layer_patterns: dict[str, str] = Field(
        default_factory=lambda: {
            "self_attn.q_proj.weight": "attn_q.weight",
            "self_attn.q_proj.bias": "attn_q.bias",
            "self_attn.k_proj.weight": "attn_k.weight",
            "self_attn.k_proj.bias": "attn_k.bias",
            "self_attn.v_proj.weight": "attn_v.weight",
            "self_attn.v_proj.bias": "attn_v.bias",
            "self_attn.o_proj": "attn_output.weight",
            "mlp.gate_proj": "ffn_gate.weight",
            "mlp.up_proj": "ffn_up.weight",
            "mlp.down_proj": "ffn_down.weight",
            "input_layernorm": "attn_norm.weight",
            "post_attention_layernorm": "ffn_norm.weight",
        }
    )

    # Architecture-specific overrides
    architecture_overrides: dict[str, dict[str, str]] = Field(default_factory=dict)
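

# Hypothetical usage sketch (not part of this module): the conversion code in
# llm-gguf-tools applies these tables elsewhere; a minimal mapper over them might
# look like the following, assuming the standard GGUF "blk.N." prefix for
# per-layer tensors.
#
#     import re
#
#     def map_tensor_name(name: str, mapping: TensorMapping) -> str | None:
#         # Exact-name tensors (embeddings, final norm, output head).
#         if name in mapping.direct_mappings:
#             return mapping.direct_mappings[name]
#         # Per-layer tensors: "model.layers.N.<component>" -> "blk.N.<gguf name>".
#         match = re.match(r"model\.layers\.(\d+)\.(.+)", name)
#         if match:
#             layer, component = match.groups()
#             for pattern, gguf_name in mapping.layer_patterns.items():
#                 if component.startswith(pattern):
#                     return f"blk.{layer}.{gguf_name}"
#         return None  # Unmapped tensors are left for the caller to handle.
#
#     map_tensor_name("model.layers.3.mlp.up_proj.weight", TensorMapping())
#     # -> "blk.3.ffn_up.weight"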