"""Pydantic models for GGUF conversion operations.
|
|
|
|
Contains data models for SafeTensors to GGUF conversion including
|
|
model configurations, parameter mappings, and tensor specifications.
|
|
Uses UK English spelling conventions throughout.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
class ModelConfig(BaseModel):
    """Parsed model configuration from HuggingFace config.json.

    Represents the standard configuration metadata extracted from HuggingFace
    models, providing structured access to architecture details, hyperparameters,
    and quantisation settings required for GGUF conversion.
    """

    model_config = ConfigDict(extra="allow")

    architectures: list[str] = Field(default_factory=lambda: ["Unknown"])
    model_type: str = "unknown"
    vocab_size: int = 32000
    max_position_embeddings: int = 2048
    hidden_size: int = 4096
    num_hidden_layers: int = 32
    intermediate_size: int = 11008
    num_attention_heads: int = 32
    num_key_value_heads: int | None = None
    rope_theta: float = 10000.0
    rope_scaling: dict[str, Any] | None = None
    rms_norm_eps: float = 1e-5
    # Forward reference: VisionConfig is defined later in this module and is
    # resolved lazily by Pydantic on first validation.
    vision_config: VisionConfig | None = None

    def to_gguf_params(self) -> GGUFParameters:
        """Convert model configuration to GGUF parameters.

        Translates HuggingFace model configuration values to GGUF-specific
        parameter format, handling defaults and calculating derived values
        like the RoPE dimension count from head dimensions.

        Returns:
            GGUFParameters instance with converted values.

        """
        params = {
            "vocab_size": self.vocab_size,
            "context_length": self.max_position_embeddings,
            "embedding_length": self.hidden_size,
            "block_count": self.num_hidden_layers,
            "feed_forward_length": self.intermediate_size,
            "attention.head_count": self.num_attention_heads,
            "attention.head_count_kv": self.num_key_value_heads or self.num_attention_heads,
            "attention.layer_norm_rms_epsilon": self.rms_norm_eps,
            "rope.freq_base": self.rope_theta,
            "rope.dimension_count": self.hidden_size // self.num_attention_heads,
        }
        return GGUFParameters(**params)  # type: ignore[arg-type]
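

# A minimal usage sketch for ModelConfig.to_gguf_params (illustrative values;
# the derived fields follow from the defaults above):
#
#     config = ModelConfig(num_key_value_heads=8)
#     params = config.to_gguf_params()
#     params.rope_dimension_count     # -> 128 (hidden_size 4096 // 32 heads)
#     params.attention_head_count_kv  # -> 8; falls back to num_attention_heads
#                                     #    when num_key_value_heads is None

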
class VisionConfig(BaseModel):
    """Vision model configuration for multimodal models.

    Contains parameters specific to vision components in multimodal
    architectures, including patch sizes, embedding dimensions, and spatial
    merge configurations for proper GGUF metadata generation.
    """

    model_config = ConfigDict(extra="allow")

    hidden_size: int = 1536
    num_hidden_layers: int = 42
    num_attention_heads: int = 12
    intermediate_size: int = 4224
    patch_size: int = 14
    spatial_merge_size: int = 2
    rms_norm_eps: float | None = None
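

# VisionConfig is consumed through ModelConfig.vision_config; a small sketch
# of a nested config.json section (illustrative values, assumes Pydantic v2):
#
#     config = ModelConfig.model_validate({"vision_config": {"patch_size": 14}})
#     config.vision_config.patch_size  # -> 14

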
class GGUFParameters(BaseModel):
    """GGUF-specific parameters inferred from model configuration.

    Translates HuggingFace configuration values to GGUF parameter names and
    formats, providing a standardised interface for GGUF writer configuration
    across different model architectures and quantisation strategies.
    """

    model_config = ConfigDict(extra="allow")

    # Basic parameters
    vocab_size: int
    context_length: int
    embedding_length: int
    block_count: int
    feed_forward_length: int

    # Attention parameters
    attention_head_count: int = Field(alias="attention.head_count")
    attention_head_count_kv: int = Field(alias="attention.head_count_kv")
    attention_layer_norm_rms_epsilon: float = Field(alias="attention.layer_norm_rms_epsilon")

    # RoPE parameters
    rope_freq_base: float = Field(alias="rope.freq_base")
    rope_dimension_count: int = Field(alias="rope.dimension_count")
    rope_scaling_type: str | None = Field(default=None, alias="rope.scaling.type")
    rope_scaling_factor: float | None = Field(default=None, alias="rope.scaling.factor")
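

# Field aliases mean GGUFParameters is constructed with the dotted GGUF key
# names, while attribute access uses the underscored Python names (assumes
# Pydantic v2 defaults, where an aliased field must be populated by alias):
#
#     params = ModelConfig().to_gguf_params()
#     params.attention_head_count       # -> 32
#     params.model_dump(by_alias=True)  # restores the dotted GGUF key names

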
class TensorMapping(BaseModel):
    """Mapping configuration for tensor name conversion.

    Defines rules for translating between HuggingFace tensor naming conventions
    and GGUF tensor names, supporting both direct mappings and pattern-based
    transformations for layer-specific tensors.
    """

    model_config = ConfigDict(frozen=True)

    # Direct mappings (exact name matches)
    direct_mappings: dict[str, str] = Field(
        default_factory=lambda: {
            "model.embed_tokens.weight": "token_embd.weight",
            "model.norm.weight": "output_norm.weight",
            "lm_head.weight": "output.weight",
        }
    )

    # Layer component patterns (for .layers.N. tensors). Keys carry the full
    # ".weight"/".bias" suffix so component lookups stay unambiguous.
    layer_patterns: dict[str, str] = Field(
        default_factory=lambda: {
            "self_attn.q_proj.weight": "attn_q.weight",
            "self_attn.q_proj.bias": "attn_q.bias",
            "self_attn.k_proj.weight": "attn_k.weight",
            "self_attn.k_proj.bias": "attn_k.bias",
            "self_attn.v_proj.weight": "attn_v.weight",
            "self_attn.v_proj.bias": "attn_v.bias",
            "self_attn.o_proj.weight": "attn_output.weight",
            "mlp.gate_proj.weight": "ffn_gate.weight",
            "mlp.up_proj.weight": "ffn_up.weight",
            "mlp.down_proj.weight": "ffn_down.weight",
            "input_layernorm.weight": "attn_norm.weight",
            "post_attention_layernorm.weight": "ffn_norm.weight",
        }
    )

    # Architecture-specific overrides
    architecture_overrides: dict[str, dict[str, str]] = Field(default_factory=dict)
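

# A minimal end-to-end sketch; "map_tensor_name" is a hypothetical helper
# illustrating how these rules might be applied, not part of this module's
# public interface. It assumes GGUF's "blk.N." layer-prefix convention.
if __name__ == "__main__":
    import re

    def map_tensor_name(name: str, mapping: TensorMapping) -> str | None:
        """Translate a HuggingFace tensor name to its GGUF equivalent."""
        # Exact matches first (embeddings, final norm, output head).
        if name in mapping.direct_mappings:
            return mapping.direct_mappings[name]
        # Layer tensors: model.layers.N.<component> -> blk.N.<gguf name>
        match = re.match(r"model\.layers\.(\d+)\.(.+)$", name)
        if match:
            layer, component = match.groups()
            gguf_component = mapping.layer_patterns.get(component)
            if gguf_component is not None:
                return f"blk.{layer}.{gguf_component}"
        return None  # Unmapped tensors are left to the caller.

    mapping = TensorMapping()
    print(map_tensor_name("model.embed_tokens.weight", mapping))
    # -> token_embd.weight
    print(map_tensor_name("model.layers.3.self_attn.q_proj.weight", mapping))
    # -> blk.3.attn_q.weight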