"""Pydantic models for GGUF conversion operations. Contains data models for SafeTensors to GGUF conversion including model configurations, parameter mappings, and tensor specifications. Uses UK English spelling conventions throughout. """ from __future__ import annotations from typing import Any from pydantic import BaseModel, ConfigDict, Field class ModelConfig(BaseModel): """Parsed model configuration from HuggingFace config.json. Represents the standard configuration metadata extracted from HuggingFace models, providing structured access to architecture details, hyperparameters, and quantisation settings required for GGUF conversion. """ model_config = ConfigDict(extra="allow") architectures: list[str] = Field(default_factory=lambda: ["Unknown"]) model_type: str = "unknown" vocab_size: int = 32000 max_position_embeddings: int = 2048 hidden_size: int = 4096 num_hidden_layers: int = 32 intermediate_size: int = 11008 num_attention_heads: int = 32 num_key_value_heads: int | None = None rope_theta: float = 10000.0 rope_scaling: dict[str, Any] | None = None rms_norm_eps: float = 1e-5 vision_config: VisionConfig | None = None def to_gguf_params(self) -> GGUFParameters: """Convert model configuration to GGUF parameters. Translates HuggingFace model configuration values to GGUF-specific parameter format, handling defaults and calculating derived values like RoPE dimension count from head dimensions. Returns: GGUFParameters instance with converted values. """ params = { "vocab_size": self.vocab_size, "context_length": self.max_position_embeddings, "embedding_length": self.hidden_size, "block_count": self.num_hidden_layers, "feed_forward_length": self.intermediate_size, "attention.head_count": self.num_attention_heads, "attention.head_count_kv": self.num_key_value_heads or self.num_attention_heads, "attention.layer_norm_rms_epsilon": self.rms_norm_eps, "rope.freq_base": self.rope_theta, "rope.dimension_count": self.hidden_size // self.num_attention_heads, } return GGUFParameters(**params) # type: ignore[arg-type] class VisionConfig(BaseModel): """Vision model configuration for multimodal models. Contains parameters specific to vision components in multimodal architectures, including patch sizes, embedding dimensions, and spatial merge configurations for proper GGUF metadata generation. """ model_config = ConfigDict(extra="allow") hidden_size: int = 1536 num_hidden_layers: int = 42 num_attention_heads: int = 12 intermediate_size: int = 4224 patch_size: int = 14 spatial_merge_size: int = 2 rms_norm_eps: float | None = None class GGUFParameters(BaseModel): """GGUF-specific parameters inferred from model configuration. Translates HuggingFace configuration values to GGUF parameter names and formats, providing a standardised interface for GGUF writer configuration across different model architectures and quantisation strategies. """ model_config = ConfigDict(extra="allow") # Basic parameters vocab_size: int context_length: int embedding_length: int block_count: int feed_forward_length: int # Attention parameters attention_head_count: int = Field(alias="attention.head_count") attention_head_count_kv: int = Field(alias="attention.head_count_kv") attention_layer_norm_rms_epsilon: float = Field(alias="attention.layer_norm_rms_epsilon") # RoPE parameters rope_freq_base: float = Field(alias="rope.freq_base") rope_dimension_count: int = Field(alias="rope.dimension_count") rope_scaling_type: str | None = Field(default=None, alias="rope.scaling.type") rope_scaling_factor: float | None = Field(default=None, alias="rope.scaling.factor") class TensorMapping(BaseModel): """Mapping configuration for tensor name conversion. Defines rules for translating between HuggingFace tensor naming conventions and GGUF tensor names, supporting both direct mappings and pattern-based transformations for layer-specific tensors. """ model_config = ConfigDict(frozen=True) # Direct mappings (exact name matches) direct_mappings: dict[str, str] = Field( default_factory=lambda: { "model.embed_tokens.weight": "token_embd.weight", "model.norm.weight": "output_norm.weight", "lm_head.weight": "output.weight", } ) # Layer component patterns (for .layers.N. tensors) layer_patterns: dict[str, str] = Field( default_factory=lambda: { "self_attn.q_proj.weight": "attn_q.weight", "self_attn.q_proj.bias": "attn_q.bias", "self_attn.k_proj.weight": "attn_k.weight", "self_attn.k_proj.bias": "attn_k.bias", "self_attn.v_proj.weight": "attn_v.weight", "self_attn.v_proj.bias": "attn_v.bias", "self_attn.o_proj": "attn_output.weight", "mlp.gate_proj": "ffn_gate.weight", "mlp.up_proj": "ffn_up.weight", "mlp.down_proj": "ffn_down.weight", "input_layernorm": "attn_norm.weight", "post_attention_layernorm": "ffn_norm.weight", } ) # Architecture-specific overrides architecture_overrides: dict[str, dict[str, str]] = Field(default_factory=dict)