llm-gguf-tools/helpers/utils/config_parser.py

"""Configuration parsing utilities.
Provides utilities for parsing model configurations, inferring parameters,
and handling architecture-specific settings. Uses UK English spelling
conventions throughout.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from helpers.models.conversion import GGUFParameters, ModelConfig, VisionConfig
from helpers.services.filesystem import FilesystemService
if TYPE_CHECKING:
from pathlib import Path


class ConfigParser:
    """Parses and transforms model configuration files.

    Handles loading of HuggingFace config.json files, parameter inference,
    and conversion to GGUF-compatible formats. Provides sensible defaults
    for missing values and architecture-specific handling.
    """

    def __init__(self) -> None:
        """Initialise ConfigParser."""
        self.fs = FilesystemService()

    def load_model_config(self, model_path: Path) -> ModelConfig:
        """Load model configuration from config.json file.

        Reads the standard HuggingFace config.json file and parses it into
        a structured ModelConfig instance with proper type validation.
        Handles vision model configurations and provides sensible defaults
        for missing values.

        Args:
            model_path: Path to the model directory containing config.json.

        Returns:
            Parsed ModelConfig instance.
        """
        config_file = model_path / "config.json"
        raw_config = self.fs.load_json_config(config_file)

        # Parse vision config if present
        vision_config = None
        if "vision_config" in raw_config:
            vision_config = VisionConfig(**raw_config["vision_config"])
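        # (Multimodal configs nest the image tower settings, e.g.
        # {"vision_config": {"hidden_size": 1024, "num_hidden_layers": 24}};
        # the values shown are illustrative, not from any specific model.)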

        # Create ModelConfig with parsed values
        return ModelConfig(
            architectures=raw_config.get("architectures", ["Unknown"]),
            model_type=raw_config.get("model_type", "unknown"),
            vocab_size=raw_config.get("vocab_size", 32000),
            max_position_embeddings=raw_config.get("max_position_embeddings", 2048),
            hidden_size=raw_config.get("hidden_size", 4096),
            num_hidden_layers=raw_config.get("num_hidden_layers", 32),
            intermediate_size=raw_config.get("intermediate_size", 11008),
            num_attention_heads=raw_config.get("num_attention_heads", 32),
            num_key_value_heads=raw_config.get("num_key_value_heads"),
            rope_theta=raw_config.get("rope_theta", 10000.0),
            rope_scaling=raw_config.get("rope_scaling"),
            rms_norm_eps=raw_config.get("rms_norm_eps", 1e-5),
            vision_config=vision_config,
        )

    def infer_gguf_parameters(self, config: ModelConfig) -> GGUFParameters:
        """Infer GGUF parameters from model configuration.

        Translates HuggingFace model configuration to GGUF parameter format,
        providing sensible defaults for missing values and handling various
        architecture conventions.

        Args:
            config: Parsed ModelConfig instance.

        Returns:
            GGUFParameters with inferred values.
        """
        # Calculate derived parameters
        num_heads = config.num_attention_heads
        embedding_length = config.hidden_size
        rope_dimension_count = embedding_length // num_heads
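        # (Worked example with assumed Llama-7B-style values: hidden_size 4096
        # over 32 heads gives 4096 // 32 = 128 RoPE dimensions per head.)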

        # Handle KV heads (for GQA models)
        num_kv_heads = config.num_key_value_heads or num_heads
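        # (For instance, a grouped-query model with 64 attention heads and
        # num_key_value_heads=8 shares each KV head across 8 query heads;
        # plain multi-head models omit the field and fall back to num_heads.)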

        # Create GGUFParameters using dict with aliases
        params_dict = {
            "vocab_size": config.vocab_size,
            "context_length": config.max_position_embeddings,
            "embedding_length": embedding_length,
            "block_count": config.num_hidden_layers,
            "feed_forward_length": config.intermediate_size,
            "attention.head_count": num_heads,
            "attention.head_count_kv": num_kv_heads,
            "attention.layer_norm_rms_epsilon": config.rms_norm_eps,
            "rope.freq_base": config.rope_theta,
            "rope.dimension_count": rope_dimension_count,
        }
        params = GGUFParameters.model_validate(params_dict)

        # Add RoPE scaling if present
        if config.rope_scaling:
            params.rope_scaling_type = config.rope_scaling.get("type", "linear")
            params.rope_scaling_factor = config.rope_scaling.get("factor", 1.0)
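            # (HuggingFace configs express this as, e.g.,
            # {"rope_scaling": {"type": "linear", "factor": 2.0}}, which
            # stretches the usable context window by the given factor.)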

        return params

    @staticmethod
    def get_architecture_mapping(architecture: str) -> str:
        """Map architecture names to known GGUF architectures.

        Provides fallback mappings for architectures not directly supported
        by GGUF, mapping them to similar known architectures.

        Args:
            architecture: Original architecture name from config.

        Returns:
            GGUF-compatible architecture name.
        """
        # Architecture mappings to known GGUF types
        mappings = {
            "DotsOCRForCausalLM": "qwen2",  # Similar architecture
            "GptOssForCausalLM": "llama",  # Use llama as fallback
            "MistralForCausalLM": "llama",  # Mistral is llama-like
            "Qwen2ForCausalLM": "qwen2",
            "LlamaForCausalLM": "llama",
            "GemmaForCausalLM": "gemma",
            "Phi3ForCausalLM": "phi3",
            # Add more mappings as needed
        }
        return mappings.get(architecture, "llama")  # Default to llama

    @staticmethod
    def load_tokeniser_config(model_path: Path) -> dict[str, Any]:
        """Load tokeniser configuration from model directory.

        Reads tokenizer_config.json to extract special token IDs and
        other tokenisation parameters.

        Args:
            model_path: Path to model directory.

        Returns:
            Tokeniser configuration dictionary.
        """
        fs = FilesystemService()
        tokeniser_config_path = model_path / "tokenizer_config.json"

        if not tokeniser_config_path.exists():
            # Return defaults if no config found
            return {
                "bos_token_id": 1,
                "eos_token_id": 2,
                "unk_token_id": 0,
                "pad_token_id": 0,
            }
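        # (The fallback IDs above follow the common SentencePiece/Llama
        # convention of UNK=0, BOS=1, EOS=2; other tokenisers may differ.)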

        config = fs.load_json_config(tokeniser_config_path)

        # Extract token IDs with defaults
        return {
            "bos_token_id": config.get("bos_token_id", 1),
            "eos_token_id": config.get("eos_token_id", 2),
            "unk_token_id": config.get("unk_token_id", 0),
            "pad_token_id": config.get("pad_token_id", 0),
            "model_type": config.get("model_type", "llama"),
        }
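

if __name__ == "__main__":
    # Minimal usage sketch, assuming a local HuggingFace-style model
    # directory. The path below is hypothetical; point it at any checkout
    # containing config.json (and optionally tokenizer_config.json).
    from pathlib import Path

    parser = ConfigParser()
    model_dir = Path("./models/example-model")  # hypothetical path

    model_config = parser.load_model_config(model_dir)
    gguf_params = parser.infer_gguf_parameters(model_config)
    arch = ConfigParser.get_architecture_mapping(model_config.architectures[0])
    tokeniser = ConfigParser.load_tokeniser_config(model_dir)

    print(f"GGUF architecture: {arch}")
    print(f"Inferred parameters: {gguf_params.model_dump()}")
    print(f"BOS token id: {tokeniser['bos_token_id']}")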