# llm-gguf-tools/helpers/services/gguf.py

"""GGUF file operations service.
Provides unified interface for creating, writing, and manipulating GGUF files.
Consolidates GGUF-specific operations from conversion and quantisation workflows.
Uses UK English spelling conventions throughout.
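
Example (a minimal sketch; the ModelConfig and TensorMapper instances are
assumed to come from elsewhere in this project):

    from pathlib import Path

    ok = GGUFConverter.convert_safetensors(
        Path("./source-model"), Path("./model.gguf"),
        model_config, "llama", tensor_mapper,
    )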
"""
from __future__ import annotations

import gc
from typing import TYPE_CHECKING, Any, Protocol

import gguf
import torch
from safetensors import safe_open

from helpers.logger import logger
from helpers.services.filesystem import FilesystemService
from helpers.utils.config_parser import ConfigParser

if TYPE_CHECKING:
    from pathlib import Path

    import numpy as np

    from helpers.models.conversion import ModelConfig


class VisionConfig(Protocol):
    """Protocol for vision model configuration."""

    hidden_size: int
    num_hidden_layers: int
    num_attention_heads: int
    intermediate_size: int
    patch_size: int
    spatial_merge_size: int


class TensorMapper(Protocol):
    """Protocol for tensor name mapping."""

    def map_tensor_name(self, name: str) -> str | None:
        """Map a source tensor name to its GGUF equivalent, or return None to skip it."""
        ...
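

# A conforming TensorMapper might look like this (illustrative sketch only;
# the project's real mappers live elsewhere and cover many more names):
#
#     class SimpleMapper:
#         def map_tensor_name(self, name: str) -> str | None:
#             if name == "model.embed_tokens.weight":
#                 return "token_embd.weight"
#             return None  # unmapped tensors are skipped by the converter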


class GGUFWriter:
    """Manages GGUF file creation and metadata writing.

    Provides a high-level interface for GGUF file operations including
    metadata configuration, tensor addition, and tokeniser integration.
    Encapsulates low-level GGUF library interactions for consistent error
    handling.
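
    Example (minimal sketch; `model_config` is a populated ModelConfig):

        writer = GGUFWriter(Path("model.gguf"), "llama")
        writer.add_metadata(model_config, "my-model")
        writer.add_tokeniser({"bos_token_id": 1, "eos_token_id": 2})
        writer.finalise()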
"""

    def __init__(self, output_path: Path, architecture: str) -> None:
        """Initialise GGUF writer with output path and architecture.

        Creates the underlying GGUF writer instance and prepares for metadata
        and tensor addition. Sets up the file structure for the specified
        model architecture.
        """
        self.output_path = output_path
        self.architecture = architecture
        self.writer = gguf.GGUFWriter(str(output_path), architecture)
        logger.info(f"Created GGUF writer for {architecture} architecture")

    def add_metadata(self, model_config: ModelConfig, model_name: str) -> None:
        """Add comprehensive metadata from model configuration.

        Writes general model information, architectural parameters, and
        quantisation settings to the GGUF file header. Handles both standard
        and vision model configurations with appropriate parameter mapping.
        """
# General metadata
self.writer.add_name(model_name)
self.writer.add_description(f"Converted from {model_config.architectures[0]}")
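        # ALL_F32 records the file as unquantised 32-bit floats; any
        # quantisation happens in a separate downstream pass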
self.writer.add_file_type(gguf.LlamaFileType.ALL_F32)
# Model parameters from config
params = model_config.to_gguf_params()
self.writer.add_context_length(params.context_length)
self.writer.add_embedding_length(params.embedding_length)
self.writer.add_block_count(params.block_count)
self.writer.add_feed_forward_length(params.feed_forward_length)
self.writer.add_head_count(params.attention_head_count)
self.writer.add_head_count_kv(params.attention_head_count_kv)
self.writer.add_layer_norm_rms_eps(params.attention_layer_norm_rms_epsilon)
self.writer.add_rope_freq_base(params.rope_freq_base)
self.writer.add_rope_dimension_count(params.rope_dimension_count)
logger.info(f"Added metadata: {params.block_count} layers, {params.context_length} context")

    def add_vision_metadata(self, vision_config: VisionConfig | None) -> None:
        """Add vision model parameters to GGUF metadata.

        Configures vision-specific parameters for multimodal models, including
        embedding dimensions, attention heads, and spatial processing settings.
        """
if not vision_config:
return
logger.info("Adding vision model parameters...")
self.writer.add_vision_embedding_length(vision_config.hidden_size)
self.writer.add_vision_block_count(vision_config.num_hidden_layers)
self.writer.add_vision_head_count(vision_config.num_attention_heads)
self.writer.add_vision_feed_forward_length(vision_config.intermediate_size)
self.writer.add_vision_patch_size(vision_config.patch_size)
self.writer.add_vision_spatial_merge_size(vision_config.spatial_merge_size)
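        # Not every vision config defines an RMS-norm epsilon, so probe for it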
if hasattr(vision_config, "rms_norm_eps") and vision_config.rms_norm_eps:
self.writer.add_vision_attention_layernorm_eps(vision_config.rms_norm_eps)

    def add_tokeniser(self, tokeniser_config: dict[str, Any]) -> None:
        """Add tokeniser metadata to GGUF file.

        Writes special token IDs and tokeniser model type to enable proper
        text processing during inference. Uses sensible defaults for missing
        configuration values.
        """
self.writer.add_bos_token_id(tokeniser_config.get("bos_token_id", 1))
self.writer.add_eos_token_id(tokeniser_config.get("eos_token_id", 2))
self.writer.add_unk_token_id(tokeniser_config.get("unk_token_id", 0))
self.writer.add_pad_token_id(tokeniser_config.get("pad_token_id", 0))
self.writer.add_tokenizer_model(tokeniser_config.get("model_type", "llama"))
logger.info("Added tokeniser configuration")

    def add_tensor(self, name: str, data: np.ndarray) -> None:
        """Add a tensor to the GGUF file.

        Thin wrapper over the underlying writer; data must already be a
        NumPy array, with any dtype conversion done by the caller.
        """
        self.writer.add_tensor(name, data)

    def finalise(self) -> None:
        """Write all data to file and close writer.

        Completes the GGUF file creation by writing headers, key-value data,
        and tensor data in the correct order. Ensures proper file closure.
        """
logger.info(f"Writing GGUF file to {self.output_path}")
self.writer.write_header_to_file()
self.writer.write_kv_data_to_file()
self.writer.write_tensors_to_file()
self.writer.close()
logger.info("GGUF file written successfully")


class GGUFConverter:
    """High-level GGUF conversion orchestrator.

    Coordinates the complete conversion workflow from source models to GGUF
    format, managing metadata extraction, tensor mapping, and file writing.
    """

    @staticmethod
    def convert_safetensors(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
        """Convert SafeTensors model to GGUF format.

        Orchestrates the conversion process including metadata setup, tensor
        loading with BFloat16 support, name mapping, and tokeniser integration.

        Returns:
            True on completion. Errors while loading or writing tensors
            propagate as exceptions; only tokeniser loading failures are
            tolerated (logged as a warning).
        """
logger.info(f"Converting {model_path.name} to GGUF...")
# Create writer
writer_wrapper = GGUFWriter(output_path, architecture)
# Add metadata
writer_wrapper.add_metadata(model_config, model_path.name)
# Add vision metadata if present
if model_config.vision_config:
writer_wrapper.add_vision_metadata(model_config.vision_config)
# Load and add tensors
fs = FilesystemService()
tensor_files = fs.find_safetensor_files(model_path)
logger.info(f"Found {len(tensor_files)} tensor file(s)")
tensor_count = 0
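        # Checkpoints are often sharded across several .safetensors files;
        # stream them one file at a time to bound memory use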
for tensor_file in tensor_files:
logger.info(f"Loading {tensor_file.name}...")
with safe_open(tensor_file, framework="pt") as f:
for tensor_name in f.keys(): # noqa: SIM118
tensor_data = f.get_tensor(tensor_name)
                    # NumPy has no native BFloat16, so upcast to Float32 before export
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            tensor_data = tensor_data.float()
                        tensor_data = tensor_data.numpy()
# Map tensor name
gguf_name = tensor_mapper.map_tensor_name(tensor_name)
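                    # Tensors without a GGUF name mapping are silently skipped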
if gguf_name:
writer_wrapper.add_tensor(gguf_name, tensor_data)
tensor_count += 1
if tensor_count % 100 == 0:
logger.info(f" Processed {tensor_count} tensors...")
# Free memory after processing each tensor
del tensor_data
# Force garbage collection after processing each file
gc.collect()
logger.info(f"Total tensors processed: {tensor_count}")
# Add tokeniser
try:
tok_config = ConfigParser.load_tokeniser_config(model_path)
writer_wrapper.add_tokeniser(tok_config)
logger.info("Tokeniser added")
except Exception as e:
logger.warning(f"Could not add tokeniser: {e}")
# Finalise file
writer_wrapper.finalise()
file_size = fs.get_file_size(output_path)
logger.info(f"Conversion complete! Output: {output_path} ({file_size})")
return True