# llm-gguf-tools/helpers/services/gguf.py

"""GGUF file operations service.
Provides unified interface for creating, writing, and manipulating GGUF files.
Consolidates GGUF-specific operations from conversion and quantisation workflows.
Uses UK English spelling conventions throughout.
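
Example (a minimal sketch; the ModelConfig and TensorMapper instances are
assumed to come from elsewhere in this project):

    from pathlib import Path

    ok = GGUFConverter.convert_safetensors(
        Path("./source-model"), Path("./model.gguf"),
        model_config, "llama", tensor_mapper,
    )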
"""
from __future__ import annotations

import gc
from typing import TYPE_CHECKING, Any, Protocol

import gguf
import torch
from safetensors import safe_open

from helpers.logger import logger
from helpers.services.filesystem import FilesystemService
from helpers.utils.config_parser import ConfigParser

if TYPE_CHECKING:
    from pathlib import Path

    import numpy as np

    from helpers.models.conversion import ModelConfig


class VisionConfig(Protocol):
    """Protocol for vision model configuration."""

    hidden_size: int
    num_hidden_layers: int
    num_attention_heads: int
    intermediate_size: int
    patch_size: int
    spatial_merge_size: int


class TensorMapper(Protocol):
    """Protocol for tensor name mapping."""

    def map_tensor_name(self, name: str) -> str | None:
        """Map a source tensor name to its GGUF equivalent, or return None to skip it."""
        ...
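

# A conforming TensorMapper might look like this (illustrative sketch only;
# the project's real mappers live elsewhere and cover many more names):
#
#     class SimpleMapper:
#         def map_tensor_name(self, name: str) -> str | None:
#             if name == "model.embed_tokens.weight":
#                 return "token_embd.weight"
#             return None  # unmapped tensors are skipped by the converter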


class GGUFWriter:
    """Manages GGUF file creation and metadata writing.

    Provides a high-level interface for GGUF file operations including
    metadata configuration, tensor addition, and tokeniser integration.
    Encapsulates low-level GGUF library interactions for consistent error
    handling.
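
    Example (minimal sketch; `model_config` is a populated ModelConfig):

        writer = GGUFWriter(Path("model.gguf"), "llama")
        writer.add_metadata(model_config, "my-model")
        writer.add_tokeniser({"bos_token_id": 1, "eos_token_id": 2})
        writer.finalise()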
"""

    def __init__(self, output_path: Path, architecture: str) -> None:
        """Initialise GGUF writer with output path and architecture.

        Creates the underlying GGUF writer instance and prepares for metadata
        and tensor addition. Sets up the file structure for the specified
        model architecture.
        """
        self.output_path = output_path
        self.architecture = architecture
        self.writer = gguf.GGUFWriter(str(output_path), architecture)
        logger.info(f"Created GGUF writer for {architecture} architecture")

    def add_metadata(self, model_config: ModelConfig, model_name: str) -> None:
        """Add comprehensive metadata from model configuration.

        Writes general model information, architectural parameters, and
        quantisation settings to the GGUF file header. Handles both standard
        and vision model configurations with appropriate parameter mapping.
        """
# General metadata
self.writer.add_name(model_name)
self.writer.add_description(f"Converted from {model_config.architectures[0]}")
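        # ALL_F32 records the file as unquantised 32-bit floats; any
        # quantisation happens in a separate downstream pass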
self.writer.add_file_type(gguf.LlamaFileType.ALL_F32)
# Model parameters from config
params = model_config.to_gguf_params()
self.writer.add_context_length(params.context_length)
self.writer.add_embedding_length(params.embedding_length)
self.writer.add_block_count(params.block_count)
self.writer.add_feed_forward_length(params.feed_forward_length)
self.writer.add_head_count(params.attention_head_count)
self.writer.add_head_count_kv(params.attention_head_count_kv)
self.writer.add_layer_norm_rms_eps(params.attention_layer_norm_rms_epsilon)
self.writer.add_rope_freq_base(params.rope_freq_base)
self.writer.add_rope_dimension_count(params.rope_dimension_count)
logger.info(f"Added metadata: {params.block_count} layers, {params.context_length} context")

    def add_vision_metadata(self, vision_config: VisionConfig | None) -> None:
        """Add vision model parameters to GGUF metadata.

        Configures vision-specific parameters for multimodal models, including
        embedding dimensions, attention heads, and spatial processing settings.
        """
if not vision_config:
return
logger.info("Adding vision model parameters...")
self.writer.add_vision_embedding_length(vision_config.hidden_size)
self.writer.add_vision_block_count(vision_config.num_hidden_layers)
self.writer.add_vision_head_count(vision_config.num_attention_heads)
self.writer.add_vision_feed_forward_length(vision_config.intermediate_size)
self.writer.add_vision_patch_size(vision_config.patch_size)
self.writer.add_vision_spatial_merge_size(vision_config.spatial_merge_size)
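        # Not every vision config defines an RMS-norm epsilon, so probe for it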
if hasattr(vision_config, "rms_norm_eps") and vision_config.rms_norm_eps:
self.writer.add_vision_attention_layernorm_eps(vision_config.rms_norm_eps)

    def add_tokeniser(self, tokeniser_config: dict[str, Any]) -> None:
        """Add tokeniser metadata to GGUF file.

        Writes special token IDs and tokeniser model type to enable proper
        text processing during inference. Uses sensible defaults for missing
        configuration values.
        """
self.writer.add_bos_token_id(tokeniser_config.get("bos_token_id", 1))
self.writer.add_eos_token_id(tokeniser_config.get("eos_token_id", 2))
self.writer.add_unk_token_id(tokeniser_config.get("unk_token_id", 0))
self.writer.add_pad_token_id(tokeniser_config.get("pad_token_id", 0))
self.writer.add_tokenizer_model(tokeniser_config.get("model_type", "llama"))
logger.info("Added tokeniser configuration")

    def add_tensor(self, name: str, data: np.ndarray) -> None:
        """Add a tensor to the GGUF file.

        Thin wrapper over the underlying writer; data must already be a
        NumPy array, with any dtype conversion done by the caller.
        """
        self.writer.add_tensor(name, data)

    def finalise(self) -> None:
        """Write all data to file and close writer.

        Completes the GGUF file creation by writing headers, key-value data,
        and tensor data in the correct order. Ensures proper file closure.
        """
logger.info(f"Writing GGUF file to {self.output_path}")
self.writer.write_header_to_file()
self.writer.write_kv_data_to_file()
self.writer.write_tensors_to_file()
self.writer.close()
logger.info("GGUF file written successfully")


class GGUFConverter:
    """High-level GGUF conversion orchestrator.

    Coordinates the complete conversion workflow from source models to GGUF
    format, managing metadata extraction, tensor mapping, and file writing.
    """

    @staticmethod
    def convert_safetensors(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
        """Convert SafeTensors model to GGUF format.

        Orchestrates the conversion process including metadata setup, tensor
        loading with BFloat16 support, name mapping, and tokeniser integration.

        Returns:
            True on completion. Errors while loading or writing tensors
            propagate as exceptions; only tokeniser loading failures are
            tolerated (logged as a warning).
        """
logger.info(f"Converting {model_path.name} to GGUF...")
# Create writer
writer_wrapper = GGUFWriter(output_path, architecture)
# Add metadata
writer_wrapper.add_metadata(model_config, model_path.name)
# Add vision metadata if present
if model_config.vision_config:
writer_wrapper.add_vision_metadata(model_config.vision_config)
# Load and add tensors
fs = FilesystemService()
tensor_files = fs.find_safetensor_files(model_path)
logger.info(f"Found {len(tensor_files)} tensor file(s)")
tensor_count = 0
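        # Checkpoints are often sharded across several .safetensors files;
        # stream them one file at a time to bound memory use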
for tensor_file in tensor_files:
logger.info(f"Loading {tensor_file.name}...")
with safe_open(tensor_file, framework="pt") as f:
for tensor_name in f.keys(): # noqa: SIM118
tensor_data = f.get_tensor(tensor_name)
                    # NumPy has no native BFloat16, so upcast to Float32 before export
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            tensor_data = tensor_data.float()
                        tensor_data = tensor_data.numpy()
# Map tensor name
gguf_name = tensor_mapper.map_tensor_name(tensor_name)
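                    # Tensors without a GGUF name mapping are silently skipped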
if gguf_name:
writer_wrapper.add_tensor(gguf_name, tensor_data)
tensor_count += 1
if tensor_count % 100 == 0:
logger.info(f" Processed {tensor_count} tensors...")
# Free memory after processing each tensor
del tensor_data
# Force garbage collection after processing each file
gc.collect()
logger.info(f"Total tensors processed: {tensor_count}")
# Add tokeniser
try:
tok_config = ConfigParser.load_tokeniser_config(model_path)
writer_wrapper.add_tokeniser(tok_config)
logger.info("Tokeniser added")
except Exception as e:
logger.warning(f"Could not add tokeniser: {e}")
# Finalise file
writer_wrapper.finalise()
file_size = fs.get_file_size(output_path)
logger.info(f"Conversion complete! Output: {output_path} ({file_size})")
return True