"""GGUF file operations service. Provides unified interface for creating, writing, and manipulating GGUF files. Consolidates GGUF-specific operations from conversion and quantisation workflows. Uses UK English spelling conventions throughout. """ from __future__ import annotations from typing import TYPE_CHECKING, Any import gguf import torch from safetensors import safe_open from helpers.logger import logger from helpers.services.filesystem import FilesystemService from helpers.utils.config_parser import ConfigParser if TYPE_CHECKING: from pathlib import Path import numpy as np from helpers.models.conversion import ModelConfig class GGUFWriter: """Manages GGUF file creation and metadata writing. Provides high-level interface for GGUF file operations including metadata configuration, tensor addition, and tokeniser integration. Encapsulates low-level GGUF library interactions for consistent error handling. """ def __init__(self, output_path: Path, architecture: str) -> None: """Initialise GGUF writer with output path and architecture. Creates the underlying GGUF writer instance and prepares for metadata and tensor addition. Sets up the file structure for the specified model architecture. """ self.output_path = output_path self.architecture = architecture self.writer = gguf.GGUFWriter(str(output_path), architecture) logger.info(f"Created GGUF writer for {architecture} architecture") def add_metadata(self, model_config: ModelConfig, model_name: str) -> None: """Add comprehensive metadata from model configuration. Writes general model information, architectural parameters, and quantisation settings to the GGUF file header. Handles both standard and vision model configurations with appropriate parameter mapping. """ # General metadata self.writer.add_name(model_name) self.writer.add_description(f"Converted from {model_config.architectures[0]}") self.writer.add_file_type(gguf.LlamaFileType.ALL_F32) # Model parameters from config params = model_config.to_gguf_params() self.writer.add_context_length(params.context_length) self.writer.add_embedding_length(params.embedding_length) self.writer.add_block_count(params.block_count) self.writer.add_feed_forward_length(params.feed_forward_length) self.writer.add_head_count(params.attention_head_count) self.writer.add_head_count_kv(params.attention_head_count_kv) self.writer.add_layer_norm_rms_eps(params.attention_layer_norm_rms_epsilon) self.writer.add_rope_freq_base(params.rope_freq_base) self.writer.add_rope_dimension_count(params.rope_dimension_count) logger.info(f"Added metadata: {params.block_count} layers, {params.context_length} context") def add_vision_metadata(self, vision_config: Any) -> None: """Add vision model parameters to GGUF metadata. Configures vision-specific parameters for multimodal models including embedding dimensions, attention heads, and spatial processing settings. """ if not vision_config: return logger.info("Adding vision model parameters...") self.writer.add_vision_embedding_length(vision_config.hidden_size) self.writer.add_vision_block_count(vision_config.num_hidden_layers) self.writer.add_vision_head_count(vision_config.num_attention_heads) self.writer.add_vision_feed_forward_length(vision_config.intermediate_size) self.writer.add_vision_patch_size(vision_config.patch_size) self.writer.add_vision_spatial_merge_size(vision_config.spatial_merge_size) if hasattr(vision_config, "rms_norm_eps") and vision_config.rms_norm_eps: self.writer.add_vision_attention_layernorm_eps(vision_config.rms_norm_eps) def add_tokeniser(self, tokeniser_config: dict[str, Any]) -> None: """Add tokeniser metadata to GGUF file. Writes special token IDs and tokeniser model type to enable proper text processing during inference. Uses sensible defaults for missing configuration values. """ self.writer.add_bos_token_id(tokeniser_config.get("bos_token_id", 1)) self.writer.add_eos_token_id(tokeniser_config.get("eos_token_id", 2)) self.writer.add_unk_token_id(tokeniser_config.get("unk_token_id", 0)) self.writer.add_pad_token_id(tokeniser_config.get("pad_token_id", 0)) self.writer.add_tokenizer_model(tokeniser_config.get("model_type", "llama")) logger.info("Added tokeniser configuration") def add_tensor(self, name: str, data: np.ndarray) -> None: """Add a tensor to the GGUF file. Writes tensor data with the specified name to the file. Handles data type conversions and validates tensor shapes. """ self.writer.add_tensor(name, data) def finalise(self) -> None: """Write all data to file and close writer. Completes the GGUF file creation by writing headers, key-value data, and tensor data in the correct order. Ensures proper file closure. """ logger.info(f"Writing GGUF file to {self.output_path}") self.writer.write_header_to_file() self.writer.write_kv_data_to_file() self.writer.write_tensors_to_file() self.writer.close() logger.info("GGUF file written successfully") class GGUFConverter: """High-level GGUF conversion orchestrator. Coordinates the complete conversion workflow from source models to GGUF format, managing metadata extraction, tensor mapping, and file writing. """ @staticmethod def convert_safetensors( model_path: Path, output_path: Path, model_config: ModelConfig, architecture: str, tensor_mapper: Any, ) -> bool: """Convert SafeTensors model to GGUF format. Orchestrates the conversion process including metadata setup, tensor loading with BFloat16 support, name mapping, and tokeniser integration. Returns: True if conversion successful, False otherwise. """ logger.info(f"Converting {model_path.name} to GGUF...") # Create writer writer_wrapper = GGUFWriter(output_path, architecture) # Add metadata writer_wrapper.add_metadata(model_config, model_path.name) # Add vision metadata if present if model_config.vision_config: writer_wrapper.add_vision_metadata(model_config.vision_config) # Load and add tensors fs = FilesystemService() tensor_files = fs.find_safetensor_files(model_path) logger.info(f"Found {len(tensor_files)} tensor file(s)") tensor_count = 0 for tensor_file in tensor_files: logger.info(f"Loading {tensor_file.name}...") with safe_open(tensor_file, framework="pt") as f: for tensor_name in f: tensor_data = f.get_tensor(tensor_name) # Convert BFloat16 to Float32 if hasattr(tensor_data, "numpy"): if torch and tensor_data.dtype == torch.bfloat16: tensor_data = tensor_data.float() tensor_data = tensor_data.numpy() # Map tensor name gguf_name = tensor_mapper.map_tensor_name(tensor_name) if gguf_name: writer_wrapper.add_tensor(gguf_name, tensor_data) tensor_count += 1 if tensor_count % 100 == 0: logger.info(f" Processed {tensor_count} tensors...") logger.info(f"Total tensors processed: {tensor_count}") # Add tokeniser try: tok_config = ConfigParser.load_tokeniser_config(model_path) writer_wrapper.add_tokeniser(tok_config) logger.info("Tokeniser added") except Exception as e: logger.warning(f"Could not add tokeniser: {e}") # Finalise file writer_wrapper.finalise() file_size = fs.get_file_size(output_path) logger.info(f"Conversion complete! Output: {output_path} ({file_size})") return True