"""GGUF file operations service.
|
|
|
|
Provides unified interface for creating, writing, and manipulating GGUF files.
|
|
Consolidates GGUF-specific operations from conversion and quantisation workflows.
|
|
Uses UK English spelling conventions throughout.
|
|
"""
|
|
|
|
from __future__ import annotations

import gc
from typing import TYPE_CHECKING, Any, Protocol

import gguf
import torch
from safetensors import safe_open

from helpers.logger import logger
from helpers.services.filesystem import FilesystemService
from helpers.utils.config_parser import ConfigParser


class VisionConfig(Protocol):
    """Protocol for vision model configuration."""

    hidden_size: int
    num_hidden_layers: int
    num_attention_heads: int
    intermediate_size: int
    patch_size: int
    spatial_merge_size: int


class TensorMapper(Protocol):
    """Protocol for tensor name mapping."""

    def map_tensor_name(self, name: str) -> str | None:
        """Map a tensor name to its GGUF equivalent."""
        ...

if TYPE_CHECKING:
    from pathlib import Path

    import numpy as np

    from helpers.models.conversion import ModelConfig


class GGUFWriter:
    """Manages GGUF file creation and metadata writing.

    Provides high-level interface for GGUF file operations including metadata
    configuration, tensor addition, and tokeniser integration. Encapsulates
    low-level GGUF library interactions for consistent error handling.
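
    Example (illustrative usage; names and paths are hypothetical):
        writer = GGUFWriter(Path("model.gguf"), "llama")
        writer.add_metadata(model_config, "my-model")
        writer.add_tensor("token_embd.weight", embeddings)
        writer.finalise()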
    """

    def __init__(self, output_path: Path, architecture: str) -> None:
        """Initialise GGUF writer with output path and architecture.

        Creates the underlying GGUF writer instance and prepares for metadata
        and tensor addition. Sets up the file structure for the specified
        model architecture.
        """
        self.output_path = output_path
        self.architecture = architecture
        self.writer = gguf.GGUFWriter(str(output_path), architecture)
        logger.info(f"Created GGUF writer for {architecture} architecture")

    def add_metadata(self, model_config: ModelConfig, model_name: str) -> None:
        """Add comprehensive metadata from model configuration.

        Writes general model information, architectural parameters, and
        quantisation settings to the GGUF file header. Handles both standard
        and vision model configurations with appropriate parameter mapping.
        """
        # General metadata
        self.writer.add_name(model_name)
        self.writer.add_description(f"Converted from {model_config.architectures[0]}")
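        # Tensors are stored as F32 at this stage; quantisation to smaller
        # formats is applied in the separate quantisation workflow.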
        self.writer.add_file_type(gguf.LlamaFileType.ALL_F32)

        # Model parameters from config
        params = model_config.to_gguf_params()
        self.writer.add_context_length(params.context_length)
        self.writer.add_embedding_length(params.embedding_length)
        self.writer.add_block_count(params.block_count)
        self.writer.add_feed_forward_length(params.feed_forward_length)
        self.writer.add_head_count(params.attention_head_count)
        self.writer.add_head_count_kv(params.attention_head_count_kv)
        self.writer.add_layer_norm_rms_eps(params.attention_layer_norm_rms_epsilon)
        self.writer.add_rope_freq_base(params.rope_freq_base)
        self.writer.add_rope_dimension_count(params.rope_dimension_count)

        logger.info(f"Added metadata: {params.block_count} layers, {params.context_length} context")

    def add_vision_metadata(self, vision_config: VisionConfig | None) -> None:
        """Add vision model parameters to GGUF metadata.

        Configures vision-specific parameters for multimodal models including
        embedding dimensions, attention heads, and spatial processing settings.
        """
        if not vision_config:
            return

        logger.info("Adding vision model parameters...")
        self.writer.add_vision_embedding_length(vision_config.hidden_size)
        self.writer.add_vision_block_count(vision_config.num_hidden_layers)
        self.writer.add_vision_head_count(vision_config.num_attention_heads)
        self.writer.add_vision_feed_forward_length(vision_config.intermediate_size)
        self.writer.add_vision_patch_size(vision_config.patch_size)
        self.writer.add_vision_spatial_merge_size(vision_config.spatial_merge_size)

        if hasattr(vision_config, "rms_norm_eps") and vision_config.rms_norm_eps:
            self.writer.add_vision_attention_layernorm_eps(vision_config.rms_norm_eps)

    def add_tokeniser(self, tokeniser_config: dict[str, Any]) -> None:
        """Add tokeniser metadata to GGUF file.

        Writes special token IDs and tokeniser model type to enable proper
        text processing during inference. Uses sensible defaults for missing
        configuration values.
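
        Example (illustrative configuration; values shown match the defaults):
            writer.add_tokeniser({"bos_token_id": 1, "eos_token_id": 2, "model_type": "llama"})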
        """
        self.writer.add_bos_token_id(tokeniser_config.get("bos_token_id", 1))
        self.writer.add_eos_token_id(tokeniser_config.get("eos_token_id", 2))
        self.writer.add_unk_token_id(tokeniser_config.get("unk_token_id", 0))
        self.writer.add_pad_token_id(tokeniser_config.get("pad_token_id", 0))
        self.writer.add_tokenizer_model(tokeniser_config.get("model_type", "llama"))

        logger.info("Added tokeniser configuration")

    def add_tensor(self, name: str, data: np.ndarray) -> None:
        """Add a tensor to the GGUF file.

        Writes tensor data with the specified name to the file. Handles
        data type conversions and validates tensor shapes.
        """
        self.writer.add_tensor(name, data)

    def finalise(self) -> None:
        """Write all data to file and close writer.

        Completes the GGUF file creation by writing headers, key-value data,
        and tensor data in the correct order. Ensures proper file closure.
        """
        logger.info(f"Writing GGUF file to {self.output_path}")
        self.writer.write_header_to_file()
        self.writer.write_kv_data_to_file()
        self.writer.write_tensors_to_file()
        self.writer.close()
        logger.info("GGUF file written successfully")


class GGUFConverter:
    """High-level GGUF conversion orchestrator.

    Coordinates the complete conversion workflow from source models to GGUF
    format, managing metadata extraction, tensor mapping, and file writing.
    """

    @staticmethod
    def convert_safetensors(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
        """Convert SafeTensors model to GGUF format.

        Orchestrates the conversion process including metadata setup, tensor
        loading with BFloat16 support, name mapping, and tokeniser integration.

        Returns:
            True once the conversion completes successfully.
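
        Example (illustrative; paths, config, and mapper are hypothetical):
            ok = GGUFConverter.convert_safetensors(
                Path("./source-model"),
                Path("./model-f32.gguf"),
                model_config,
                "llama",
                tensor_mapper,
            )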
        """
        logger.info(f"Converting {model_path.name} to GGUF...")

        # Create writer
        writer_wrapper = GGUFWriter(output_path, architecture)

        # Add metadata
        writer_wrapper.add_metadata(model_config, model_path.name)

        # Add vision metadata if present
        if model_config.vision_config:
            writer_wrapper.add_vision_metadata(model_config.vision_config)

        # Load and add tensors
        fs = FilesystemService()
        tensor_files = fs.find_safetensor_files(model_path)
        logger.info(f"Found {len(tensor_files)} tensor file(s)")

        tensor_count = 0
        for tensor_file in tensor_files:
            logger.info(f"Loading {tensor_file.name}...")
            with safe_open(tensor_file, framework="pt") as f:
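                # safe_open handles expose keys()/get_tensor() rather than
                # dict-style iteration, hence the noqa on the loop below.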
                for tensor_name in f.keys():  # noqa: SIM118
                    tensor_data = f.get_tensor(tensor_name)

                    # Convert BFloat16 to Float32 (NumPy lacks a bfloat16 dtype)
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            tensor_data = tensor_data.float()
                        tensor_data = tensor_data.numpy()

                    # Map tensor name
                    gguf_name = tensor_mapper.map_tensor_name(tensor_name)

                    if gguf_name:
                        writer_wrapper.add_tensor(gguf_name, tensor_data)
                        tensor_count += 1

                        if tensor_count % 100 == 0:
                            logger.info(f"  Processed {tensor_count} tensors...")

                    # Free memory after processing each tensor
                    del tensor_data

            # Force garbage collection after processing each file
            gc.collect()

        logger.info(f"Total tensors processed: {tensor_count}")

        # Add tokeniser
        try:
            tok_config = ConfigParser.load_tokeniser_config(model_path)
            writer_wrapper.add_tokeniser(tok_config)
            logger.info("Tokeniser added")
        except Exception as e:
            logger.warning(f"Could not add tokeniser: {e}")

        # Finalise file
        writer_wrapper.finalise()

        file_size = fs.get_file_size(output_path)
        logger.info(f"Conversion complete! Output: {output_path} ({file_size})")

        return True