# llm-gguf-tools/helpers/gguf/converter.py
"""SafeTensors to GGUF conversion.
Handles conversion of SafeTensors models to GGUF format with proper
metadata and tensor mapping.
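
Typical usage (paths, config, and mapper are illustrative):

    ok = GGUFConverter.convert_safetensors(
        Path("models/my-model"),
        Path("models/my-model-f32.gguf"),
        model_config,
        "llama",
        tensor_mapper,
    )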
"""

from __future__ import annotations

import gc
import json
import traceback
from pathlib import Path
from typing import TYPE_CHECKING, Any

import torch
from safetensors import safe_open

from helpers.filesystem import FilesystemService
from helpers.gguf.writer import GGUFWriter
from helpers.logger import logger

if TYPE_CHECKING:
    from helpers.models.conversion import ModelConfig
    from helpers.utils.tensor_mapping import TensorMapper


class GGUFConverter:
"""High-level GGUF conversion orchestrator.
Coordinates the complete conversion workflow from source models to GGUF
format, managing metadata extraction, tensor mapping, and file writing.
"""

    @staticmethod
    def convert_safetensors(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
"""Convert SafeTensors model to GGUF format.
Orchestrates the conversion process including metadata setup, tensor
loading with BFloat16 support, name mapping, and tokeniser integration.
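
        Args:
            model_path: Directory containing the model's SafeTensors shard(s).
            output_path: Destination path for the generated GGUF file.
            model_config: Parsed model configuration, with optional vision settings.
            architecture: GGUF architecture identifier to write.
            tensor_mapper: Maps source tensor names to GGUF conventions.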

        Returns:
            True if conversion successful, False otherwise.
        """
logger.info(f"Converting {model_path.name} to GGUF...")
# Create writer
writer_wrapper = GGUFWriter(output_path, architecture)
# Add metadata
writer_wrapper.add_metadata(model_config, model_path.name)
# Add vision metadata if present
if model_config.vision_config:
writer_wrapper.add_vision_metadata(model_config.vision_config)
# Load and add tensors
fs = FilesystemService()
tensor_files = fs.find_safetensor_files(model_path)
logger.info(f"Found {len(tensor_files)} tensor file(s)")
tensor_count = 0
for tensor_file in tensor_files:
logger.info(f"Loading {tensor_file.name}...")
with safe_open(tensor_file, framework="pt") as f:
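                # Tensors are loaded lazily: each get_tensor() call below
                # materialises a single tensor, keeping peak memory low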
                for tensor_name in f.keys():  # noqa: SIM118
                    tensor_data = f.get_tensor(tensor_name)

                    # NumPy has no BFloat16 dtype, so upcast BFloat16
                    # tensors to Float32 before converting
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            tensor_data = tensor_data.float()
                        numpy_data = tensor_data.numpy()
                    else:
                        # Already a numpy array
                        numpy_data = tensor_data

                    # Map tensor name; skip tensors without a GGUF equivalent
                    gguf_name = tensor_mapper.map_tensor_name(tensor_name)
                    if not gguf_name:
                        logger.debug(f"Skipping unmapped tensor: {tensor_name}")
                        continue

                    logger.debug(f"  {tensor_name} -> {gguf_name}")
                    writer_wrapper.add_tensor(gguf_name, numpy_data)
                    tensor_count += 1

            # Clean up memory after each file
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
logger.info(f"Added {tensor_count} tensors")
# Add tokeniser
tokeniser_config = GGUFConverter.load_tokeniser_config(model_path)
if tokeniser_config:
writer_wrapper.add_tokeniser(tokeniser_config)
writer_wrapper.add_tokeniser_vocabulary(model_path)
# Finalise and write
writer_wrapper.write()
# Clean up
del writer_wrapper
gc.collect()
return output_path.exists()

    @staticmethod
    def convert_pytorch(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
"""Convert PyTorch model to GGUF format.
Handles PyTorch bin file conversion with sharded model support,
BFloat16 compatibility, and proper memory management.
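
        Args:
            model_path: Directory containing the model's checkpoint file(s).
            output_path: Destination path for the generated GGUF file.
            model_config: Parsed model configuration.
            architecture: GGUF architecture identifier to write.
            tensor_mapper: Maps source tensor names to GGUF conventions.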

        Returns:
            True if conversion successful, False otherwise.
        """
logger.info(f"Converting {model_path.name} to GGUF...")
# Create writer
writer_wrapper = GGUFWriter(output_path, architecture)
# Add metadata
writer_wrapper.add_metadata(model_config, model_path.name)
# Load and add tensors
fs = FilesystemService()
model_files = fs.find_safetensor_files(model_path)
logger.info(f"Found {len(model_files)} model file(s)")
tensor_count = 0
for model_file in model_files:
logger.info(f"Loading {model_file.name}...")
try:
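                # weights_only=True restricts unpickling to tensor data,
                # guarding against arbitrary code execution from checkpoints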
                checkpoint = torch.load(model_file, map_location="cpu", weights_only=True)

                for tensor_name, tensor_data in checkpoint.items():
                    # Upcast BFloat16 to Float32 (NumPy has no BFloat16),
                    # then convert tensors to numpy arrays
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            converted_tensor = tensor_data.float()
                        else:
                            converted_tensor = tensor_data
                        numpy_data = converted_tensor.numpy()
                    else:
                        # Non-tensor checkpoint entries pass through unchanged
                        numpy_data = tensor_data

                    # Map tensor name; skip tensors without a GGUF equivalent
                    gguf_name = tensor_mapper.map_tensor_name(tensor_name)
                    if not gguf_name:
                        logger.debug(f"Skipping unmapped tensor: {tensor_name}")
                        continue

                    logger.debug(f"  {tensor_name} -> {gguf_name}")
                    writer_wrapper.add_tensor(gguf_name, numpy_data)
                    tensor_count += 1

                # Release the checkpoint before loading the next shard
                del checkpoint
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            except Exception as e:
                logger.error(f"Failed to load {model_file.name}: {e}")
                logger.error(traceback.format_exc())
                return False
logger.info(f"Added {tensor_count} tensors")
# Add tokeniser
tokeniser_config = GGUFConverter.load_tokeniser_config(model_path)
if tokeniser_config:
writer_wrapper.add_tokeniser(tokeniser_config)
writer_wrapper.add_tokeniser_vocabulary(model_path)
# Finalise and write
writer_wrapper.write()
# Clean up
del writer_wrapper
gc.collect()
return output_path.exists()

    @staticmethod
    def load_tokeniser_config(model_path: Path) -> dict[str, Any] | None:
        """Load tokeniser configuration from model directory.

        Returns:
            Tokeniser configuration dictionary or None if not found.
        """
        config_path = model_path / "tokenizer_config.json"
        if not config_path.exists():
            logger.warning("tokenizer_config.json not found")
            return None

        try:
            with config_path.open(encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            logger.error(f"Failed to load tokeniser config: {e}")
            return None