# llm-gguf-tools/helpers/gguf/converter.py
"""SafeTensors to GGUF conversion.
Handles conversion of SafeTensors models to GGUF format with proper
metadata and tensor mapping.
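
Typical usage (paths, config, and mapper are illustrative):

    ok = GGUFConverter.convert_safetensors(
        Path("models/my-model"),
        Path("models/my-model-f32.gguf"),
        model_config,
        "llama",
        tensor_mapper,
    )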
"""

from __future__ import annotations

import gc
import json
import traceback
from pathlib import Path
from typing import TYPE_CHECKING, Any

import torch
from safetensors import safe_open

from helpers.filesystem import FilesystemService
from helpers.gguf.writer import GGUFWriter
from helpers.logger import logger

if TYPE_CHECKING:
    from helpers.models.conversion import ModelConfig
    from helpers.utils.tensor_mapping import TensorMapper


class GGUFConverter:
"""High-level GGUF conversion orchestrator.
Coordinates the complete conversion workflow from source models to GGUF
format, managing metadata extraction, tensor mapping, and file writing.
"""

    @staticmethod
    def convert_safetensors(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
"""Convert SafeTensors model to GGUF format.
Orchestrates the conversion process including metadata setup, tensor
loading with BFloat16 support, name mapping, and tokeniser integration.
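
        Args:
            model_path: Directory containing the model's SafeTensors shard(s).
            output_path: Destination path for the generated GGUF file.
            model_config: Parsed model configuration, with optional vision settings.
            architecture: GGUF architecture identifier to write.
            tensor_mapper: Maps source tensor names to GGUF conventions.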

        Returns:
            True if conversion successful, False otherwise.
        """
logger.info(f"Converting {model_path.name} to GGUF...")
# Create writer
writer_wrapper = GGUFWriter(output_path, architecture)
# Add metadata
writer_wrapper.add_metadata(model_config, model_path.name)
# Add vision metadata if present
if model_config.vision_config:
writer_wrapper.add_vision_metadata(model_config.vision_config)
# Load and add tensors
fs = FilesystemService()
tensor_files = fs.find_safetensor_files(model_path)
logger.info(f"Found {len(tensor_files)} tensor file(s)")
tensor_count = 0
for tensor_file in tensor_files:
logger.info(f"Loading {tensor_file.name}...")
with safe_open(tensor_file, framework="pt") as f:
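                # Tensors are loaded lazily: each get_tensor() call below
                # materialises a single tensor, keeping peak memory low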
                for tensor_name in f.keys():  # noqa: SIM118
                    tensor_data = f.get_tensor(tensor_name)

                    # NumPy has no BFloat16 dtype, so upcast BFloat16
                    # tensors to Float32 before converting
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            tensor_data = tensor_data.float()
                        numpy_data = tensor_data.numpy()
                    else:
                        # Already a numpy array
                        numpy_data = tensor_data

                    # Map tensor name; skip tensors without a GGUF equivalent
                    gguf_name = tensor_mapper.map_tensor_name(tensor_name)
                    if not gguf_name:
                        logger.debug(f"Skipping unmapped tensor: {tensor_name}")
                        continue

                    logger.debug(f"  {tensor_name} -> {gguf_name}")
                    writer_wrapper.add_tensor(gguf_name, numpy_data)
                    tensor_count += 1

            # Clean up memory after each file
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
logger.info(f"Added {tensor_count} tensors")
# Add tokeniser
tokeniser_config = GGUFConverter.load_tokeniser_config(model_path)
if tokeniser_config:
writer_wrapper.add_tokeniser(tokeniser_config)
writer_wrapper.add_tokeniser_vocabulary(model_path)
# Finalise and write
writer_wrapper.write()
# Clean up
del writer_wrapper
gc.collect()
return output_path.exists()

    @staticmethod
    def convert_pytorch(
        model_path: Path,
        output_path: Path,
        model_config: ModelConfig,
        architecture: str,
        tensor_mapper: TensorMapper,
    ) -> bool:
"""Convert PyTorch model to GGUF format.
Handles PyTorch bin file conversion with sharded model support,
BFloat16 compatibility, and proper memory management.
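
        Args:
            model_path: Directory containing the model's checkpoint file(s).
            output_path: Destination path for the generated GGUF file.
            model_config: Parsed model configuration.
            architecture: GGUF architecture identifier to write.
            tensor_mapper: Maps source tensor names to GGUF conventions.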

        Returns:
            True if conversion successful, False otherwise.
        """
logger.info(f"Converting {model_path.name} to GGUF...")
# Create writer
writer_wrapper = GGUFWriter(output_path, architecture)
# Add metadata
writer_wrapper.add_metadata(model_config, model_path.name)
# Load and add tensors
fs = FilesystemService()
model_files = fs.find_safetensor_files(model_path)
logger.info(f"Found {len(model_files)} model file(s)")
tensor_count = 0
for model_file in model_files:
logger.info(f"Loading {model_file.name}...")
try:
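                # weights_only=True restricts unpickling to tensor data,
                # guarding against arbitrary code execution from checkpoints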
                checkpoint = torch.load(model_file, map_location="cpu", weights_only=True)

                for tensor_name, tensor_data in checkpoint.items():
                    # Upcast BFloat16 to Float32 (NumPy has no BFloat16),
                    # then convert tensors to numpy arrays
                    if hasattr(tensor_data, "numpy"):
                        if tensor_data.dtype == torch.bfloat16:
                            converted_tensor = tensor_data.float()
                        else:
                            converted_tensor = tensor_data
                        numpy_data = converted_tensor.numpy()
                    else:
                        # Non-tensor checkpoint entries pass through unchanged
                        numpy_data = tensor_data

                    # Map tensor name; skip tensors without a GGUF equivalent
                    gguf_name = tensor_mapper.map_tensor_name(tensor_name)
                    if not gguf_name:
                        logger.debug(f"Skipping unmapped tensor: {tensor_name}")
                        continue

                    logger.debug(f"  {tensor_name} -> {gguf_name}")
                    writer_wrapper.add_tensor(gguf_name, numpy_data)
                    tensor_count += 1

                # Release the checkpoint before loading the next shard
                del checkpoint
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            except Exception as e:
                logger.error(f"Failed to load {model_file.name}: {e}")
                logger.error(traceback.format_exc())
                return False
logger.info(f"Added {tensor_count} tensors")
# Add tokeniser
tokeniser_config = GGUFConverter.load_tokeniser_config(model_path)
if tokeniser_config:
writer_wrapper.add_tokeniser(tokeniser_config)
writer_wrapper.add_tokeniser_vocabulary(model_path)
# Finalise and write
writer_wrapper.write()
# Clean up
del writer_wrapper
gc.collect()
return output_path.exists()

    @staticmethod
    def load_tokeniser_config(model_path: Path) -> dict[str, Any] | None:
        """Load tokeniser configuration from model directory.

        Returns:
            Tokeniser configuration dictionary or None if not found.
        """
        config_path = model_path / "tokenizer_config.json"
        if not config_path.exists():
            logger.warning("tokenizer_config.json not found")
            return None

        try:
            with config_path.open(encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            logger.error(f"Failed to load tokeniser config: {e}")
            return None