# llm-gguf-tools/helpers/llama_cpp/imatrix.py

"""Importance matrix operations for llama.cpp.

Handles importance matrix generation and management for improved
quantisation quality.
"""

from __future__ import annotations

import os
import platform
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING

from helpers.filesystem import FilesystemService
from helpers.llama_cpp.binary_manager import BinaryManager
from helpers.logger import logger

if TYPE_CHECKING:
    from helpers.models.quantisation import ModelSource


class IMatrixHandler:
    """Handles importance matrix file management.

    Manages detection and use of existing importance matrix files for
    quantisation guidance. Both lookups expect the file to be called
    ``imatrix.dat`` and to live directly inside the model directory.
    """

    # Name of the importance matrix file looked for in a model directory.
    IMATRIX_FILENAME = "imatrix.dat"

    def __init__(self) -> None:
        """Initialise IMatrixHandler."""
        self.fs = FilesystemService()

    def _existing_imatrix(self, model_dir: Path) -> Path | None:
        """Return the imatrix path inside *model_dir* if it is a regular file.

        ``is_file()`` is used rather than ``exists()`` so that a directory
        which happens to be named ``imatrix.dat`` is not mistaken for a
        usable importance matrix.
        """
        candidate = model_dir / self.IMATRIX_FILENAME
        return candidate if candidate.is_file() else None

    def find_imatrix(self, model_dir: Path) -> Path | None:
        """Find existing imatrix file in model directory.

        Args:
            model_dir: Directory expected to contain ``imatrix.dat``.

        Returns:
            Path to imatrix file if found, None otherwise.
        """
        imatrix_path = self._existing_imatrix(model_dir)
        if imatrix_path is None:
            return None

        # Size as reported by FilesystemService; only used for the log line.
        file_size = self.fs.get_file_size(imatrix_path)
        logger.info(f"Found existing imatrix: {imatrix_path.name} ({file_size})")
        return imatrix_path

    def prompt_for_user_imatrix(self, model_dir: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Asks on stdin whether the user can supply an imatrix, waits for them
        to place it in *model_dir*, then checks that the file is present.

        Args:
            model_dir: Directory in which the user should place ``imatrix.dat``.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        imatrix_path = model_dir / self.IMATRIX_FILENAME

        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info(
            "Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
        )
        logger.info(
            "   Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
        )

        response = (
            input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
            .strip()
            .lower()
        )

        # Default is "no"; accept the spelled-out "yes" as well as "y" so a
        # fully typed answer is not silently treated as a refusal.
        if response not in {"y", "yes"}:
            return None

        logger.info(f"Please place your imatrix.dat file in: {model_dir}")
        input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")

        placed = self._existing_imatrix(model_dir)
        if placed is None:
            logger.warning("No imatrix.dat file found - continuing without imatrix")
            return None

        file_size = self.fs.get_file_size(placed)
        logger.info(f"Found imatrix file! ({file_size})")
        return placed


class IMatrixGenerator:
    """Generates importance matrices for quantisation guidance.

    Uses llama-imatrix binary to compute importance matrices from
    calibration data, which helps preserve model quality during
    quantisation by identifying critical weights.
    """

    # Default calibration data location (relative to the working directory)
    CALIBRATION_DATA = Path("resources") / "imatrix_data.txt"

    def __init__(self) -> None:
        """Initialise imatrix generator and locate the llama-imatrix binary."""
        self.binary_manager = BinaryManager()
        self.imatrix_binary = self._get_imatrix_binary()

    @staticmethod
    def _resolve_local_binary(candidate: Path) -> Path | None:
        """Return *candidate* as an absolute path if it is an existing file.

        A relative path such as ``Path("./llama-imatrix")`` stringifies to the
        bare name ``llama-imatrix``; handed to ``subprocess`` that bare name is
        looked up on ``PATH`` instead of the working directory. Resolving to
        an absolute path makes the launched executable unambiguous.
        """
        if not candidate.is_file():
            return None
        return candidate.resolve()

    def _get_imatrix_binary(self) -> Path | None:
        """Get llama-imatrix binary, downloading if necessary.

        Returns:
            Path to binary if found, None otherwise.
        """
        # First check local directory for manual placement
        local_binary = self._resolve_local_binary(Path("./llama-imatrix"))
        if local_binary is not None:
            logger.info(f"Using local llama-imatrix binary: {local_binary}")
            return local_binary

        # Otherwise ask BinaryManager for one (per the original comment this
        # downloads from GitHub releases) and make sure it actually runs.
        binary_path = self.binary_manager.get_imatrix_binary()
        if binary_path and self.binary_manager.check_binary_works(binary_path):
            logger.info(f"Using llama-imatrix binary: {binary_path}")
            return binary_path

        logger.warning("llama-imatrix binary not available")
        return None

    def can_generate(self) -> bool:
        """Check if imatrix generation is available.

        Only the default ``CALIBRATION_DATA`` location is considered here.

        Returns:
            True if binary and calibration data are available.
        """
        return self.imatrix_binary is not None and self.CALIBRATION_DATA.exists()

    def generate_imatrix(
        self,
        f16_model_path: Path,
        output_path: Path,
        calibration_data: Path | None = None,
    ) -> bool:
        """Generate importance matrix for a model.

        Args:
            f16_model_path: Model file passed to llama-imatrix via ``-m``.
            output_path: Destination of the generated imatrix (``-o``).
            calibration_data: Calibration text file (``-f``); falls back to
                ``CALIBRATION_DATA`` when omitted.

        Returns:
            True if generation successful, False otherwise.
        """
        validation_error = self._validate_generation_inputs(f16_model_path, calibration_data)
        if validation_error:
            logger.error(validation_error)
            return False

        cal_data = calibration_data or self.CALIBRATION_DATA
        cmd = self._build_imatrix_command(f16_model_path, cal_data, output_path)

        self._log_generation_start(f16_model_path, cal_data, output_path)

        return self._execute_imatrix_generation(cmd, output_path)

    def _validate_generation_inputs(
        self,
        f16_model_path: Path,
        calibration_data: Path | None,
    ) -> str | None:
        """Validate inputs for imatrix generation.

        Checks, in order: binary availability, model file existence, and
        calibration data existence.

        Returns:
            Error message if validation fails, None if valid.
        """
        if not self.imatrix_binary:
            return "llama-imatrix binary not available"

        if not f16_model_path.exists():
            return f"Model file not found: {f16_model_path}"

        cal_data = calibration_data or self.CALIBRATION_DATA
        if not cal_data.exists():
            return f"Calibration data not found: {cal_data}"

        return None

    def _build_imatrix_command(
        self,
        f16_model_path: Path,
        cal_data: Path,
        output_path: Path,
    ) -> list[str]:
        """Build command for imatrix generation.

        Returns:
            Command list ready for subprocess execution.
        """
        return [
            str(self.imatrix_binary),
            "-m",
            str(f16_model_path),
            "-f",
            str(cal_data),
            "-o",
            str(output_path),
            "--chunks",
            # NOTE(review): believed to cap how many calibration chunks are
            # processed - confirm against the llama-imatrix version in use.
            "128",
        ]

    def _log_generation_start(
        self,
        f16_model_path: Path,
        cal_data: Path,
        output_path: Path,
    ) -> None:
        """Log the start of imatrix generation."""
        logger.info("🧮 Generating importance matrix...")
        logger.info(f"📊 Model: {f16_model_path.name}")
        logger.info(f"📝 Calibration data: {cal_data.name}")
        logger.info(f"💾 Output: {output_path.name}")

    def _execute_imatrix_generation(self, cmd: list[str], output_path: Path) -> bool:
        """Execute the imatrix generation process.

        Runs *cmd* with stderr merged into stdout, streams its output to the
        logger, then checks the exit status and the output file.

        Returns:
            True if generation completed successfully, False otherwise.
        """
        # Set LD_LIBRARY_PATH for shared libraries
        env = os.environ.copy()
        if platform.system() != "Windows":
            lib_path = str(self.binary_manager.BINARY_DIR)
            inherited = env.get("LD_LIBRARY_PATH")
            # Skip an empty inherited value: "dir:" would leave a trailing
            # empty entry, which the loader treats as the current directory.
            env["LD_LIBRARY_PATH"] = f"{lib_path}:{inherited}" if inherited else lib_path

        try:
            # The context manager closes the pipe and reaps the child on exit.
            with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                errors="replace",  # stray bytes in tool output must not abort the run
                bufsize=1,
                env=env,
            ) as process:
                try:
                    self._stream_process_output(process)
                except BaseException:
                    # Do not leave a long-running child behind if streaming is
                    # interrupted (e.g. Ctrl+C) or fails unexpectedly.
                    process.kill()
                    raise
                return self._handle_process_completion(process, output_path)

        except Exception as e:
            logger.error(f"❌ Imatrix generation failed: {e}")
            return False

    def _stream_process_output(self, process: subprocess.Popen[str]) -> None:
        """Stream output from the running process until its stdout reaches EOF."""
        if process.stdout is None:
            return

        for raw_line in process.stdout:
            # Filter progress updates for cleaner output
            line = raw_line.strip()
            if line and not line.startswith("["):
                logger.info(f"  {line}")

    def _handle_process_completion(self, process: subprocess.Popen[str], output_path: Path) -> bool:
        """Handle completion of the imatrix generation process.

        Returns:
            True if process completed successfully and output exists, False otherwise.
        """
        # wait() rather than poll(): the child may still be shutting down after
        # closing stdout, and poll() would then yield None instead of the code.
        return_code = process.wait()
        if return_code != 0:
            logger.error(f"❌ Imatrix generation failed with return code {return_code}")
            return False

        if not output_path.exists():
            logger.error("Generation completed but output file not found")
            return False

        size_mb = output_path.stat().st_size / (1024 * 1024)
        logger.info(f"✅ Generated imatrix: {output_path.name} ({size_mb:.1f} MB)")
        return True

    def prompt_for_generation(
        self,
        model_source: ModelSource,
        model_dir: Path,
        f16_model_path: Path,
    ) -> Path | None:
        """Prompt user to generate imatrix.

        Interactively prompts the user to generate an importance matrix
        for enhanced quantisation quality using the model source information,
        directory, and F16 model path. Checks binary availability before prompting.

        Args:
            model_source: Source description; only ``model_name`` is read here.
            model_dir: Directory that receives the generated ``imatrix.dat``.
            f16_model_path: Model file used for generation.

        Returns:
            Path to generated imatrix or None if skipped.
        """
        if not self.can_generate():
            logger.info("⚠️ Imatrix generation not available (missing binary or calibration data)")
            return None

        logger.info("\n" + "=" * 70)
        logger.info("📊 Importance Matrix Generation")
        logger.info("=" * 70)
        logger.info(
            "\nImportance matrices improve quantisation quality by identifying"
            "\ncritical weights in the model. This process takes 5-10 minutes"
            "\nbut significantly improves the quality of smaller quantisations."
        )
        logger.info(f"\nModel: {model_source.model_name}")
        logger.info(f"Calibration data: {self.CALIBRATION_DATA.name}")

        response = input("\n❓ Generate importance matrix? (Y/n): ").strip().lower()

        # Default is "yes"; treat a spelled-out "no" like "n" so an explicit
        # refusal does not start a multi-minute generation run.
        if response in {"n", "no"}:
            logger.info("Skipping imatrix generation")
            return None

        # Generate imatrix
        output_path = model_dir / "imatrix.dat"
        logger.info("\n⏳ Generating importance matrix (this may take 5-10 minutes)...")

        if self.generate_imatrix(f16_model_path, output_path):
            return output_path

        logger.warning("Failed to generate imatrix, continuing without it")
        return None