llm-gguf-tools/helpers/llama_cpp/imatrix.py
2025-08-09 17:16:02 +01:00

322 lines
11 KiB
Python

"""Importance matrix operations for llama.cpp.
Handles importance matrix generation and management for improved
quantisation quality.
"""
from __future__ import annotations
import os
import platform
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING
from helpers.filesystem import FilesystemService
from helpers.llama_cpp.binary_manager import BinaryManager
from helpers.logger import logger
if TYPE_CHECKING:
from helpers.models.quantisation import ModelSource
class IMatrixHandler:
"""Handles importance matrix file management.
Manages detection and use of existing importance matrix files for
quantisation guidance.
"""
def __init__(self) -> None:
"""Initialise IMatrixHandler."""
self.fs = FilesystemService()
def find_imatrix(self, model_dir: Path) -> Path | None:
"""Find existing imatrix file in model directory.
Returns:
Path to imatrix file if found, None otherwise.
"""
imatrix_path = model_dir / "imatrix.dat"
if imatrix_path.exists():
file_size = self.fs.get_file_size(imatrix_path)
logger.info(f"Found existing imatrix: {imatrix_path.name} ({file_size})")
return imatrix_path
return None
def prompt_for_user_imatrix(self, model_dir: Path) -> Path | None:
"""Prompt user for existing imatrix file.
Returns:
Path to user-provided imatrix, or None if not available.
"""
imatrix_path = model_dir / "imatrix.dat"
logger.info(f"Model directory: {model_dir}")
logger.info(f"Looking for imatrix file at: {imatrix_path}")
logger.info(
"Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
)
logger.info(
" Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
)
response = (
input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
.strip()
.lower()
)
if response != "y":
return None
logger.info(f"Please place your imatrix.dat file in: {model_dir}")
input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")
if imatrix_path.exists():
file_size = self.fs.get_file_size(imatrix_path)
logger.info(f"Found imatrix file! ({file_size})")
return imatrix_path
logger.warning("No imatrix.dat file found - continuing without imatrix")
return None
class IMatrixGenerator:
"""Generates importance matrices for quantisation guidance.
Uses llama-imatrix binary to compute importance matrices from
calibration data, which helps preserve model quality during
quantisation by identifying critical weights.
"""
# Default calibration data location
CALIBRATION_DATA = Path("resources") / "imatrix_data.txt"
def __init__(self) -> None:
"""Initialise imatrix generator."""
self.binary_manager = BinaryManager()
self.imatrix_binary = self._get_imatrix_binary()
def _get_imatrix_binary(self) -> Path | None:
"""Get llama-imatrix binary, downloading if necessary.
Returns:
Path to binary if found, None otherwise.
"""
# First check local directory for manual placement
local_binary = Path("./llama-imatrix")
if local_binary.exists():
logger.info(f"Using local llama-imatrix binary: {local_binary}")
return local_binary
# Download from GitHub releases
binary_path = self.binary_manager.get_imatrix_binary()
if binary_path and self.binary_manager.check_binary_works(binary_path):
logger.info(f"Using llama-imatrix binary: {binary_path}")
return binary_path
logger.warning("llama-imatrix binary not available")
return None
def can_generate(self) -> bool:
"""Check if imatrix generation is available.
Returns:
True if binary and calibration data are available.
"""
return self.imatrix_binary is not None and self.CALIBRATION_DATA.exists()
def generate_imatrix(
self,
f16_model_path: Path,
output_path: Path,
calibration_data: Path | None = None,
) -> bool:
"""Generate importance matrix for a model.
Returns:
True if generation successful, False otherwise.
"""
validation_error = self._validate_generation_inputs(f16_model_path, calibration_data)
if validation_error:
logger.error(validation_error)
return False
cal_data = calibration_data or self.CALIBRATION_DATA
cmd = self._build_imatrix_command(f16_model_path, cal_data, output_path)
self._log_generation_start(f16_model_path, cal_data, output_path)
return self._execute_imatrix_generation(cmd, output_path)
def _validate_generation_inputs(
self,
f16_model_path: Path,
calibration_data: Path | None,
) -> str | None:
"""Validate inputs for imatrix generation.
Returns:
Error message if validation fails, None if valid.
"""
if not self.imatrix_binary:
return "llama-imatrix binary not available"
if not f16_model_path.exists():
return f"Model file not found: {f16_model_path}"
cal_data = calibration_data or self.CALIBRATION_DATA
if not cal_data.exists():
return f"Calibration data not found: {cal_data}"
return None
def _build_imatrix_command(
self,
f16_model_path: Path,
cal_data: Path,
output_path: Path,
) -> list[str]:
"""Build command for imatrix generation.
Returns:
Command list ready for subprocess execution.
"""
return [
str(self.imatrix_binary),
"-m",
str(f16_model_path),
"-f",
str(cal_data),
"-o",
str(output_path),
"--chunks",
"128", # Process in chunks for stability
]
def _log_generation_start(
self,
f16_model_path: Path,
cal_data: Path,
output_path: Path,
) -> None:
"""Log the start of imatrix generation."""
logger.info("🧮 Generating importance matrix...")
logger.info(f"📊 Model: {f16_model_path.name}")
logger.info(f"📝 Calibration data: {cal_data.name}")
logger.info(f"💾 Output: {output_path.name}")
def _execute_imatrix_generation(self, cmd: list[str], output_path: Path) -> bool:
"""Execute the imatrix generation process.
Returns:
True if generation completed successfully, False otherwise.
"""
# Set LD_LIBRARY_PATH for shared libraries
env = os.environ.copy()
if platform.system() != "Windows":
lib_path = str(self.binary_manager.BINARY_DIR)
if "LD_LIBRARY_PATH" in env:
env["LD_LIBRARY_PATH"] = f"{lib_path}:{env['LD_LIBRARY_PATH']}"
else:
env["LD_LIBRARY_PATH"] = lib_path
try:
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
bufsize=1,
env=env,
)
self._stream_process_output(process)
return self._handle_process_completion(process, output_path)
except Exception as e:
logger.error(f"❌ Imatrix generation failed: {e}")
return False
def _stream_process_output(self, process: subprocess.Popen[str]) -> None:
"""Stream output from the running process."""
while True:
if process.stdout is not None:
output = process.stdout.readline()
else:
break
if not output and process.poll() is not None:
break
if output:
# Filter progress updates for cleaner output
line = output.strip()
if line and not line.startswith("["):
logger.info(f" {line}")
def _handle_process_completion(self, process: subprocess.Popen[str], output_path: Path) -> bool:
"""Handle completion of the imatrix generation process.
Returns:
True if process completed successfully and output exists, False otherwise.
"""
return_code = process.poll()
if return_code != 0:
logger.error(f"❌ Imatrix generation failed with return code {return_code}")
return False
if not output_path.exists():
logger.error("Generation completed but output file not found")
return False
size_mb = output_path.stat().st_size / (1024 * 1024)
logger.info(f"✅ Generated imatrix: {output_path.name} ({size_mb:.1f} MB)")
return True
def prompt_for_generation(
self,
model_source: ModelSource,
model_dir: Path,
f16_model_path: Path,
) -> Path | None:
"""Prompt user to generate imatrix.
Interactively prompts the user to generate an importance matrix
for enhanced quantisation quality using the model source information,
directory, and F16 model path. Checks binary availability before prompting.
Returns:
Path to generated imatrix or None if skipped.
"""
if not self.can_generate():
logger.info("⚠️ Imatrix generation not available (missing binary or calibration data)")
return None
logger.info("\n" + "=" * 70)
logger.info("📊 Importance Matrix Generation")
logger.info("=" * 70)
logger.info(
"\nImportance matrices improve quantisation quality by identifying"
"\ncritical weights in the model. This process takes 5-10 minutes"
"\nbut significantly improves the quality of smaller quantisations."
)
logger.info(f"\nModel: {model_source.model_name}")
logger.info(f"Calibration data: {self.CALIBRATION_DATA.name}")
response = input("\n❓ Generate importance matrix? (Y/n): ").strip().lower()
if response == "n":
logger.info("Skipping imatrix generation")
return None
# Generate imatrix
output_path = model_dir / "imatrix.dat"
logger.info("\n⏳ Generating importance matrix (this may take 5-10 minutes)...")
if self.generate_imatrix(f16_model_path, output_path):
return output_path
logger.warning("Failed to generate imatrix, continuing without it")
return None