322 lines
11 KiB
Python
322 lines
11 KiB
Python
"""Importance matrix operations for llama.cpp.
|
|
|
|
Handles importance matrix generation and management for improved
|
|
quantisation quality.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import platform
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
from helpers.filesystem import FilesystemService
|
|
from helpers.llama_cpp.binary_manager import BinaryManager
|
|
from helpers.logger import logger
|
|
|
|
if TYPE_CHECKING:
|
|
from helpers.models.quantisation import ModelSource
|
|
|
|
|
|
class IMatrixHandler:
    """Handles importance matrix file management.

    Manages detection and use of existing importance matrix files for
    quantisation guidance. Files are looked up under a fixed name
    (``IMATRIX_FILENAME``) directly inside the model directory.
    """

    # Name of the imatrix file expected inside a model directory.
    IMATRIX_FILENAME = "imatrix.dat"

    def __init__(self) -> None:
        """Initialise IMatrixHandler."""
        self.fs = FilesystemService()

    @staticmethod
    def _is_affirmative(response: str) -> bool:
        """Interpret a raw answer to a ``(y/N)`` prompt.

        Args:
            response: Text exactly as returned by ``input()``.

        Returns:
            True for ``y`` or ``yes`` (case and surrounding whitespace are
            ignored); False for anything else, including an empty answer,
            because the prompt defaults to "no".
        """
        return response.strip().lower() in {"y", "yes"}

    def find_imatrix(self, model_dir: Path) -> Path | None:
        """Find existing imatrix file in model directory.

        Args:
            model_dir: Directory expected to contain the imatrix file.

        Returns:
            Path to imatrix file if found, None otherwise.
        """
        imatrix_path = model_dir / self.IMATRIX_FILENAME
        if not imatrix_path.exists():
            return None

        # The size is only used for the log line below.
        file_size = self.fs.get_file_size(imatrix_path)
        logger.info(f"Found existing imatrix: {imatrix_path.name} ({file_size})")
        return imatrix_path

    def prompt_for_user_imatrix(self, model_dir: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Asks on stdin whether the user can supply an imatrix file, waits for
        them to copy it into ``model_dir`` and then re-checks the directory.
        Ctrl+C at either prompt is not intercepted and propagates as
        ``KeyboardInterrupt``.

        Args:
            model_dir: Directory the user is asked to place the file in.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        imatrix_path = model_dir / self.IMATRIX_FILENAME

        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info(
            "Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
        )
        logger.info(
            " Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
        )

        response = input(
            "\n❓ Do you have an imatrix file to place in the model directory? (y/N): "
        )
        # Accept both "y" and "yes"; previously a typed "yes" was treated as "no".
        if not self._is_affirmative(response):
            return None

        logger.info(f"Please place your imatrix.dat file in: {model_dir}")
        input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")

        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"Found imatrix file! ({file_size})")
            return imatrix_path

        logger.warning("No imatrix.dat file found - continuing without imatrix")
        return None
|
|
|
|
|
|
class IMatrixGenerator:
    """Generates importance matrices for quantisation guidance.

    Uses llama-imatrix binary to compute importance matrices from
    calibration data, which helps preserve model quality during
    quantisation by identifying critical weights.
    """

    # Default calibration data location
    CALIBRATION_DATA = Path("resources") / "imatrix_data.txt"

    def __init__(self) -> None:
        """Initialise imatrix generator and resolve the binary to use."""
        self.binary_manager = BinaryManager()
        self.imatrix_binary = self._get_imatrix_binary()

    @staticmethod
    def _declined(response: str) -> bool:
        """Interpret a raw answer to a ``(Y/n)`` prompt.

        Args:
            response: Text exactly as returned by ``input()``.

        Returns:
            True only for ``n`` or ``no`` (case and surrounding whitespace
            are ignored). Anything else, including an empty answer, counts
            as consent because the prompt defaults to "yes".
        """
        return response.strip().lower() in {"n", "no"}

    def _get_imatrix_binary(self) -> Path | None:
        """Get llama-imatrix binary, downloading if necessary.

        A ``./llama-imatrix`` file in the working directory takes
        precedence and is returned without a functional check. Otherwise
        the binary supplied by ``BinaryManager`` is used, but only if
        ``check_binary_works`` accepts it.

        Returns:
            Path to binary if found, None otherwise.
        """
        # First check local directory for manual placement
        local_binary = Path("./llama-imatrix")
        if local_binary.exists():
            logger.info(f"Using local llama-imatrix binary: {local_binary}")
            return local_binary

        # Download from GitHub releases
        binary_path = self.binary_manager.get_imatrix_binary()
        if binary_path and self.binary_manager.check_binary_works(binary_path):
            logger.info(f"Using llama-imatrix binary: {binary_path}")
            return binary_path

        logger.warning("llama-imatrix binary not available")
        return None

    def can_generate(self) -> bool:
        """Check if imatrix generation is available.

        Returns:
            True if binary and calibration data are available.
        """
        return self.imatrix_binary is not None and self.CALIBRATION_DATA.exists()

    def generate_imatrix(
        self,
        f16_model_path: Path,
        output_path: Path,
        calibration_data: Path | None = None,
    ) -> bool:
        """Generate importance matrix for a model.

        Args:
            f16_model_path: Model file passed to the binary via ``-m``.
            output_path: Destination passed to the binary via ``-o``.
            calibration_data: Calibration text file; falls back to
                ``CALIBRATION_DATA`` when None.

        Returns:
            True if generation successful, False otherwise. Failures are
            logged rather than raised.
        """
        validation_error = self._validate_generation_inputs(f16_model_path, calibration_data)
        if validation_error:
            logger.error(validation_error)
            return False

        cal_data = calibration_data or self.CALIBRATION_DATA
        cmd = self._build_imatrix_command(f16_model_path, cal_data, output_path)

        self._log_generation_start(f16_model_path, cal_data, output_path)

        return self._execute_imatrix_generation(cmd, output_path)

    def _validate_generation_inputs(
        self,
        f16_model_path: Path,
        calibration_data: Path | None,
    ) -> str | None:
        """Validate inputs for imatrix generation.

        Checks, in order: the binary is known, the model file exists, and
        the calibration file (explicit or default) exists.

        Returns:
            Error message if validation fails, None if valid.
        """
        if not self.imatrix_binary:
            return "llama-imatrix binary not available"

        if not f16_model_path.exists():
            return f"Model file not found: {f16_model_path}"

        cal_data = calibration_data or self.CALIBRATION_DATA
        if not cal_data.exists():
            return f"Calibration data not found: {cal_data}"

        return None

    def _build_imatrix_command(
        self,
        f16_model_path: Path,
        cal_data: Path,
        output_path: Path,
    ) -> list[str]:
        """Build command for imatrix generation.

        Returns:
            Command list ready for subprocess execution.
        """
        return [
            str(self.imatrix_binary),
            "-m",
            str(f16_model_path),
            "-f",
            str(cal_data),
            "-o",
            str(output_path),
            "--chunks",
            "128",  # Process in chunks for stability
        ]

    def _log_generation_start(
        self,
        f16_model_path: Path,
        cal_data: Path,
        output_path: Path,
    ) -> None:
        """Log the start of imatrix generation."""
        logger.info("🧮 Generating importance matrix...")
        logger.info(f"📊 Model: {f16_model_path.name}")
        logger.info(f"📝 Calibration data: {cal_data.name}")
        logger.info(f"💾 Output: {output_path.name}")

    def _build_environment(self) -> dict[str, str]:
        """Build the child-process environment.

        Returns:
            A copy of ``os.environ``. On non-Windows platforms
            ``LD_LIBRARY_PATH`` is prefixed with the binary manager's
            ``BINARY_DIR`` so the binary can find its shared libraries.
        """
        env = os.environ.copy()
        if platform.system() != "Windows":
            lib_path = str(self.binary_manager.BINARY_DIR)
            existing = env.get("LD_LIBRARY_PATH")
            # Only append a non-empty existing value: a trailing ":" is an
            # empty entry, which the dynamic loader treats as the current
            # working directory.
            env["LD_LIBRARY_PATH"] = f"{lib_path}:{existing}" if existing else lib_path
        return env

    def _execute_imatrix_generation(self, cmd: list[str], output_path: Path) -> bool:
        """Execute the imatrix generation process.

        Runs ``cmd`` with stderr merged into stdout, streams its output to
        the logger and then checks the exit status and output file.

        Returns:
            True if generation completed successfully, False otherwise.
        """
        # Set LD_LIBRARY_PATH for shared libraries
        env = self._build_environment()

        try:
            # The context manager closes the stdout pipe and reaps the child.
            with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
                env=env,
            ) as process:
                try:
                    self._stream_process_output(process)
                    return self._handle_process_completion(process, output_path)
                except BaseException:
                    # Do not leave the child running (or block waiting for
                    # it) when streaming fails or the user interrupts.
                    process.kill()
                    raise
        except Exception as e:
            logger.error(f"❌ Imatrix generation failed: {e}")
            return False

    def _stream_process_output(self, process: subprocess.Popen[str]) -> None:
        """Stream output from the running process.

        Reads the child's stdout line by line until end-of-file. Iterating
        the pipe blocks until data or EOF arrives, so there is no polling
        loop that could spin while the child is shutting down.
        """
        stream = process.stdout
        if stream is None:
            return

        for raw_line in stream:
            # Filter progress updates for cleaner output
            line = raw_line.strip()
            if line and not line.startswith("["):
                logger.info(f" {line}")

    def _handle_process_completion(self, process: subprocess.Popen[str], output_path: Path) -> bool:
        """Handle completion of the imatrix generation process.

        Returns:
            True if process completed successfully and output exists, False otherwise.
        """
        # wait() rather than poll(): poll() returns None while the child is
        # still exiting, which would be misreported as a failure.
        return_code = process.wait()
        if return_code != 0:
            logger.error(f"❌ Imatrix generation failed with return code {return_code}")
            return False

        if not output_path.exists():
            logger.error("Generation completed but output file not found")
            return False

        size_mb = output_path.stat().st_size / (1024 * 1024)
        logger.info(f"✅ Generated imatrix: {output_path.name} ({size_mb:.1f} MB)")
        return True

    def prompt_for_generation(
        self,
        model_source: ModelSource,
        model_dir: Path,
        f16_model_path: Path,
    ) -> Path | None:
        """Prompt user to generate imatrix.

        Interactively prompts the user to generate an importance matrix
        for enhanced quantisation quality using the model source information,
        directory, and F16 model path. Checks binary availability before prompting.

        Args:
            model_source: Only its ``model_name`` is read, for display.
            model_dir: Directory in which ``imatrix.dat`` is written.
            f16_model_path: Model file to compute the matrix from.

        Returns:
            Path to generated imatrix or None if skipped.
        """
        if not self.can_generate():
            logger.info("⚠️ Imatrix generation not available (missing binary or calibration data)")
            return None

        logger.info("\n" + "=" * 70)
        logger.info("📊 Importance Matrix Generation")
        logger.info("=" * 70)
        logger.info(
            "\nImportance matrices improve quantisation quality by identifying"
            "\ncritical weights in the model. This process takes 5-10 minutes"
            "\nbut significantly improves the quality of smaller quantisations."
        )
        logger.info(f"\nModel: {model_source.model_name}")
        logger.info(f"Calibration data: {self.CALIBRATION_DATA.name}")

        response = input("\n❓ Generate importance matrix? (Y/n): ")

        # Accept both "n" and "no"; previously a typed "no" started generation.
        if self._declined(response):
            logger.info("Skipping imatrix generation")
            return None

        # Generate imatrix
        output_path = model_dir / "imatrix.dat"
        logger.info("\n⏳ Generating importance matrix (this may take 5-10 minutes)...")

        if self.generate_imatrix(f16_model_path, output_path):
            return output_path

        logger.warning("Failed to generate imatrix, continuing without it")
        return None
|