llm-gguf-tools/helpers/services/llama_cpp.py

83 lines
3.1 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Importance matrix (imatrix) management service.
Manages detection and use of existing importance matrix files for
quantisation guidance. Provides user prompts for supplying pre-computed
imatrix files from external sources.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from helpers.logger import logger
from helpers.services.filesystem import FilesystemService
if TYPE_CHECKING:
from pathlib import Path
class IMatrixManager:
    """Handles importance matrix file management for quantisation.

    Locates an existing ``imatrix.dat`` inside a model directory or, when
    none is present, interactively asks the user to supply a pre-computed
    matrix from an external source. These matrices guide quantisation
    decisions to preserve model quality.
    """

    def __init__(self) -> None:
        """Initialise IMatrixManager."""
        # Used to report the size of a user-supplied imatrix file.
        self.fs = FilesystemService()

    @staticmethod
    def _read_line(prompt: str) -> str:
        """Read one line from stdin, returning "" when stdin is closed.

        ``input()`` raises ``EOFError`` on a closed or non-interactive stdin
        (CI jobs, pipes). Treating that as an empty answer lets callers fall
        back to their default instead of aborting the whole run.
        ``KeyboardInterrupt`` is deliberately not caught so Ctrl+C still
        cancels.
        """
        try:
            return input(prompt)
        except EOFError:
            return ""

    def find_imatrix(self, model_dir: Path) -> Path | None:
        """Find or prompt for importance matrix file.

        Searches for existing imatrix files first, then provides interactive
        prompts for user-supplied matrices. See docs/imatrix_data.md for
        instructions on generating imatrix files.

        Args:
            model_dir: Directory in which ``imatrix.dat`` is looked up.

        Returns:
            Path to imatrix file, or None if not available.
        """
        imatrix_path = model_dir / "imatrix.dat"

        # is_file() rather than exists(): a directory that merely carries the
        # expected name is not a usable matrix.
        if imatrix_path.is_file():
            logger.info(f"Found existing imatrix: {imatrix_path.name}")
            return imatrix_path

        # Nothing on disk yet - give the user a chance to supply one.
        return self._prompt_for_user_imatrix(model_dir, imatrix_path)

    def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Explains the available options, asks whether the user has a matrix to
        provide and, if so, waits for them to drop it into ``model_dir``
        before re-checking ``imatrix_path``.

        Args:
            model_dir: Directory the user is told to place the file in.
            imatrix_path: Location that is checked after the user confirms.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info("\n" + "=" * 70)
        logger.info("📊 No existing imatrix file found")
        logger.info("\nYou have two options:")
        logger.info(" 1. Provide a pre-computed imatrix file")
        logger.info(" (💡 see docs/imatrix_data.md to generate your own)")
        logger.info(" 2. Skip imatrix usage (lower quality quantisation)")
        logger.info("=" * 70)

        answer = self._read_line("\n❓ Do you have an imatrix file to provide? (y/N): ")
        # Default is "no": anything other than an explicit y/yes (including an
        # empty answer or a closed stdin) skips imatrix usage.
        if answer.strip().lower() not in {"y", "yes"}:
            logger.info("Continuing without imatrix (quantisation quality may be lower)")
            logger.info(" See docs/imatrix_data.md for instructions on generating imatrix files")  # noqa: RUF001
            return None

        logger.info(f"\nPlease place your imatrix.dat file in: {model_dir}")
        # The returned text is irrelevant; this only blocks until the user
        # confirms (or returns immediately if stdin is closed).
        self._read_line("⏳ Press Enter when you've placed the file (or Ctrl+C to cancel)...")

        if imatrix_path.is_file():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"✅ Found imatrix file! ({file_size})")
            return imatrix_path

        logger.warning("No imatrix.dat file found - continuing without imatrix")
        return None