83 lines
3.1 KiB
Python
83 lines
3.1 KiB
Python
"""Importance matrix (imatrix) management service.

Manages detection and use of existing importance matrix files for
quantisation guidance. Provides user prompts for supplying pre-computed
imatrix files from external sources.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import TYPE_CHECKING
|
||
|
||
from helpers.logger import logger
|
||
from helpers.services.filesystem import FilesystemService
|
||
|
||
if TYPE_CHECKING:
|
||
from pathlib import Path
|
||
|
||
|
||
class IMatrixManager:
|
||
"""Handles importance matrix file management for quantisation.
|
||
|
||
Locates existing importance matrix files or prompts users to provide
|
||
pre-computed matrices from external sources. These matrices guide
|
||
quantisation decisions to preserve model quality.
|
||
"""
|
||
|
||
def __init__(self) -> None:
|
||
"""Initialise IMatrixManager."""
|
||
self.fs = FilesystemService()
|
||
|
||
def find_imatrix(self, model_dir: Path) -> Path | None:
|
||
"""Find or prompt for importance matrix file.
|
||
|
||
Searches for existing imatrix files first, then provides interactive
|
||
prompts for user-supplied matrices. See docs/imatrix_data.md for
|
||
instructions on generating imatrix files.
|
||
|
||
Returns:
|
||
Path to imatrix file, or None if not available.
|
||
"""
|
||
imatrix_path = model_dir / "imatrix.dat"
|
||
|
||
# Check for existing imatrix
|
||
if imatrix_path.exists():
|
||
logger.info(f"Found existing imatrix: {imatrix_path.name}")
|
||
return imatrix_path
|
||
|
||
# Try user-provided imatrix
|
||
return self._prompt_for_user_imatrix(model_dir, imatrix_path)
|
||
|
||
def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
|
||
"""Prompt user for existing imatrix file.
|
||
|
||
Returns:
|
||
Path to user-provided imatrix, or None if not available.
|
||
"""
|
||
logger.info(f"Model directory: {model_dir}")
|
||
logger.info(f"Looking for imatrix file at: {imatrix_path}")
|
||
logger.info("\n" + "=" * 70)
|
||
logger.info("📊 No existing imatrix file found")
|
||
logger.info("\nYou have two options:")
|
||
logger.info(" 1. Provide a pre-computed imatrix file")
|
||
logger.info(" (💡 see docs/imatrix_data.md to generate your own)")
|
||
logger.info(" 2. Skip imatrix usage (lower quality quantisation)")
|
||
logger.info("=" * 70)
|
||
|
||
response = input("\n❓ Do you have an imatrix file to provide? (y/N): ").strip().lower()
|
||
|
||
if response != "y":
|
||
logger.info("Continuing without imatrix (quantisation quality may be lower)")
|
||
logger.info("ℹ️ See docs/imatrix_data.md for instructions on generating imatrix files") # noqa: RUF001
|
||
return None
|
||
|
||
logger.info(f"\nPlease place your imatrix.dat file in: {model_dir}")
|
||
input("⏳ Press Enter when you've placed the file (or Ctrl+C to cancel)...")
|
||
|
||
if imatrix_path.exists():
|
||
file_size = self.fs.get_file_size(imatrix_path)
|
||
logger.info(f"✅ Found imatrix file! ({file_size})")
|
||
return imatrix_path
|
||
|
||
logger.warning("No imatrix.dat file found - continuing without imatrix")
|
||
return None
|