"""llama.cpp environment and operations service.
Manages llama.cpp binary discovery, environment setup, and imatrix generation.
Provides consistent interface for interacting with llama.cpp tools across
different installation methods.
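
Typical usage (an illustrative sketch; ``work_dir``, ``f16_path`` and
``model_dir`` are assumed to be existing ``Path`` objects):

    env = EnvironmentManager(work_dir).setup()
    imatrix = IMatrixGenerator().generate_imatrix(f16_path, env, model_dir)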
"""

from __future__ import annotations

import subprocess
from pathlib import Path

from helpers.logger import logger
from helpers.models.quantisation import LlamaCppEnvironment
from helpers.services.filesystem import FilesystemService


class EnvironmentManager:
"""Manages llama.cpp environment setup and binary discovery.
Handles detection of local binaries, repository setup, and conversion
script location. Provides fallback strategies for different installation
scenarios including local builds and repository-based setups.
"""

    def __init__(self, work_dir: Path) -> None:
"""Initialise EnvironmentManager."""
self.work_dir = work_dir
self.llama_cpp_dir = work_dir / "llama.cpp"
self.fs = FilesystemService()

    def setup(self) -> LlamaCppEnvironment:
        """Set up llama.cpp environment with automatic detection.

        Checks for local llama.cpp binaries first, then falls back to
        repository-based setup if needed. Handles conversion script location,
        dependency installation, and path resolution.

        Returns:
            Configured LlamaCppEnvironment instance.
        """
        # Check for local binaries first
        local_env = self._check_local_binaries()
        if local_env:
            # Local binaries may still rely on the repository conversion
            # script; ensure the repository is present before returning
            if local_env.use_repo and not self.llama_cpp_dir.exists():
                return self.setup_repository()
            return local_env

        # Set up repository if needed
        return self.setup_repository()

    def _check_local_binaries(self) -> LlamaCppEnvironment | None:
        """Check for existing llama.cpp binaries in current directory.

        Searches for the quantise and CLI binaries in the current working
        directory and locates a conversion script alongside them.

        Returns:
            LlamaCppEnvironment if binaries found, None otherwise.
        """
quantise_bin = Path("./llama-quantize")
cli_bin = Path("./llama-cli")
if not (quantise_bin.exists() and cli_bin.exists()):
return None
logger.info("Found llama.cpp binaries in current directory")
# Check for conversion script
convert_script = self._find_convert_script()
if convert_script:
logger.info(f"Found conversion script: {convert_script}")
return LlamaCppEnvironment(
quantise_binary=quantise_bin.resolve(),
cli_binary=cli_bin.resolve(),
convert_script=convert_script,
use_repo=False,
)
logger.warning("No conversion script found in current directory")
logger.info("Will use llama.cpp repository method for conversion")
return LlamaCppEnvironment(
quantise_binary=quantise_bin.resolve(),
cli_binary=cli_bin.resolve(),
convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
use_repo=True,
)

    def _find_convert_script(self) -> str | None:
        """Find conversion script in current directory.

        Searches for various naming conventions of the HF to GGUF
        conversion script.

        Returns:
            Command to run conversion script, or None if not found.
        """
scripts = [
"./llama-convert-hf-to-gguf",
"python3 ./convert_hf_to_gguf.py",
"python3 ./convert-hf-to-gguf.py",
]
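        # Entries prefixed with "python3" are invoker + script; only the
        # script path component is tested for existence below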
for script in scripts:
if script.startswith("python3"):
script_path = script.split(" ", 1)[1]
if Path(script_path).exists():
return script
elif Path(script).exists():
return script
return None

    def setup_repository(self) -> LlamaCppEnvironment:
        """Set up llama.cpp repository for conversion scripts.

        Clones the llama.cpp repository if not present and installs
        Python dependencies for model conversion.

        Returns:
            LlamaCppEnvironment configured with repository paths.
        """
if not self.llama_cpp_dir.exists():
logger.info("Cloning llama.cpp for conversion script...")
subprocess.run(
[
"git",
"clone",
"https://github.com/ggerganov/llama.cpp.git",
str(self.llama_cpp_dir),
],
check=True,
)
# Install Python requirements
logger.info("Installing Python requirements...")
subprocess.run(
[
"pip3",
"install",
"-r",
"requirements.txt",
"--break-system-packages",
"--root-user-action=ignore",
],
cwd=self.llama_cpp_dir,
check=True,
)
# Install additional conversion dependencies
logger.info("Installing additional conversion dependencies...")
subprocess.run(
[
"pip3",
"install",
"transformers",
"sentencepiece",
"protobuf",
"--break-system-packages",
"--root-user-action=ignore",
],
check=True,
)
        else:
            logger.info("llama.cpp repository already exists")

        # Use local binaries (if present) with the repository conversion script
        quantise_bin = Path("./llama-quantize").resolve()
        cli_bin = Path("./llama-cli").resolve()
        if not (quantise_bin.exists() and cli_bin.exists()):
            logger.warning(
                "llama-quantize/llama-cli not found locally - quantisation "
                "will fail until the llama.cpp binaries are built or installed"
            )
        return LlamaCppEnvironment(
            quantise_binary=quantise_bin,
            cli_binary=cli_bin,
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=True,  # conversion script comes from the cloned repository
        )
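
# Illustrative resolution order for EnvironmentManager.setup() (assumes any
# llama.cpp binaries live in the current working directory):
#   1. ./llama-quantize + ./llama-cli + local convert script -> use local script
#   2. ./llama-quantize + ./llama-cli, no local script       -> clone llama.cpp
#      and use its convert_hf_to_gguf.py
#   3. no local binaries                                     -> repository setup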


class IMatrixGenerator:
    """Handles importance matrix generation for quantisation guidance.

    Generates or locates importance matrices that guide quantisation
    decisions, helping preserve model quality by identifying critical
    tensors requiring higher precision.
    """

    def __init__(self) -> None:
"""Initialise IMatrixGenerator."""
self.fs = FilesystemService()

    def generate_imatrix(
        self, f16_model_path: Path, llama_env: LlamaCppEnvironment, model_dir: Path
    ) -> Path | None:
        """Generate importance matrix for quantisation guidance.

        Searches for existing imatrix files first, provides interactive
        prompts for user-supplied matrices, then generates new matrices
        using calibration data if necessary.

        Returns:
            Path to imatrix file, or None if generation fails.
        """
imatrix_path = model_dir / "imatrix.dat"
# Check for existing imatrix
if imatrix_path.exists():
logger.info(f"Found existing imatrix: {imatrix_path.name}")
return imatrix_path
# Try user-provided imatrix
user_imatrix = self._prompt_for_user_imatrix(model_dir, imatrix_path)
if user_imatrix:
return user_imatrix
# Generate new imatrix
calibration_file = self._get_calibration_file()
if not calibration_file:
return None
return self._generate_new_imatrix(f16_model_path, llama_env, imatrix_path, calibration_file)

    def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
logger.info(f"Model directory: {model_dir}")
logger.info(f"Looking for imatrix file at: {imatrix_path}")
logger.info(
"Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
)
logger.info(
" Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
)
response = (
input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
.strip()
.lower()
)
if response != "y":
return None
logger.info(f"Please place your imatrix.dat file in: {model_dir}")
input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")
if imatrix_path.exists():
file_size = self.fs.get_file_size(imatrix_path)
logger.info(f"Found imatrix file! ({file_size})")
return imatrix_path
logger.warning("No imatrix.dat file found - continuing with automatic generation")
return None

    def _get_calibration_file(self) -> Path | None:
        """Get calibration data file for imatrix generation.

        Returns:
            Path to calibration file, or None if not found.
        """
calibration_file = Path(__file__).parent.parent.parent / "resources" / "imatrix_data.txt"
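        # The expected corpus is Bartowski's calibration_datav3 dataset (see
        # URL below): general text over which activation statistics are
        # collected to estimate per-tensor importance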
if not calibration_file.exists():
logger.warning("resources/imatrix_data.txt not found - skipping imatrix generation")
logger.info(
"Download from: https://gist.githubusercontent.com/bartowski1182/"
"eb213dccb3571f863da82e99418f81e8/raw/calibration_datav3.txt"
)
return None
return calibration_file

    def _generate_new_imatrix(
        self,
        f16_model_path: Path,
        llama_env: LlamaCppEnvironment,
        imatrix_path: Path,
        calibration_file: Path,
    ) -> Path | None:
        """Generate new importance matrix using calibration data.

        Returns:
            Path to generated imatrix, or None if generation fails.
        """
logger.info("Generating importance matrix (this may take 1-4 hours for large models)...")
logger.info(f"Model: {f16_model_path.name}")
logger.info(f"Calibration: {calibration_file}")
logger.info(f"Output: {imatrix_path}")
# Find imatrix binary
imatrix_binary = self._find_imatrix_binary(llama_env)
if not imatrix_binary:
logger.warning("llama-imatrix binary not found - skipping imatrix generation")
logger.info("Make sure llama-imatrix is in the same directory as llama-quantize")
return None
# Build and execute command
cmd = self._build_imatrix_command(
imatrix_binary, f16_model_path, calibration_file, imatrix_path
)
return self._execute_imatrix_generation(cmd, imatrix_path)

    def _build_imatrix_command(
        self, binary: Path, model_path: Path, calibration_file: Path, output_path: Path
    ) -> list[str]:
        """Build imatrix generation command.

        Returns:
            Command arguments as list.
        """
        return [
            str(binary),
            "-m",
            str(model_path),  # source model (F16 GGUF)
            "-f",
            str(calibration_file),  # calibration text corpus
            "-o",
            str(output_path),
            "--process-output",  # also collect statistics for the output tensor
            "--output-frequency",
            "10",  # write the interim imatrix every 10 chunks
            "--save-frequency",
            "50",  # keep a numbered snapshot every 50 chunks
            "-t",
            "8",  # worker threads
            "-c",
            "2048",  # context size per evaluation chunk
            "-b",
            "512",  # batch size
        ]

    def _execute_imatrix_generation(self, cmd: list[str], imatrix_path: Path) -> Path | None:
        """Execute imatrix generation command with real-time output.

        Returns:
            Path to generated imatrix file, or None if generation fails.
        """
logger.info(f"Running: {' '.join(cmd)}")
logger.info("Starting imatrix generation... (progress will be shown)")
        try:
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr so progress lines stay in order
                universal_newlines=True,
                bufsize=1,  # line-buffered in text mode for real-time streaming
            )

            self._stream_imatrix_output(process)

            return_code = process.wait()
            if return_code == 0:
                return self._validate_imatrix_output(imatrix_path)
        except KeyboardInterrupt:
            logger.info("imatrix generation cancelled by user")
            process.terminate()
            process.wait()  # reap the child so it doesn't linger as a zombie
            return None
except Exception as e:
logger.error(f"imatrix generation failed with exception: {e}")
return None
else:
logger.error(f"imatrix generation failed with return code {return_code}")
return None

    def _stream_imatrix_output(self, process: subprocess.Popen) -> None:
        """Stream imatrix generation output in real-time."""
        if process.stdout is None:
            return
        while True:
            output = process.stdout.readline()
            if not output and process.poll() is not None:
                break
            if output:
                line = output.strip()
                if self._should_log_imatrix_line(line):
                    logger.info(line)

    def _should_log_imatrix_line(self, line: str) -> bool:
        """Determine if imatrix output line should be logged.

        Returns:
            True if line should be logged, False otherwise.
        """
keywords = ["Computing imatrix", "perplexity:", "save_imatrix", "entries =", "ETA"]
return any(keyword in line for keyword in keywords) or line.startswith("[")

    def _validate_imatrix_output(self, imatrix_path: Path) -> Path | None:
        """Validate generated imatrix file.

        Returns:
            Path to imatrix if valid, None otherwise.
        """
if imatrix_path.exists():
file_size = self.fs.get_file_size(imatrix_path)
logger.info(f"imatrix generation successful! ({file_size})")
return imatrix_path
logger.error("imatrix generation completed but file not found")
return None

    def _find_imatrix_binary(self, llama_env: LlamaCppEnvironment) -> Path | None:
        """Find llama-imatrix binary in common locations.

        Searches for the imatrix binary in the current directory and
        standard installation paths.

        Returns:
            Path to imatrix binary, or None if not found.
        """
candidates = [
Path("./llama-imatrix"),
llama_env.quantise_binary.parent / "llama-imatrix",
Path("/usr/local/bin/llama-imatrix"),
Path("/usr/bin/llama-imatrix"),
]
for candidate in candidates:
if candidate.exists() and candidate.is_file():
return candidate
return None