Initial commit

This commit is contained in:
Tom Foster 2025-08-07 18:29:12 +01:00
commit ef7df1a8c3
28 changed files with 6829 additions and 0 deletions

View file

@ -0,0 +1,417 @@
"""llama.cpp environment and operations service.
Manages llama.cpp binary discovery, environment setup, and imatrix generation.
Provides consistent interface for interacting with llama.cpp tools across
different installation methods.
"""
from __future__ import annotations
import subprocess
from pathlib import Path
from helpers.logger import logger
from helpers.models.quantisation import LlamaCppEnvironment
from helpers.services.filesystem import FilesystemService
class EnvironmentManager:
    """Manages llama.cpp environment setup and binary discovery.

    Handles detection of local binaries, repository setup, and conversion
    script location. Provides fallback strategies for different installation
    scenarios including local builds and repository-based setups.
    """

    # Candidate invocations for the HF→GGUF conversion script, checked in order.
    _CONVERT_CANDIDATES = (
        "./llama-convert-hf-to-gguf",
        "python3 ./convert_hf_to_gguf.py",
        "python3 ./convert-hf-to-gguf.py",
    )

    def __init__(self, work_dir: Path) -> None:
        """Initialise EnvironmentManager.

        Args:
            work_dir: Directory under which the llama.cpp repository may be cloned.
        """
        self.work_dir = work_dir
        self.llama_cpp_dir = work_dir / "llama.cpp"
        self.fs = FilesystemService()

    def setup(self) -> LlamaCppEnvironment:
        """Set up llama.cpp environment with automatic detection.

        Checks for local llama.cpp binaries first, then falls back to
        repository-based setup if needed. Handles conversion script location,
        dependency installation, and path resolution.

        Returns:
            Configured LlamaCppEnvironment instance.
        """
        # Prefer binaries already present in the current directory.
        local_env = self._check_local_binaries()
        if local_env:
            return local_env
        # Fall back to cloning the repository for the conversion script.
        return self.setup_repository()

    def _check_local_binaries(self) -> LlamaCppEnvironment | None:
        """Check for existing llama.cpp binaries in the current directory.

        Looks for the quantise and CLI binaries in the current working
        directory only, and then attempts to locate a conversion script.

        Returns:
            LlamaCppEnvironment if both binaries are found, None otherwise.
        """
        quantise_bin = Path("./llama-quantize")
        cli_bin = Path("./llama-cli")
        if not (quantise_bin.exists() and cli_bin.exists()):
            return None
        logger.info("Found llama.cpp binaries in current directory")

        convert_script = self._find_convert_script()
        if convert_script:
            logger.info(f"Found conversion script: {convert_script}")
            return LlamaCppEnvironment(
                quantise_binary=quantise_bin.resolve(),
                cli_binary=cli_bin.resolve(),
                convert_script=convert_script,
                use_repo=False,
            )

        # No local conversion script: quantisation uses local binaries but
        # conversion will require the cloned repository's script.
        logger.warning("No conversion script found in current directory")
        logger.info("Will use llama.cpp repository method for conversion")
        return LlamaCppEnvironment(
            quantise_binary=quantise_bin.resolve(),
            cli_binary=cli_bin.resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=True,
        )

    def _find_convert_script(self) -> str | None:
        """Find conversion script in current directory.

        Searches for various naming conventions of the HF to GGUF
        conversion script.

        Returns:
            Command to run conversion script, or None if not found.
        """
        for script in self._CONVERT_CANDIDATES:
            # For "python3 <path>" entries, test the script path, not the command.
            target = script.split(" ", 1)[1] if script.startswith("python3") else script
            if Path(target).exists():
                return script
        return None

    def setup_repository(self) -> LlamaCppEnvironment:
        """Setup llama.cpp repository for conversion scripts.

        Clones the llama.cpp repository if not present and installs
        Python dependencies for model conversion.

        Returns:
            LlamaCppEnvironment configured with repository paths.
        """
        if not self.llama_cpp_dir.exists():
            logger.info("Cloning llama.cpp for conversion script...")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "https://github.com/ggerganov/llama.cpp.git",
                    str(self.llama_cpp_dir),
                ],
                check=True,
            )
            self._install_conversion_dependencies()
        else:
            logger.info("llama.cpp repository already exists")

        # Local binaries handle quantisation, but conversion runs the repo's
        # script — so use_repo must be True here (bug fix: was False, which
        # contradicted the identical fallback in _check_local_binaries).
        return LlamaCppEnvironment(
            quantise_binary=Path("./llama-quantize").resolve(),
            cli_binary=Path("./llama-cli").resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=True,
        )

    def _install_conversion_dependencies(self) -> None:
        """Install Python dependencies required by the repo conversion script."""
        logger.info("Installing Python requirements...")
        subprocess.run(
            [
                "pip3",
                "install",
                "-r",
                "requirements.txt",
                "--break-system-packages",
                "--root-user-action=ignore",
            ],
            cwd=self.llama_cpp_dir,
            check=True,
        )
        logger.info("Installing additional conversion dependencies...")
        subprocess.run(
            [
                "pip3",
                "install",
                "transformers",
                "sentencepiece",
                "protobuf",
                "--break-system-packages",
                "--root-user-action=ignore",
            ],
            check=True,
        )
class IMatrixGenerator:
    """Handles importance matrix generation for quantisation guidance.

    Generates or locates importance matrices that guide quantisation
    decisions, helping preserve model quality by identifying critical
    tensors requiring higher precision.
    """

    def __init__(self) -> None:
        """Initialise IMatrixGenerator."""
        self.fs = FilesystemService()

    def generate_imatrix(
        self, f16_model_path: Path, llama_env: LlamaCppEnvironment, model_dir: Path
    ) -> Path | None:
        """Generate importance matrix for quantisation guidance.

        Searches for existing imatrix files first, provides interactive
        prompts for user-supplied matrices, then generates new matrices
        using calibration data if necessary.

        Args:
            f16_model_path: F16 GGUF model the matrix is computed against.
            llama_env: Environment used to locate the llama-imatrix binary.
            model_dir: Directory where imatrix.dat is looked up / written.

        Returns:
            Path to imatrix file, or None if generation fails.
        """
        imatrix_path = model_dir / "imatrix.dat"
        # Reuse an existing matrix if one is already present.
        if imatrix_path.exists():
            logger.info(f"Found existing imatrix: {imatrix_path.name}")
            return imatrix_path
        # Offer the user a chance to drop in a pre-computed matrix.
        user_imatrix = self._prompt_for_user_imatrix(model_dir, imatrix_path)
        if user_imatrix:
            return user_imatrix
        # Otherwise generate one from the bundled calibration data.
        calibration_file = self._get_calibration_file()
        if not calibration_file:
            return None
        return self._generate_new_imatrix(f16_model_path, llama_env, imatrix_path, calibration_file)

    def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info(
            "Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
        )
        logger.info(
            " Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
        )
        response = (
            input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
            .strip()
            .lower()
        )
        if response != "y":
            return None
        logger.info(f"Please place your imatrix.dat file in: {model_dir}")
        input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")
        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"Found imatrix file! ({file_size})")
            return imatrix_path
        logger.warning("No imatrix.dat file found - continuing with automatic generation")
        return None

    def _get_calibration_file(self) -> Path | None:
        """Get calibration data file for imatrix generation.

        Returns:
            Path to calibration file, or None if not found.
        """
        # resources/ sits three levels above this module (package root).
        calibration_file = Path(__file__).parent.parent.parent / "resources" / "imatrix_data.txt"
        if not calibration_file.exists():
            logger.warning("resources/imatrix_data.txt not found - skipping imatrix generation")
            logger.info(
                "Download from: https://gist.githubusercontent.com/bartowski1182/"
                "eb213dccb3571f863da82e99418f81e8/raw/calibration_datav3.txt"
            )
            return None
        return calibration_file

    def _generate_new_imatrix(
        self,
        f16_model_path: Path,
        llama_env: LlamaCppEnvironment,
        imatrix_path: Path,
        calibration_file: Path,
    ) -> Path | None:
        """Generate new importance matrix using calibration data.

        Returns:
            Path to generated imatrix, or None if generation fails.
        """
        logger.info("Generating importance matrix (this may take 1-4 hours for large models)...")
        logger.info(f"Model: {f16_model_path.name}")
        logger.info(f"Calibration: {calibration_file}")
        logger.info(f"Output: {imatrix_path}")

        imatrix_binary = self._find_imatrix_binary(llama_env)
        if not imatrix_binary:
            logger.warning("llama-imatrix binary not found - skipping imatrix generation")
            logger.info("Make sure llama-imatrix is in the same directory as llama-quantize")
            return None

        cmd = self._build_imatrix_command(
            imatrix_binary, f16_model_path, calibration_file, imatrix_path
        )
        return self._execute_imatrix_generation(cmd, imatrix_path)

    def _build_imatrix_command(
        self, binary: Path, model_path: Path, calibration_file: Path, output_path: Path
    ) -> list[str]:
        """Build imatrix generation command.

        Returns:
            Command arguments as list.
        """
        return [
            str(binary),
            "-m",
            str(model_path),
            "-f",
            str(calibration_file),
            "-o",
            str(output_path),
            "--process-output",
            "--output-frequency",
            "10",
            "--save-frequency",
            "50",
            "-t",
            "8",
            "-c",
            "2048",
            "-b",
            "512",
        ]

    def _execute_imatrix_generation(self, cmd: list[str], imatrix_path: Path) -> Path | None:
        """Execute imatrix generation command with real-time output.

        Returns:
            Path to generated imatrix file, or None if generation fails.
        """
        logger.info(f"Running: {' '.join(cmd)}")
        logger.info("Starting imatrix generation... (progress will be shown)")
        # Pre-initialise so the KeyboardInterrupt handler is safe even if
        # Popen itself is interrupted (bug fix: process was unbound there).
        process: subprocess.Popen | None = None
        try:
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                bufsize=1,  # line-buffered for real-time streaming
            )
            self._stream_imatrix_output(process)
            # wait() rather than poll(): stdout can reach EOF before the
            # child is reaped, and poll() would then return None, wrongly
            # reporting a successful run as a failure (bug fix).
            return_code = process.wait()
        except KeyboardInterrupt:
            logger.info("imatrix generation cancelled by user")
            if process is not None:
                process.terminate()
            return None
        except Exception as e:
            logger.error(f"imatrix generation failed with exception: {e}")
            return None
        if return_code == 0:
            return self._validate_imatrix_output(imatrix_path)
        logger.error(f"imatrix generation failed with return code {return_code}")
        return None

    def _stream_imatrix_output(self, process: subprocess.Popen) -> None:
        """Stream imatrix generation output in real-time."""
        if process.stdout is None:
            return
        while True:
            output = process.stdout.readline()
            # EOF with the process finished means we're done streaming.
            if not output and process.poll() is not None:
                break
            if output:
                line = output.strip()
                if self._should_log_imatrix_line(line):
                    logger.info(line)

    def _should_log_imatrix_line(self, line: str) -> bool:
        """Determine if imatrix output line should be logged.

        Returns:
            True if line should be logged, False otherwise.
        """
        # Progress lines start with "[", plus a fixed set of status keywords.
        keywords = ["Computing imatrix", "perplexity:", "save_imatrix", "entries =", "ETA"]
        return any(keyword in line for keyword in keywords) or line.startswith("[")

    def _validate_imatrix_output(self, imatrix_path: Path) -> Path | None:
        """Validate generated imatrix file.

        Returns:
            Path to imatrix if valid, None otherwise.
        """
        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"imatrix generation successful! ({file_size})")
            return imatrix_path
        logger.error("imatrix generation completed but file not found")
        return None

    def _find_imatrix_binary(self, llama_env: LlamaCppEnvironment) -> Path | None:
        """Find llama-imatrix binary in common locations.

        Searches for the imatrix binary in the current directory and
        standard installation paths.

        Returns:
            Path to imatrix binary, or None if not found.
        """
        candidates = [
            Path("./llama-imatrix"),
            llama_env.quantise_binary.parent / "llama-imatrix",
            Path("/usr/local/bin/llama-imatrix"),
            Path("/usr/bin/llama-imatrix"),
        ]
        for candidate in candidates:
            if candidate.exists() and candidate.is_file():
                return candidate
        return None