"""llama.cpp environment and operations service.
|
|
|
|
Manages llama.cpp binary discovery, environment setup, and imatrix generation.
|
|
Provides consistent interface for interacting with llama.cpp tools across
|
|
different installation methods.
|
|
"""

from __future__ import annotations

import subprocess
from pathlib import Path

from helpers.logger import logger
from helpers.models.quantisation import LlamaCppEnvironment
from helpers.services.filesystem import FilesystemService


class EnvironmentManager:
    """Manages llama.cpp environment setup and binary discovery.

    Handles detection of local binaries, repository setup, and conversion
    script location. Provides fallback strategies for different installation
    scenarios including local builds and repository-based setups.
    """

    def __init__(self, work_dir: Path) -> None:
        """Initialise EnvironmentManager."""
        self.work_dir = work_dir
        self.llama_cpp_dir = work_dir / "llama.cpp"
        self.fs = FilesystemService()
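
    # Typical usage (illustrative sketch; assumes llama.cpp binaries may live
    # in the current directory, as the detection methods below expect):
    #     manager = EnvironmentManager(Path("./work"))
    #     env = manager.setup()  # LlamaCppEnvironment with resolved binary paths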

    def setup(self) -> LlamaCppEnvironment:
        """Set up llama.cpp environment with automatic detection.

        Checks for local llama.cpp binaries first, then falls back to
        repository-based setup if needed. Handles conversion script location,
        dependency installation, and path resolution.

        Returns:
            Configured LlamaCppEnvironment instance.
        """
        # Check for local binaries first
        local_env = self._check_local_binaries()
        if local_env:
            return local_env

        # Set up the repository if needed
        return self.setup_repository()

    def _check_local_binaries(self) -> LlamaCppEnvironment | None:
        """Check for existing llama.cpp binaries in current directory.

        Searches for quantise and CLI binaries in the current directory
        and standard installation paths. Also locates conversion scripts.

        Returns:
            LlamaCppEnvironment if binaries found, None otherwise.
        """
        quantise_bin = Path("./llama-quantize")
        cli_bin = Path("./llama-cli")

        if not (quantise_bin.exists() and cli_bin.exists()):
            return None

        logger.info("Found llama.cpp binaries in current directory")

        # Check for conversion script
        convert_script = self._find_convert_script()
        if convert_script:
            logger.info(f"Found conversion script: {convert_script}")
            return LlamaCppEnvironment(
                quantise_binary=quantise_bin.resolve(),
                cli_binary=cli_bin.resolve(),
                convert_script=convert_script,
                use_repo=False,
            )

        logger.warning("No conversion script found in current directory")
        logger.info("Will use llama.cpp repository method for conversion")
        return LlamaCppEnvironment(
            quantise_binary=quantise_bin.resolve(),
            cli_binary=cli_bin.resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=True,
        )

    def _find_convert_script(self) -> str | None:
        """Find conversion script in current directory.

        Searches for various naming conventions of the HF to GGUF
        conversion script.

        Returns:
            Command to run conversion script, or None if not found.
        """
        scripts = [
            "./llama-convert-hf-to-gguf",
            "python3 ./convert_hf_to_gguf.py",
            "python3 ./convert-hf-to-gguf.py",
        ]

        for script in scripts:
            if script.startswith("python3"):
                script_path = script.split(" ", 1)[1]
                if Path(script_path).exists():
                    return script
            elif Path(script).exists():
                return script
        return None
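
    # The candidates above cover, in order: a standalone converter binary, the
    # current underscore-named script, and the older hyphenated script name
    # used by earlier llama.cpp releases.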

    def setup_repository(self) -> LlamaCppEnvironment:
        """Set up llama.cpp repository for conversion scripts.

        Clones the llama.cpp repository if not present and installs the
        Python dependencies for model conversion.

        Returns:
            LlamaCppEnvironment configured with repository paths.
        """
        if not self.llama_cpp_dir.exists():
            logger.info("Cloning llama.cpp for conversion script...")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "https://github.com/ggerganov/llama.cpp.git",
                    str(self.llama_cpp_dir),
                ],
                check=True,
            )

            # Install Python requirements
            logger.info("Installing Python requirements...")
            subprocess.run(
                [
                    "pip3",
                    "install",
                    "-r",
                    "requirements.txt",
                    "--break-system-packages",
                    "--root-user-action=ignore",
                ],
                cwd=self.llama_cpp_dir,
                check=True,
            )

            # Install additional conversion dependencies
            logger.info("Installing additional conversion dependencies...")
            subprocess.run(
                [
                    "pip3",
                    "install",
                    "transformers",
                    "sentencepiece",
                    "protobuf",
                    "--break-system-packages",
                    "--root-user-action=ignore",
                ],
                check=True,
            )
        else:
            logger.info("llama.cpp repository already exists")

        # Use local binaries but the repository conversion script
        return LlamaCppEnvironment(
            quantise_binary=Path("./llama-quantize").resolve(),
            cli_binary=Path("./llama-cli").resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=False,
        )
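
    # The returned convert_script is a ready-to-run command string, e.g.
    # "python3 ./work/llama.cpp/convert_hf_to_gguf.py" (illustrative path,
    # derived from work_dir).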


class IMatrixGenerator:
    """Handles importance matrix generation for quantisation guidance.

    Generates or locates importance matrices that guide quantisation
    decisions, helping preserve model quality by identifying critical
    tensors requiring higher precision.
    """

    def __init__(self) -> None:
        """Initialise IMatrixGenerator."""
        self.fs = FilesystemService()

    def generate_imatrix(
        self, f16_model_path: Path, llama_env: LlamaCppEnvironment, model_dir: Path
    ) -> Path | None:
        """Generate importance matrix for quantisation guidance.

        Searches for existing imatrix files first, provides interactive
        prompts for user-supplied matrices, then generates new matrices
        using calibration data if necessary.

        Returns:
            Path to imatrix file, or None if generation fails.
        """
        imatrix_path = model_dir / "imatrix.dat"

        # Check for existing imatrix
        if imatrix_path.exists():
            logger.info(f"Found existing imatrix: {imatrix_path.name}")
            return imatrix_path

        # Try user-provided imatrix
        user_imatrix = self._prompt_for_user_imatrix(model_dir, imatrix_path)
        if user_imatrix:
            return user_imatrix

        # Generate new imatrix
        calibration_file = self._get_calibration_file()
        if not calibration_file:
            return None

        return self._generate_new_imatrix(f16_model_path, llama_env, imatrix_path, calibration_file)
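
    # Illustrative call (hypothetical paths):
    #     generator = IMatrixGenerator()
    #     imatrix = generator.generate_imatrix(
    #         Path("model-f16.gguf"), llama_env, Path("./models/example")
    #     )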

    def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info(
            "Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
        )
        logger.info(
            "Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
        )

        response = (
            input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
            .strip()
            .lower()
        )

        if response != "y":
            return None

        logger.info(f"Please place your imatrix.dat file in: {model_dir}")
        input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")

        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"Found imatrix file! ({file_size})")
            return imatrix_path

        logger.warning("No imatrix.dat file found - continuing with automatic generation")
        return None

    def _get_calibration_file(self) -> Path | None:
        """Get calibration data file for imatrix generation.

        Returns:
            Path to calibration file, or None if not found.
        """
        calibration_file = Path(__file__).parent.parent.parent / "resources" / "imatrix_data.txt"
        if not calibration_file.exists():
            logger.warning("resources/imatrix_data.txt not found - skipping imatrix generation")
            logger.info(
                "Download from: https://gist.githubusercontent.com/bartowski1182/"
                "eb213dccb3571f863da82e99418f81e8/raw/calibration_datav3.txt"
            )
            return None
        return calibration_file
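
    # Note: Path(__file__).parent.parent.parent resolves three levels up from
    # this module (helpers/services/ -> repository root, assuming that layout),
    # so the calibration data is expected at <repo_root>/resources/imatrix_data.txt.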

    def _generate_new_imatrix(
        self,
        f16_model_path: Path,
        llama_env: LlamaCppEnvironment,
        imatrix_path: Path,
        calibration_file: Path,
    ) -> Path | None:
        """Generate new importance matrix using calibration data.

        Returns:
            Path to generated imatrix, or None if generation fails.
        """
        logger.info("Generating importance matrix (this may take 1-4 hours for large models)...")
        logger.info(f"Model: {f16_model_path.name}")
        logger.info(f"Calibration: {calibration_file}")
        logger.info(f"Output: {imatrix_path}")

        # Find imatrix binary
        imatrix_binary = self._find_imatrix_binary(llama_env)
        if not imatrix_binary:
            logger.warning("llama-imatrix binary not found - skipping imatrix generation")
            logger.info("Make sure llama-imatrix is in the same directory as llama-quantize")
            return None

        # Build and execute command
        cmd = self._build_imatrix_command(
            imatrix_binary, f16_model_path, calibration_file, imatrix_path
        )
        return self._execute_imatrix_generation(cmd, imatrix_path)

    def _build_imatrix_command(
        self, binary: Path, model_path: Path, calibration_file: Path, output_path: Path
    ) -> list[str]:
        """Build imatrix generation command.

        Returns:
            Command arguments as list.
        """
        return [
            str(binary),
            "-m",
            str(model_path),
            "-f",
            str(calibration_file),
            "-o",
            str(output_path),
            "--process-output",
            "--output-frequency",
            "10",
            "--save-frequency",
            "50",
            "-t",
            "8",
            "-c",
            "2048",
            "-b",
            "512",
        ]
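
    # With hypothetical paths, the assembled command reads:
    #     llama-imatrix -m model-f16.gguf -f imatrix_data.txt -o imatrix.dat \
    #         --process-output --output-frequency 10 --save-frequency 50 \
    #         -t 8 -c 2048 -b 512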

    def _execute_imatrix_generation(self, cmd: list[str], imatrix_path: Path) -> Path | None:
        """Execute imatrix generation command with real-time output.

        Returns:
            Path to generated imatrix file, or None if generation fails.
        """
        logger.info(f"Running: {' '.join(cmd)}")
        logger.info("Starting imatrix generation... (progress will be shown)")

        try:
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                bufsize=1,
            )

            self._stream_imatrix_output(process)

            return_code = process.poll()
            if return_code == 0:
                return self._validate_imatrix_output(imatrix_path)
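
            # NB: control falls through to the `else` clause below when the
            # subprocess exits without raising but with a nonzero return code.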
        except KeyboardInterrupt:
            logger.info("imatrix generation cancelled by user")
            process.terminate()
            return None
        except Exception as e:
            logger.error(f"imatrix generation failed with exception: {e}")
            return None
        else:
            logger.error(f"imatrix generation failed with return code {return_code}")
            return None

    def _stream_imatrix_output(self, process: subprocess.Popen) -> None:
        """Stream imatrix generation output in real-time."""
        while True:
            if process.stdout is not None:
                output = process.stdout.readline()
            else:
                break
            if not output and process.poll() is not None:
                break
            if output:
                line = output.strip()
                if self._should_log_imatrix_line(line):
                    logger.info(line)

    def _should_log_imatrix_line(self, line: str) -> bool:
        """Determine if imatrix output line should be logged.

        Returns:
            True if line should be logged, False otherwise.
        """
        keywords = ["Computing imatrix", "perplexity:", "save_imatrix", "entries =", "ETA"]
        return any(keyword in line for keyword in keywords) or line.startswith("[")
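
    # Lines that pass the filter include, for example (illustrative output):
    #     "[1]3.4523,[2]3.1987,"   (chunk progress, starts with "[")
    #     "save_imatrix: stored collected data after 10 chunks"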

    def _validate_imatrix_output(self, imatrix_path: Path) -> Path | None:
        """Validate generated imatrix file.

        Returns:
            Path to imatrix if valid, None otherwise.
        """
        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"imatrix generation successful! ({file_size})")
            return imatrix_path
        logger.error("imatrix generation completed but file not found")
        return None

    def _find_imatrix_binary(self, llama_env: LlamaCppEnvironment) -> Path | None:
        """Find llama-imatrix binary in common locations.

        Searches for the imatrix binary in the current directory and
        standard installation paths.

        Returns:
            Path to imatrix binary, or None if not found.
        """
        candidates = [
            Path("./llama-imatrix"),
            llama_env.quantise_binary.parent / "llama-imatrix",
            Path("/usr/local/bin/llama-imatrix"),
            Path("/usr/bin/llama-imatrix"),
        ]

        for candidate in candidates:
            if candidate.exists() and candidate.is_file():
                return candidate

        return None