Initial commit
commit ef7df1a8c3
28 changed files with 6829 additions and 0 deletions

helpers/services/llama_cpp.py (new file, 417 lines)
@@ -0,0 +1,417 @@
"""llama.cpp environment and operations service.
|
||||
|
||||
Manages llama.cpp binary discovery, environment setup, and imatrix generation.
|
||||
Provides consistent interface for interacting with llama.cpp tools across
|
||||
different installation methods.
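
Example (illustrative; f16_path and model_dir are assumed to exist):
    env = EnvironmentManager(Path("./work")).setup()
    imatrix = IMatrixGenerator().generate_imatrix(f16_path, env, model_dir)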
"""

from __future__ import annotations

import subprocess
from pathlib import Path

from helpers.logger import logger
from helpers.models.quantisation import LlamaCppEnvironment
from helpers.services.filesystem import FilesystemService


class EnvironmentManager:
    """Manages llama.cpp environment setup and binary discovery.

    Handles detection of local binaries, repository setup, and conversion
    script location. Provides fallback strategies for different installation
    scenarios including local builds and repository-based setups.
    """

    def __init__(self, work_dir: Path) -> None:
        """Initialise EnvironmentManager."""
        self.work_dir = work_dir
        self.llama_cpp_dir = work_dir / "llama.cpp"
        self.fs = FilesystemService()

    def setup(self) -> LlamaCppEnvironment:
        """Set up llama.cpp environment with automatic detection.

        Checks for local llama.cpp binaries first, then falls back to
        repository-based setup if needed. Handles conversion script location,
        dependency installation, and path resolution.

        Returns:
            Configured LlamaCppEnvironment instance.
        """
        # Check for local binaries first
        local_env = self._check_local_binaries()
        if local_env:
            return local_env

        # Set up the repository if needed
        return self.setup_repository()

    def _check_local_binaries(self) -> LlamaCppEnvironment | None:
        """Check for existing llama.cpp binaries in the current directory.

        Searches for quantise and CLI binaries in the current directory.
        Also locates conversion scripts.

        Returns:
            LlamaCppEnvironment if binaries found, None otherwise.
        """
        quantise_bin = Path("./llama-quantize")
        cli_bin = Path("./llama-cli")

        if not (quantise_bin.exists() and cli_bin.exists()):
            return None

        logger.info("Found llama.cpp binaries in current directory")

        # Check for conversion script
        convert_script = self._find_convert_script()
        if convert_script:
            logger.info(f"Found conversion script: {convert_script}")
            return LlamaCppEnvironment(
                quantise_binary=quantise_bin.resolve(),
                cli_binary=cli_bin.resolve(),
                convert_script=convert_script,
                use_repo=False,
            )

        logger.warning("No conversion script found in current directory")
        logger.info("Will use llama.cpp repository method for conversion")
        return LlamaCppEnvironment(
            quantise_binary=quantise_bin.resolve(),
            cli_binary=cli_bin.resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=True,
        )

    def _find_convert_script(self) -> str | None:
        """Find conversion script in current directory.

        Searches for various naming conventions of the HF to GGUF
        conversion script.

        Returns:
            Command to run conversion script, or None if not found.
        """
        scripts = [
            "./llama-convert-hf-to-gguf",
            "python3 ./convert_hf_to_gguf.py",
            "python3 ./convert-hf-to-gguf.py",
        ]

        for script in scripts:
            if script.startswith("python3"):
                script_path = script.split(" ", 1)[1]
                if Path(script_path).exists():
                    return script
            elif Path(script).exists():
                return script
        return None

    def setup_repository(self) -> LlamaCppEnvironment:
        """Set up llama.cpp repository for conversion scripts.

        Clones the llama.cpp repository if not present and installs
        Python dependencies for model conversion.

        Returns:
            LlamaCppEnvironment configured with repository paths.
        """
        if not self.llama_cpp_dir.exists():
            logger.info("Cloning llama.cpp for conversion script...")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "https://github.com/ggerganov/llama.cpp.git",
                    str(self.llama_cpp_dir),
                ],
                check=True,
            )

            # Install Python requirements
            logger.info("Installing Python requirements...")
            subprocess.run(
                [
                    "pip3",
                    "install",
                    "-r",
                    "requirements.txt",
                    "--break-system-packages",
                    "--root-user-action=ignore",
                ],
                cwd=self.llama_cpp_dir,
                check=True,
            )

            # Install additional conversion dependencies
            logger.info("Installing additional conversion dependencies...")
            subprocess.run(
                [
                    "pip3",
                    "install",
                    "transformers",
                    "sentencepiece",
                    "protobuf",
                    "--break-system-packages",
                    "--root-user-action=ignore",
                ],
                check=True,
            )
        else:
            logger.info("llama.cpp repository already exists")

        # Use local binaries but repo conversion script
        return LlamaCppEnvironment(
            quantise_binary=Path("./llama-quantize").resolve(),
            cli_binary=Path("./llama-cli").resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=False,
        )


class IMatrixGenerator:
    """Handles importance matrix generation for quantisation guidance.

    Generates or locates importance matrices that guide quantisation
    decisions, helping preserve model quality by identifying critical
    tensors requiring higher precision.
    """

    def __init__(self) -> None:
        """Initialise IMatrixGenerator."""
        self.fs = FilesystemService()

    def generate_imatrix(
        self, f16_model_path: Path, llama_env: LlamaCppEnvironment, model_dir: Path
    ) -> Path | None:
        """Generate importance matrix for quantisation guidance.

        Searches for existing imatrix files first, provides interactive
        prompts for user-supplied matrices, then generates new matrices
        using calibration data if necessary.

        Returns:
            Path to imatrix file, or None if generation fails.
        """
        imatrix_path = model_dir / "imatrix.dat"

        # Check for existing imatrix
        if imatrix_path.exists():
            logger.info(f"Found existing imatrix: {imatrix_path.name}")
            return imatrix_path

        # Try user-provided imatrix
        user_imatrix = self._prompt_for_user_imatrix(model_dir, imatrix_path)
        if user_imatrix:
            return user_imatrix

        # Generate new imatrix
        calibration_file = self._get_calibration_file()
        if not calibration_file:
            return None

        return self._generate_new_imatrix(f16_model_path, llama_env, imatrix_path, calibration_file)

    def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info(
            "Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
        )
        logger.info(
            " Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
        )

        response = (
            input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
            .strip()
            .lower()
        )

        if response != "y":
            return None

        logger.info(f"Please place your imatrix.dat file in: {model_dir}")
        input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")

        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"Found imatrix file! ({file_size})")
            return imatrix_path

        logger.warning("No imatrix.dat file found - continuing with automatic generation")
        return None

    def _get_calibration_file(self) -> Path | None:
        """Get calibration data file for imatrix generation.

        Returns:
            Path to calibration file, or None if not found.
        """
        calibration_file = Path(__file__).parent.parent.parent / "resources" / "imatrix_data.txt"
        if not calibration_file.exists():
            logger.warning("resources/imatrix_data.txt not found - skipping imatrix generation")
            logger.info(
                "Download from: https://gist.githubusercontent.com/bartowski1182/"
                "eb213dccb3571f863da82e99418f81e8/raw/calibration_datav3.txt"
            )
            return None
        return calibration_file

    def _generate_new_imatrix(
        self,
        f16_model_path: Path,
        llama_env: LlamaCppEnvironment,
        imatrix_path: Path,
        calibration_file: Path,
    ) -> Path | None:
        """Generate new importance matrix using calibration data.

        Returns:
            Path to generated imatrix, or None if generation fails.
        """
        logger.info("Generating importance matrix (this may take 1-4 hours for large models)...")
        logger.info(f"Model: {f16_model_path.name}")
        logger.info(f"Calibration: {calibration_file}")
        logger.info(f"Output: {imatrix_path}")

        # Find imatrix binary
        imatrix_binary = self._find_imatrix_binary(llama_env)
        if not imatrix_binary:
            logger.warning("llama-imatrix binary not found - skipping imatrix generation")
            logger.info("Make sure llama-imatrix is in the same directory as llama-quantize")
            return None

        # Build and execute command
        cmd = self._build_imatrix_command(
            imatrix_binary, f16_model_path, calibration_file, imatrix_path
        )
        return self._execute_imatrix_generation(cmd, imatrix_path)

    def _build_imatrix_command(
        self, binary: Path, model_path: Path, calibration_file: Path, output_path: Path
    ) -> list[str]:
        """Build imatrix generation command.

        Returns:
            Command arguments as list.
        """
        return [
            str(binary),
            "-m",
            str(model_path),
            "-f",
            str(calibration_file),
            "-o",
            str(output_path),
            "--process-output",
            "--output-frequency",
            "10",
            "--save-frequency",
            "50",
            "-t",
            "8",
            "-c",
            "2048",
            "-b",
            "512",
        ]

    def _execute_imatrix_generation(self, cmd: list[str], imatrix_path: Path) -> Path | None:
        """Execute imatrix generation command with real-time output.

        Returns:
            Path to generated imatrix file, or None if generation fails.
        """
        logger.info(f"Running: {' '.join(cmd)}")
        logger.info("Starting imatrix generation... (progress will be shown)")
        try:
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                bufsize=1,
            )

            self._stream_imatrix_output(process)

            return_code = process.poll()
            if return_code == 0:
                return self._validate_imatrix_output(imatrix_path)

        except KeyboardInterrupt:
            logger.info("imatrix generation cancelled by user")
            process.terminate()
            return None
        except Exception as e:
            logger.error(f"imatrix generation failed with exception: {e}")
            return None
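        # try/except/else: this else runs only when no exception occurred,
        # i.e. the process exited cleanly but with a non-zero return code.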
        else:
            logger.error(f"imatrix generation failed with return code {return_code}")
            return None

    def _stream_imatrix_output(self, process: subprocess.Popen) -> None:
        """Stream imatrix generation output in real-time."""
        while True:
            if process.stdout is not None:
                output = process.stdout.readline()
            else:
                break
            if not output and process.poll() is not None:
                break
            if output:
                line = output.strip()
                if self._should_log_imatrix_line(line):
                    logger.info(line)

    def _should_log_imatrix_line(self, line: str) -> bool:
        """Determine if imatrix output line should be logged.

        Returns:
            True if line should be logged, False otherwise.
        """
        keywords = ["Computing imatrix", "perplexity:", "save_imatrix", "entries =", "ETA"]
        return any(keyword in line for keyword in keywords) or line.startswith("[")

    def _validate_imatrix_output(self, imatrix_path: Path) -> Path | None:
        """Validate generated imatrix file.

        Returns:
            Path to imatrix if valid, None otherwise.
        """
        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"imatrix generation successful! ({file_size})")
            return imatrix_path
        logger.error("imatrix generation completed but file not found")
        return None

    def _find_imatrix_binary(self, llama_env: LlamaCppEnvironment) -> Path | None:
        """Find llama-imatrix binary in common locations.

        Searches for the imatrix binary in the current directory and
        standard installation paths.

        Returns:
            Path to imatrix binary, or None if not found.
        """
        candidates = [
            Path("./llama-imatrix"),
            llama_env.quantise_binary.parent / "llama-imatrix",
            Path("/usr/local/bin/llama-imatrix"),
            Path("/usr/bin/llama-imatrix"),
        ]

        for candidate in candidates:
            if candidate.exists() and candidate.is_file():
                return candidate

        return None