llm-gguf-tools/helpers/llama_cpp/binary_manager.py

"""Binary manager for llama.cpp releases.
Downloads and manages llama.cpp binary releases from GitHub, handling
platform detection, version checking, and caching.
"""
from __future__ import annotations
import json
import os
import platform
import shutil
import subprocess
import tarfile
import time
import zipfile
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar
from urllib.request import urlopen, urlretrieve
from helpers.logger import logger
if TYPE_CHECKING:
from typing import Any


class BinaryManager:
    """Manages llama.cpp binary downloads and updates.

    Automatically downloads appropriate llama.cpp releases based on platform,
    caches binaries locally, and checks for updates from GitHub releases.
    """

    GITHUB_API = "https://api.github.com/repos/ggml-org/llama.cpp/releases/latest"

    # Use local .cache directory in project
    BINARY_DIR = Path(".cache") / "llm-gguf-tools" / "binaries"

    # Platform mappings to release asset patterns
    PLATFORM_PATTERNS: ClassVar[dict[tuple[str, str], list[str]]] = {
        ("Linux", "x86_64"): ["linux-x64", "ubuntu-x64", "linux-amd64"],
        ("Linux", "aarch64"): ["linux-arm64", "linux-aarch64"],
        ("Darwin", "x86_64"): ["macos-x64", "darwin-x64", "macos-amd64"],
        ("Darwin", "arm64"): ["macos-arm64", "darwin-arm64", "macos-aarch64"],
        ("Windows", "AMD64"): ["win-x64", "windows-x64", "win64"],
    }
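
    # Release asset naming varies between llama.cpp versions; the patterns above
    # are matched as substrings of the lowercased asset name. An illustrative
    # (not guaranteed) example is "llama-bXXXX-bin-ubuntu-x64.zip", which would
    # match the ("Linux", "x86_64") entry via "ubuntu-x64".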

    def __init__(self) -> None:
        """Initialise binary manager."""
        self.BINARY_DIR.mkdir(parents=True, exist_ok=True)
        self.version_file = self.BINARY_DIR / "version.json"
        self.quantize_binary_path = self._get_binary_path("llama-quantize")
        self.imatrix_binary_path = self._get_binary_path("llama-imatrix")

    def _get_binary_path(self, base_name: str) -> Path:
        """Get path to binary.

        Constructs the full path to a binary executable based on the base
        name, automatically adding the appropriate file extension for the
        current operating system platform.

        Returns:
            Path where binary should be located.
        """
        binary_name = f"{base_name}.exe" if platform.system() == "Windows" else base_name
        return self.BINARY_DIR / binary_name
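
    # Resulting cache layout (derived from the methods below; shown for
    # orientation only):
    #   .cache/llm-gguf-tools/binaries/
    #     llama-quantize[.exe]               - quantisation binary
    #     llama-imatrix[.exe]                - importance matrix binary
    #     libllama.so / libggml*.so / *.dll  - shared libraries from the release
    #     version.json                       - cached release tag and timestamp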

    def get_quantise_binary(self) -> Path | None:
        """Get llama-quantize binary, downloading if necessary.

        Returns:
            Path to binary if available, None if download fails.
        """
        return self._get_binary("llama-quantize", self.quantize_binary_path)

    def get_imatrix_binary(self) -> Path | None:
        """Get llama-imatrix binary, downloading if necessary.

        Returns:
            Path to binary if available, None if download fails.
        """
        return self._get_binary("llama-imatrix", self.imatrix_binary_path)

    def _get_binary(self, name: str, binary_path: Path) -> Path | None:
        """Get a specific binary, downloading if necessary.

        Checks for existing binaries and downloads the latest release if
        updates are needed. Falls back to existing binaries if download
        fails, ensuring robust binary availability for quantisation tasks.

        Returns:
            Path to binary if available, None if download fails.
        """
        # Check if we have a binary and if it needs updating
        if self._should_update():
            logger.info("🔄 Checking for llama.cpp updates...")
            if not self._download_latest():
                logger.warning("Failed to download latest llama.cpp release")
                # Fall back to existing binary if available
                if binary_path.exists():
                    logger.info(f"Using existing {name} binary")
                    return binary_path
                return None

        if binary_path.exists():
            return binary_path

        logger.info("📥 Downloading llama.cpp binaries...")
        if self._download_latest():
            return binary_path
        return None

    def _should_update(self) -> bool:
        """Check if binaries need updating.

        Returns:
            True if update needed, False otherwise.
        """
        # If no binaries exist, we need to download
        if not self.quantize_binary_path.exists() or not self.imatrix_binary_path.exists():
            return True

        # Check version file
        if not self.version_file.exists():
            return True

        try:
            with self.version_file.open(encoding="utf-8") as f:
                cached_version = json.load(f)
                # Check if cached version is older than 7 days
                if time.time() - cached_version.get("timestamp", 0) > 7 * 24 * 3600:
                    return True
        except Exception:
            return True

        return False

    def _download_latest(self) -> bool:
        """Download latest llama.cpp release.

        Returns:
            True if successful, False otherwise.
        """
        try:
            # Get latest release info
            release_info = self._get_latest_release()
            if not release_info:
                return False

            # Find appropriate asset for platform
            asset_url = self._find_platform_asset(release_info["assets"])
            if not asset_url:
                logger.warning("No suitable binary found for this platform")
                return False

            # Download and extract
            logger.info(f"📥 Downloading from: {asset_url}")
            if not self._download_and_extract(asset_url):
                return False

            # Save version info
            self._save_version_info(release_info)
            logger.info("✅ Successfully downloaded llama.cpp binaries")
        except Exception as e:
            logger.error(f"Failed to download llama.cpp: {e}")
            return False
        else:
            return True

    def _get_latest_release(self) -> dict[str, Any] | None:
        """Get latest release info from GitHub API.

        Returns:
            Release info dict or None if failed.
        """
        try:
            with urlopen(self.GITHUB_API) as response:  # noqa: S310
                return json.loads(response.read())
        except Exception as e:
            logger.error(f"Failed to fetch release info: {e}")
            return None

    def _find_platform_asset(self, assets: list[dict[str, Any]]) -> str | None:
        """Find appropriate asset for current platform.

        Returns:
            Download URL for appropriate asset or None.
        """
        patterns = self._get_platform_patterns()
        if not patterns:
            return None
        return self._select_best_asset(assets, patterns)

    def _get_platform_patterns(self) -> list[str]:
        """Get platform patterns for current system.

        Returns:
            List of patterns to match in asset names.
        """
        system = platform.system()
        machine = platform.machine()

        # Get specific patterns for this platform
        patterns = self.PLATFORM_PATTERNS.get((system, machine), [])
        if patterns:
            return patterns

        # Fall back to generic patterns
        generic_patterns = {
            "Linux": ["linux", "ubuntu"],
            "Darwin": ["macos", "darwin"],
            "Windows": ["win", "windows"],
        }
        return generic_patterns.get(system, [])

    def _select_best_asset(self, assets: list[dict[str, Any]], patterns: list[str]) -> str | None:
        """Select the best asset from available options.

        Returns:
            Download URL for best matching asset or None.
        """
        avoid_patterns = ["cuda", "rocm", "hip", "metal", "sycl"]
        prefer_patterns = ["cpu", "vulkan", "avx2", "avx"]

        best_asset = None
        best_score = -1
        for asset in assets:
            name = asset["name"].lower()

            # Skip GPU-specific builds
            if any(pattern in name for pattern in avoid_patterns):
                continue

            # Check platform match
            if not any(pattern in name for pattern in patterns):
                continue

            score = self._score_asset(name, patterns, prefer_patterns)
            if score > best_score:
                best_score = score
                best_asset = asset

        return best_asset["browser_download_url"] if best_asset else None

    def _score_asset(self, name: str, patterns: list[str], prefer_patterns: list[str]) -> int:
        """Score an asset based on platform and preference matching.

        Returns:
            Numeric score for asset quality (higher is better).
        """
        score = 0

        # Platform match bonus
        if any(pattern in name for pattern in patterns):
            score += 10

        # Preference bonuses
        for pattern in prefer_patterns:
            if pattern in name:
                score += 5

        # Archive format preference
        system = platform.system()
        if (system == "Windows" and name.endswith(".zip")) or (
            system != "Windows" and name.endswith(".tar.gz")
        ):
            score += 2

        return score
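
    # Illustrative scoring walk-through on Linux/x86_64 (asset names are
    # hypothetical, not taken from a specific release):
    #   "llama-bXXXX-bin-win-cuda-x64.zip"      -> skipped ("cuda" is avoided)
    #   "llama-bXXXX-bin-ubuntu-x64.zip"        -> platform match         = 10
    #   "llama-bXXXX-bin-ubuntu-x64.tar.gz"     -> platform + ".tar.gz"   = 12
    #   "llama-bXXXX-bin-ubuntu-vulkan-x64.zip" -> platform + "vulkan"    = 15  (selected)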

    def _download_and_extract(self, url: str) -> bool:
        """Download and extract binary archive.

        Downloads the binary archive from the specified URL and extracts
        the necessary binaries and shared libraries. Handles both ZIP and
        TAR.GZ formats with appropriate platform-specific permissions.

        Returns:
            True if successful, False otherwise.
        """
        try:
            # Download to temp file
            temp_file = self.BINARY_DIR / "temp_download"
            logger.info("⬇️ Downloading archive...")
            urlretrieve(url, temp_file)  # noqa: S310

            # Extract based on file type
            if url.endswith(".zip"):
                with zipfile.ZipFile(temp_file, "r") as zf:
                    self._extract_binary_from_archive(zf)
            elif url.endswith((".tar.gz", ".tgz")):
                with tarfile.open(temp_file, "r:gz") as tf:
                    self._extract_binary_from_archive(tf)
            else:
                logger.error(f"Unknown archive format: {url}")
                return False

            # Clean up temp file
            temp_file.unlink()

            # Make binaries executable on Unix
            if platform.system() != "Windows":
                self.quantize_binary_path.chmod(0o755)
                self.imatrix_binary_path.chmod(0o755)
        except Exception as e:
            logger.error(f"Failed to download and extract: {e}")
            return False
        else:
            return True

    def _extract_binary_from_archive(self, archive: Any) -> None:
        """Extract llama binaries and their dependencies from archive."""
        target_binaries = {
            "llama-quantize": ["llama-quantize", "llama-quantize.exe", "quantize", "quantize.exe"],
            "llama-imatrix": ["llama-imatrix", "llama-imatrix.exe", "imatrix", "imatrix.exe"],
        }

        # Also extract shared libraries
        shared_libs = [
            "libllama.so",
            "libggml-base.so",
            "libggml.so",
            "libllama.dll",
            "libggml.dll",
        ]

        members = self._get_archive_members(archive)
        extracted = self._extract_matching_binaries(archive, members, target_binaries)
        self._extract_shared_libraries(archive, members, shared_libs)
        self._cleanup_extracted_directories()
        self._report_missing_binaries(extracted)

    def _get_archive_members(self, archive: Any) -> list[str]:
        """Get list of members from archive.

        Returns:
            List of member names in the archive.
        """
        if isinstance(archive, zipfile.ZipFile):
            return archive.namelist()
        return [m.name for m in archive.getmembers()]

    def _extract_matching_binaries(
        self,
        archive: Any,
        members: list[str],
        target_binaries: dict[str, list[str]],
    ) -> set[str]:
        """Extract binaries that match target patterns.

        Returns:
            Set of successfully extracted binary types.
        """
        extracted = set()
        for member in members:
            base_name = Path(member).name
            for binary_type, possible_names in target_binaries.items():
                if base_name in possible_names:
                    self._extract_single_binary(archive, member, binary_type)
                    extracted.add(binary_type)
                    break
        return extracted

    def _extract_single_binary(self, archive: Any, member: str, binary_type: str) -> None:
        """Extract a single binary from archive."""
        logger.info(f"📦 Extracting {Path(member).name} as {binary_type}...")
        target_path = self._get_binary_path(binary_type)
        if isinstance(archive, zipfile.ZipFile):
            self._extract_from_zip(archive, member, target_path)
        else:  # tarfile
            self._extract_from_tar(archive, member, target_path)

    def _extract_from_zip(self, archive: zipfile.ZipFile, member: str, target_path: Path) -> None:
        """Extract binary from zip archive."""
        temp_path = self.BINARY_DIR / "temp_binary"
        with archive.open(member) as source, temp_path.open("wb") as target:
            shutil.copyfileobj(source, target)
        shutil.move(str(temp_path), str(target_path))

    def _extract_from_tar(self, archive: tarfile.TarFile, member: str, target_path: Path) -> None:
        """Extract binary from tar archive."""
        archive.extract(member, self.BINARY_DIR)
        extracted_path = self.BINARY_DIR / member
        if extracted_path != target_path:
            shutil.move(str(extracted_path), str(target_path))

    def _cleanup_extracted_directories(self) -> None:
        """Clean up any extracted directories."""
        for item in self.BINARY_DIR.iterdir():
            if item.is_dir() and item.name != "binaries":
                shutil.rmtree(item)

    def _extract_shared_libraries(
        self, archive: Any, members: list[str], lib_patterns: list[str]
    ) -> None:
        """Extract shared libraries needed by the binaries.

        Searches through archive members to find shared libraries matching
        the specified patterns and extracts them to ensure proper binary
        functionality. Sets appropriate permissions on Unix systems.
        """
        for member in members:
            base_name = Path(member).name
            if any(lib in base_name for lib in lib_patterns):
                logger.info(f"📚 Extracting library: {base_name}")
                target_path = self.BINARY_DIR / base_name
                if isinstance(archive, zipfile.ZipFile):
                    temp_path = self.BINARY_DIR / "temp_lib"
                    with archive.open(member) as source, temp_path.open("wb") as target:
                        shutil.copyfileobj(source, target)
                    shutil.move(str(temp_path), str(target_path))
                else:  # tarfile
                    archive.extract(member, self.BINARY_DIR)
                    extracted_path = self.BINARY_DIR / member
                    if extracted_path != target_path:
                        shutil.move(str(extracted_path), str(target_path))

                # Make libraries executable on Unix
                if platform.system() != "Windows":
                    target_path.chmod(0o755)

    def _report_missing_binaries(self, extracted: set[str]) -> None:
        """Report any missing binaries."""
        if "llama-quantize" not in extracted:
            logger.warning("llama-quantize binary not found in archive")
        if "llama-imatrix" not in extracted:
            logger.warning("llama-imatrix binary not found in archive")

    def _save_version_info(self, release_info: dict[str, Any]) -> None:
        """Save version information to the cache.

        Stores the release tag, download timestamp, and release URL in the
        local cache so that later runs can decide whether the cached
        binaries need updating.
        """
        version_data = {
            "version": release_info.get("tag_name", "unknown"),
            "timestamp": time.time(),
            "url": release_info.get("html_url", ""),
        }
        with self.version_file.open("w", encoding="utf-8") as f:
            json.dump(version_data, f, indent=2)
        logger.info(f"📌 Cached version: {version_data['version']}")

    def check_binary_works(self, binary_path: Path | None = None) -> bool:
        """Check that the binary actually works.

        Validates that the specified binary can execute by running its
        --help command with the environment set up for shared library
        loading. Defaults to the quantise binary if no path is provided.

        Returns:
            True if binary executes successfully, False otherwise.
        """
        if binary_path is None:
            binary_path = self.quantize_binary_path

        if not binary_path.exists():
            return False

        try:
            # Set LD_LIBRARY_PATH to include the binary directory for shared libraries
            env = os.environ.copy()
            if platform.system() != "Windows":
                lib_path = str(self.BINARY_DIR)
                if "LD_LIBRARY_PATH" in env:
                    env["LD_LIBRARY_PATH"] = f"{lib_path}:{env['LD_LIBRARY_PATH']}"
                else:
                    env["LD_LIBRARY_PATH"] = lib_path

            result = subprocess.run(
                [str(binary_path), "--help"],
                check=False,
                capture_output=True,
                text=True,
                timeout=5,
                env=env,
            )
        except Exception:
            return False
        else:
            # llama-quantize returns 1 for --help but still prints usage, which means it works
            return result.returncode in {0, 1} and "usage:" in result.stdout.lower()
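

# Example usage (illustrative sketch; the model filenames and quantisation type
# are assumptions for demonstration, not part of this module):
#
#     manager = BinaryManager()
#     quantize = manager.get_quantise_binary()
#     if quantize and manager.check_binary_works(quantize):
#         subprocess.run(
#             [str(quantize), "model-f16.gguf", "model-Q4_K_M.gguf", "Q4_K_M"],
#             check=True,
#         )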