"""Binary manager for llama.cpp releases.
|
|
|
|
Downloads and manages llama.cpp binary releases from GitHub, handling
|
|
platform detection, version checking, and caching.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import platform
|
|
import shutil
|
|
import subprocess
|
|
import tarfile
|
|
import time
|
|
import zipfile
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, ClassVar
|
|
from urllib.request import urlopen, urlretrieve
|
|
|
|
from helpers.logger import logger
|
|
|
|
if TYPE_CHECKING:
|
|
from typing import Any
|
|
|
|
|
|
class BinaryManager:
|
|
"""Manages llama.cpp binary downloads and updates.
|
|
|
|
Automatically downloads appropriate llama.cpp releases based on platform,
|
|
caches binaries locally, and checks for updates from GitHub releases.
|
|
"""
|
|
|
|
GITHUB_API = "https://api.github.com/repos/ggml-org/llama.cpp/releases/latest"
|
|
# Use local .cache directory in project
|
|
BINARY_DIR = Path(".cache") / "llm-gguf-tools" / "binaries"
|
|
|
|
# Platform mappings to release asset patterns
|
|
PLATFORM_PATTERNS: ClassVar[dict[tuple[str, str], list[str]]] = {
|
|
("Linux", "x86_64"): ["linux-x64", "ubuntu-x64", "linux-amd64"],
|
|
("Linux", "aarch64"): ["linux-arm64", "linux-aarch64"],
|
|
("Darwin", "x86_64"): ["macos-x64", "darwin-x64", "macos-amd64"],
|
|
("Darwin", "arm64"): ["macos-arm64", "darwin-arm64", "macos-aarch64"],
|
|
("Windows", "AMD64"): ["win-x64", "windows-x64", "win64"],
|
|
}
|
|
|
|
def __init__(self) -> None:
|
|
"""Initialise binary manager."""
|
|
self.BINARY_DIR.mkdir(parents=True, exist_ok=True)
|
|
self.version_file = self.BINARY_DIR / "version.json"
|
|
self.quantize_binary_path = self._get_binary_path("llama-quantize")
|
|
self.imatrix_binary_path = self._get_binary_path("llama-imatrix")
|
|
|
|
def _get_binary_path(self, base_name: str) -> Path:
|
|
"""Get path to binary.
|
|
|
|
Constructs the full path to a binary executable based on the base
|
|
name, automatically adding the appropriate file extension for the
|
|
current operating system platform.
|
|
|
|
Returns:
|
|
Path where binary should be located.
|
|
"""
|
|
binary_name = f"{base_name}.exe" if platform.system() == "Windows" else base_name
|
|
return self.BINARY_DIR / binary_name
|
|
|
|
def get_quantise_binary(self) -> Path | None:
|
|
"""Get llama-quantize binary, downloading if necessary.
|
|
|
|
Returns:
|
|
Path to binary if available, None if download fails.
|
|
"""
|
|
return self._get_binary("llama-quantize", self.quantize_binary_path)
|
|
|
|
def get_imatrix_binary(self) -> Path | None:
|
|
"""Get llama-imatrix binary, downloading if necessary.
|
|
|
|
Returns:
|
|
Path to binary if available, None if download fails.
|
|
"""
|
|
return self._get_binary("llama-imatrix", self.imatrix_binary_path)
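
    # Example usage (illustrative sketch; the file names and quantisation type
    # below are assumptions, not part of this module):
    #
    #     manager = BinaryManager()
    #     quantize = manager.get_quantise_binary()
    #     if quantize and manager.check_binary_works(quantize):
    #         subprocess.run(
    #             [str(quantize), "model-f16.gguf", "model-q4_k_m.gguf", "Q4_K_M"],
    #             check=True,
    #         )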

    def _get_binary(self, name: str, binary_path: Path) -> Path | None:
        """Get a specific binary, downloading if necessary.

        Checks for existing binaries and downloads the latest release if
        updates are needed. Falls back to existing binaries if download
        fails, ensuring robust binary availability for quantisation tasks.

        Returns:
            Path to binary if available, None if download fails.
        """
        # Check if we have a binary and if it needs updating
        if self._should_update():
            logger.info("🔄 Checking for llama.cpp updates...")
            if not self._download_latest():
                logger.warning("Failed to download latest llama.cpp release")
                # Fall back to existing binary if available
                if binary_path.exists():
                    logger.info(f"Using existing {name} binary")
                    return binary_path
                return None

        if binary_path.exists():
            return binary_path

        logger.info("📥 Downloading llama.cpp binaries...")
        if self._download_latest():
            return binary_path

        return None

    def _should_update(self) -> bool:
        """Check if binary needs updating.

        Returns:
            True if update needed, False otherwise.
        """
        # If no binaries exist, we need to download
        if not self.quantize_binary_path.exists() or not self.imatrix_binary_path.exists():
            return True

        # Check version file
        if not self.version_file.exists():
            return True

        try:
            with Path(self.version_file).open(encoding="utf-8") as f:
                cached_version = json.load(f)

            # Check if cached version is older than 7 days
            if time.time() - cached_version.get("timestamp", 0) > 7 * 24 * 3600:
                return True

        except Exception:
            return True

        return False
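
    # For reference, the cache file written by _save_version_info() has the
    # shape below (values shown are illustrative):
    #
    #     {
    #       "version": "b1234",
    #       "timestamp": 1700000000.0,
    #       "url": "https://github.com/ggml-org/llama.cpp/releases/tag/b1234"
    #     }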

    def _download_latest(self) -> bool:
        """Download latest llama.cpp release.

        Returns:
            True if successful, False otherwise.
        """
        try:
            # Get latest release info
            release_info = self._get_latest_release()
            if not release_info:
                return False

            # Find appropriate asset for platform
            asset_url = self._find_platform_asset(release_info["assets"])
            if not asset_url:
                logger.warning("No suitable binary found for this platform")
                return False

            # Download and extract
            logger.info(f"📥 Downloading from: {asset_url}")
            if not self._download_and_extract(asset_url):
                return False

            # Save version info
            self._save_version_info(release_info)

            logger.info("✅ Successfully downloaded llama.cpp binary")
        except Exception as e:
            logger.error(f"Failed to download llama.cpp: {e}")
            return False
        else:
            return True

    def _get_latest_release(self) -> dict[str, Any] | None:
        """Get latest release info from GitHub API.

        Returns:
            Release info dict or None if failed.
        """
        try:
            with urlopen(self.GITHUB_API) as response:  # noqa: S310
                return json.loads(response.read())
        except Exception as e:
            logger.error(f"Failed to fetch release info: {e}")
            return None

    def _find_platform_asset(self, assets: list[dict[str, Any]]) -> str | None:
        """Find appropriate asset for current platform.

        Returns:
            Download URL for appropriate asset or None.
        """
        patterns = self._get_platform_patterns()
        if not patterns:
            return None

        return self._select_best_asset(assets, patterns)

    def _get_platform_patterns(self) -> list[str]:
        """Get platform patterns for current system.

        Returns:
            List of patterns to match in asset names.
        """
        system = platform.system()
        machine = platform.machine()

        # Get specific patterns for this platform
        patterns = self.PLATFORM_PATTERNS.get((system, machine), [])
        if patterns:
            return patterns

        # Fall back to generic patterns
        generic_patterns = {
            "Linux": ["linux", "ubuntu"],
            "Darwin": ["macos", "darwin"],
            "Windows": ["win", "windows"],
        }
        return generic_patterns.get(system, [])

    def _select_best_asset(self, assets: list[dict[str, Any]], patterns: list[str]) -> str | None:
        """Select the best asset from available options.

        Returns:
            Download URL for best matching asset or None.
        """
        avoid_patterns = ["cuda", "rocm", "hip", "metal", "sycl"]
        prefer_patterns = ["cpu", "vulkan", "avx2", "avx"]

        best_asset = None
        best_score = -1

        for asset in assets:
            name = asset["name"].lower()

            # Skip GPU-specific builds
            if any(pattern in name for pattern in avoid_patterns):
                continue

            # Check platform match
            if not any(pattern in name for pattern in patterns):
                continue

            score = self._score_asset(name, patterns, prefer_patterns)
            if score > best_score:
                best_score = score
                best_asset = asset

        return best_asset["browser_download_url"] if best_asset else None

    def _score_asset(self, name: str, patterns: list[str], prefer_patterns: list[str]) -> int:
        """Score an asset based on platform and preference matching.

        Returns:
            Numeric score for asset quality (higher is better).
        """
        score = 0

        # Platform match bonus
        if any(pattern in name for pattern in patterns):
            score += 10

        # Preference bonuses
        for pattern in prefer_patterns:
            if pattern in name:
                score += 5

        # Archive format preference
        system = platform.system()
        if (system == "Windows" and name.endswith(".zip")) or (
            system != "Windows" and name.endswith(".tar.gz")
        ):
            score += 2

        return score
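
    # Worked example (asset names are illustrative, not actual release assets):
    # on Linux/x86_64 with patterns ["linux-x64", "ubuntu-x64", "linux-amd64"],
    # "llama-bin-ubuntu-x64.tar.gz" scores 10 (platform) + 2 (.tar.gz) = 12,
    # while "llama-bin-ubuntu-x64-vulkan.zip" scores 10 + 5 ("vulkan") = 15,
    # so _select_best_asset() would prefer the Vulkan build.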

    def _download_and_extract(self, url: str) -> bool:
        """Download and extract binary archive.

        Downloads the binary archive from the specified URL and extracts
        the necessary binaries and shared libraries. Handles both ZIP and
        TAR.GZ formats with appropriate platform-specific permissions.

        Returns:
            True if successful, False otherwise.
        """
        try:
            # Download to temp file
            temp_file = self.BINARY_DIR / "temp_download"
            logger.info("⬇️ Downloading archive...")
            urlretrieve(url, temp_file)  # noqa: S310

            # Extract based on file type
            if url.endswith(".zip"):
                with zipfile.ZipFile(temp_file, "r") as zf:
                    self._extract_binary_from_archive(zf)
            elif url.endswith((".tar.gz", ".tgz")):
                with tarfile.open(temp_file, "r:gz") as tf:
                    self._extract_binary_from_archive(tf)
            else:
                logger.error(f"Unknown archive format: {url}")
                return False

            # Clean up temp file
            temp_file.unlink()

            # Make binaries executable on Unix
            if platform.system() != "Windows":
                self.quantize_binary_path.chmod(0o755)
                self.imatrix_binary_path.chmod(0o755)

        except Exception as e:
            logger.error(f"Failed to download and extract: {e}")
            return False
        else:
            return True

    def _extract_binary_from_archive(self, archive: Any) -> None:
        """Extract llama binaries and their dependencies from archive."""
        target_binaries = {
            "llama-quantize": ["llama-quantize", "llama-quantize.exe", "quantize", "quantize.exe"],
            "llama-imatrix": ["llama-imatrix", "llama-imatrix.exe", "imatrix", "imatrix.exe"],
        }

        # Also extract shared libraries
        shared_libs = [
            "libllama.so",
            "libggml-base.so",
            "libggml.so",
            "libllama.dll",
            "libggml.dll",
        ]

        members = self._get_archive_members(archive)
        extracted = self._extract_matching_binaries(archive, members, target_binaries)
        self._extract_shared_libraries(archive, members, shared_libs)
        self._cleanup_extracted_directories()
        self._report_missing_binaries(extracted)
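
    # Matching is done on basenames, so a nested archive member such as
    # "build/bin/llama-quantize" (path shown is illustrative) is still found and
    # flattened into BINARY_DIR; any leftover directory tree is then removed by
    # _cleanup_extracted_directories().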

    def _get_archive_members(self, archive: Any) -> list[str]:
        """Get list of members from archive.

        Returns:
            List of member names in the archive.
        """
        if isinstance(archive, zipfile.ZipFile):
            return archive.namelist()
        return [m.name for m in archive.getmembers()]

    def _extract_matching_binaries(
        self,
        archive: Any,
        members: list[str],
        target_binaries: dict[str, list[str]],
    ) -> set[str]:
        """Extract binaries that match target patterns.

        Returns:
            Set of successfully extracted binary types.
        """
        extracted = set()
        for member in members:
            base_name = Path(member).name

            for binary_type, possible_names in target_binaries.items():
                if base_name in possible_names:
                    self._extract_single_binary(archive, member, binary_type)
                    extracted.add(binary_type)
                    break
        return extracted

    def _extract_single_binary(self, archive: Any, member: str, binary_type: str) -> None:
        """Extract a single binary from archive."""
        logger.info(f"📦 Extracting {Path(member).name} as {binary_type}...")
        target_path = self._get_binary_path(binary_type)

        if isinstance(archive, zipfile.ZipFile):
            self._extract_from_zip(archive, member, target_path)
        else:  # tarfile
            self._extract_from_tar(archive, member, target_path)

    def _extract_from_zip(self, archive: zipfile.ZipFile, member: str, target_path: Path) -> None:
        """Extract binary from zip archive."""
        temp_path = self.BINARY_DIR / "temp_binary"
        with archive.open(member) as source, temp_path.open("wb") as target:
            shutil.copyfileobj(source, target)
        shutil.move(str(temp_path), str(target_path))

    def _extract_from_tar(self, archive: tarfile.TarFile, member: str, target_path: Path) -> None:
        """Extract binary from tar archive."""
        archive.extract(member, self.BINARY_DIR)
        extracted_path = self.BINARY_DIR / member
        if extracted_path != target_path:
            shutil.move(str(extracted_path), str(target_path))

    def _cleanup_extracted_directories(self) -> None:
        """Clean up any extracted directories."""
        for item in self.BINARY_DIR.iterdir():
            if item.is_dir() and item.name != "binaries":
                shutil.rmtree(item)

    def _extract_shared_libraries(
        self, archive: Any, members: list[str], lib_patterns: list[str]
    ) -> None:
        """Extract shared libraries needed by the binaries.

        Searches through archive members to find shared libraries matching
        the specified patterns and extracts them to ensure proper binary
        functionality. Sets appropriate permissions on Unix systems.
        """
        for member in members:
            base_name = Path(member).name
            if any(lib in base_name for lib in lib_patterns):
                logger.info(f"📚 Extracting library: {base_name}")
                target_path = self.BINARY_DIR / base_name

                if isinstance(archive, zipfile.ZipFile):
                    temp_path = self.BINARY_DIR / "temp_lib"
                    with archive.open(member) as source, temp_path.open("wb") as target:
                        shutil.copyfileobj(source, target)
                    shutil.move(str(temp_path), str(target_path))
                else:  # tarfile
                    archive.extract(member, self.BINARY_DIR)
                    extracted_path = self.BINARY_DIR / member
                    if extracted_path != target_path:
                        shutil.move(str(extracted_path), str(target_path))

                # Make libraries executable on Unix
                if platform.system() != "Windows":
                    target_path.chmod(0o755)

    def _report_missing_binaries(self, extracted: set[str]) -> None:
        """Report any missing binaries."""
        if "llama-quantize" not in extracted:
            logger.warning("llama-quantize binary not found in archive")
        if "llama-imatrix" not in extracted:
            logger.warning("llama-imatrix binary not found in archive")

    def _save_version_info(self, release_info: dict[str, Any]) -> None:
        """Save version information to cache.

        Stores release version, timestamp, and URL information to the local
        cache to enable version checking and update determination for
        future binary manager operations.
        """
        version_data = {
            "version": release_info.get("tag_name", "unknown"),
            "timestamp": time.time(),
            "url": release_info.get("html_url", ""),
        }

        with Path(self.version_file).open("w", encoding="utf-8") as f:
            json.dump(version_data, f, indent=2)

        logger.info(f"📌 Cached version: {version_data['version']}")

    def check_binary_works(self, binary_path: Path | None = None) -> bool:
        """Check if the binary actually works.

        Validates that the specified binary can execute properly by running
        a help command with appropriate environment variables set for shared
        library loading. Defaults to checking the quantise binary if no path provided.

        Returns:
            True if binary executes successfully, False otherwise.
        """
        if binary_path is None:
            binary_path = self.quantize_binary_path

        if not binary_path.exists():
            return False

        try:
            # Set LD_LIBRARY_PATH to include binary directory for shared libraries
            env = os.environ.copy()
            if platform.system() != "Windows":
                lib_path = str(self.BINARY_DIR)
                if "LD_LIBRARY_PATH" in env:
                    env["LD_LIBRARY_PATH"] = f"{lib_path}:{env['LD_LIBRARY_PATH']}"
                else:
                    env["LD_LIBRARY_PATH"] = lib_path

            result = subprocess.run(
                [str(binary_path), "--help"],
                check=False,
                capture_output=True,
                text=True,
                timeout=5,
                env=env,
            )
        except Exception:
            return False
        else:
            # llama-quantize returns 1 for --help but shows usage, which means it works
            return result.returncode in {0, 1} and "usage:" in result.stdout.lower()
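
    # On Unix the shared libraries extracted alongside the binaries are resolved
    # via LD_LIBRARY_PATH; the shell equivalent of the check above is roughly
    # (illustrative):
    #
    #     LD_LIBRARY_PATH=".cache/llm-gguf-tools/binaries:$LD_LIBRARY_PATH" \
    #         .cache/llm-gguf-tools/binaries/llama-quantize --help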