llm-gguf-tools/helpers/quantisation/model_manager.py

"""Model acquisition and preparation management.
Handles model downloading from HuggingFace and preparation for quantisation,
including format detection and conversion.
"""

from __future__ import annotations

import shutil
import subprocess
import traceback
from typing import TYPE_CHECKING

from helpers.filesystem import FilesystemService
from helpers.gguf import GGUFConverter
from helpers.logger import logger
from helpers.models.quantisation import ModelSource
from helpers.utils.config_parser import ConfigParser
from helpers.utils.tensor_mapping import TensorMapper

if TYPE_CHECKING:
    from pathlib import Path


class ModelManager:
    """Handles model downloading and preparation for quantisation.

    Manages both GGUF repository downloads and HuggingFace model conversions,
    providing a unified interface for model acquisition and preparation.
    """

    def __init__(self, models_dir: Path) -> None:
        """Initialise model manager with storage configuration.

        Creates a new model manager instance that will handle model downloading,
        format detection, and preparation for quantisation workflows, using the
        specified directory as the base storage location.
        """
self.models_dir = models_dir
self.fs = FilesystemService()

    def prepare_model(self, model_source: ModelSource) -> Path:
        """Prepare model for quantisation and return F16 model path.

        Handles both GGUF repository downloads and regular HuggingFace model
        conversion workflows with automatic format detection. Processes the
        provided model source information to determine the optimal acquisition
        strategy and ensures the model is in F16 GGUF format.

        Returns:
            Path to F16 GGUF model ready for quantisation.
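
        Example (illustrative sketch; ``source`` stands for a hypothetical
        ``ModelSource`` built elsewhere, e.g. by the tool's URL parser):
            >>> from pathlib import Path
            >>> manager = ModelManager(Path("./models"))
            >>> f16_path = manager.prepare_model(source)  # doctest: +SKIP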
"""
model_dir = self.models_dir / model_source.model_name
if model_source.is_gguf_repo:
return self._handle_gguf_repo(model_source, model_dir)
return self._handle_regular_repo(model_source, model_dir)

    def _handle_gguf_repo(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Handle GGUF repository download with pattern matching.

        Downloads GGUF files matching specified patterns, prioritising
        multi-part files and F16 variants. Uses the model source information
        and target directory to efficiently locate and download appropriate
        GGUF files from HuggingFace repositories.

        Returns:
            Path to downloaded or existing GGUF file.
        """
logger.info(f"⬇️ Downloading GGUF file from repository: {model_source.source_model}")
logger.info(f"🔍 Looking for file pattern: *{model_source.gguf_file_pattern}*")
f16_model = model_dir / f"{model_source.original_author}-{model_source.model_name}-f16.gguf"
if f16_model.exists():
logger.info(f"✅ Found existing F16 file: {f16_model.name}")
return f16_model
# Check for existing GGUF files
model_dir.mkdir(parents=True, exist_ok=True)
existing_gguf = self.fs.find_gguf_files(model_dir)
if existing_gguf:
logger.info(f"✅ Found existing GGUF file: {existing_gguf[0].name}")
return existing_gguf[0]
# Download with patterns
downloaded_file = self._download_gguf_with_patterns(
model_source.source_model, model_source.gguf_file_pattern, model_dir
)
if downloaded_file:
# Handle multi-part files
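            # Sharded GGUFs follow the llama.cpp split naming scheme, e.g.
            # "model-00002-of-00003.gguf"; returning the first shard is
            # sufficient, as llama.cpp loads the remaining parts from it.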
if "00001-of-" in downloaded_file.name:
return downloaded_file
if "-00002-of-" in downloaded_file.name or "-00003-of-" in downloaded_file.name:
base_name = downloaded_file.name.replace("-00002-of-", "-00001-of-").replace(
"-00003-of-", "-00001-of-"
)
first_part = downloaded_file.parent / base_name
if first_part.exists():
logger.info(f"🔄 Using first part: {first_part.name}")
return first_part
# Rename single file to standard name
downloaded_file.rename(f16_model)
return f16_model
# Fallback to regular conversion
logger.info("💡 Falling back to downloading full repository and converting...")
return self._handle_regular_repo(
ModelSource(**{**model_source.dict(), "is_gguf_repo": False}),
model_dir,
)

    def _download_gguf_with_patterns(
        self, source_model: str, pattern: str | None, model_dir: Path
    ) -> Path | None:
        """Download GGUF file using various pattern strategies.

        Tries multiple pattern variations to find and download appropriate
        GGUF files, handling timeouts and temporary directories. Uses the
        HuggingFace model identifier with an optional pattern to search for
        specific files and downloads them to the target directory.

        Returns:
            Path to downloaded file, or None if all patterns fail.
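
        Example: ``pattern="Q8_0"`` yields the search order
        ``["*Q8_0*", "*q8_0*", "*Q8_0*", "*f16*", "*F16*", "*fp16*"]``;
        with no pattern, only the F16 spellings are tried.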
"""
if pattern:
patterns = [
f"*{pattern}*",
f"*{pattern.lower()}*",
f"*{pattern.upper()}*",
"*f16*",
"*F16*",
"*fp16*",
]
else:
patterns = ["*f16*", "*F16*", "*fp16*"]
temp_dir = model_dir / "gguf_temp"
for search_pattern in patterns:
logger.info(f"🔍 Trying pattern: {search_pattern}")
temp_dir.mkdir(exist_ok=True)
try:
logger.debug(
f"DEBUG: Running huggingface-cli download for pattern {search_pattern}"
)
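                # Wrap the CLI call in coreutils `timeout` so a stalled
                # download aborts after 300 seconds instead of hanging the
                # whole run (requires the `timeout` binary on PATH).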
result = subprocess.run(
[
"timeout",
"300",
"huggingface-cli",
"download",
source_model,
"--include",
search_pattern,
"--local-dir",
str(temp_dir),
],
check=True,
capture_output=True,
text=True,
)
logger.debug(
f"DEBUG: Download command completed with return code {result.returncode}"
)
# Find downloaded GGUF files
gguf_files = self.fs.find_gguf_files(temp_dir, pattern)
if gguf_files:
found_file = gguf_files[0]
logger.info(f"✅ Found GGUF file: {found_file.name}")
# Move to parent directory
final_path = model_dir / found_file.name
shutil.move(str(found_file), str(final_path))
shutil.rmtree(temp_dir)
return final_path
except subprocess.CalledProcessError as e:
logger.debug(
f"DEBUG: Pattern {search_pattern} failed with return code {e.returncode}"
)
if e.stderr:
logger.debug(f"DEBUG: stderr: {e.stderr}")
if e.stdout:
logger.debug(f"DEBUG: stdout: {e.stdout}")
logger.info(f"⚠️ Pattern {search_pattern} failed or timed out")
continue
except Exception as e:
logger.error(f"❌ Unexpected error during download: {e}")
logger.error("Exception traceback:")
for line in traceback.format_exc().splitlines():
logger.error(f" {line}")
continue
finally:
if temp_dir.exists():
shutil.rmtree(temp_dir, ignore_errors=True)
return None

    def _handle_regular_repo(
        self,
        model_source: ModelSource,
        model_dir: Path,
    ) -> Path:
        """Handle regular HuggingFace repository conversion.

        Downloads the full model repository and converts it to F16 GGUF format
        using our native Python-based GGUFConverter for SafeTensors models.
        Processes the model source information and uses the local directory
        for storage during the download and conversion workflow.

        Returns:
            Path to converted F16 GGUF model.
        """
logger.info(f"⬇️ Downloading source model: {model_source.source_model}")
# Download model if needed
if not model_dir.exists():
self._download_repository(model_source.source_model, model_dir)
else:
logger.info("✅ Model already downloaded")
# Convert to GGUF
return self._convert_to_gguf(model_source, model_dir)

    def _setup_download_directories(self, model_dir: Path) -> None:
        """Set up directories for model download.

        Creates the necessary directory structure for model downloads,
        including the base model directory and HuggingFace metadata
        directory to ensure proper organisation of downloaded assets.
        """
model_dir.mkdir(parents=True, exist_ok=True)
huggingface_dir = model_dir / ".huggingface"
huggingface_dir.mkdir(parents=True, exist_ok=True)

    def _create_download_process(self, source_model: str, model_dir: Path) -> subprocess.Popen:
        """Create subprocess for downloading repository.

        Initiates a HuggingFace CLI download process for the specified model
        identifier, configuring it to download to the local directory whilst
        excluding existing GGUF files to avoid conflicts.

        Returns:
            Subprocess for downloading.
        """
return subprocess.Popen(
[
"huggingface-cli",
"download",
source_model,
"--local-dir",
str(model_dir),
"--exclude",
"*.gguf",
],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # Line buffered
        )

    def _stream_download_output(self, process: subprocess.Popen) -> None:
        """Stream download process output with appropriate logging levels.

        Monitors the download subprocess output and routes progress information
        to appropriate log levels, providing real-time feedback on download
        progress whilst filtering debug information appropriately.
        """
if process.stdout:
for line in process.stdout:
# Log download progress lines
if line.strip():
# Check if it's a progress line (contains %)
if "%" in line or "Downloading" in line or "Fetching" in line:
# Use info level for progress lines
logger.info(f" {line.strip()}")
else:
# Use debug for other output
logger.debug(f" {line.strip()}")

    def _handle_download_errors(self, source_model: str, e: Exception) -> None:
        """Handle download errors with detailed logging.

        Processes download exceptions for the specified model, providing
        comprehensive error logging including return codes, stderr, and
        stdout information to aid in debugging download failures.

        Raises:
            RuntimeError: Always raised, wrapping the original exception.
        """
if isinstance(e, subprocess.CalledProcessError):
logger.error(f"❌ Failed to download repository {source_model}")
logger.error(f"Return code: {e.returncode}")
if e.stderr:
logger.error(f"stderr: {e.stderr}")
if e.stdout:
logger.error(f"stdout: {e.stdout}")
msg = f"Repository download failed: {e}"
raise TypeError(msg) from e
logger.error(f"❌ Unexpected error during repository download: {e}")
logger.error("Exception traceback:")
for line in traceback.format_exc().splitlines():
logger.error(f" {line}")
msg = f"Repository download failed: {e}"
raise TypeError(msg) from e

    def _download_repository(self, source_model: str, model_dir: Path) -> None:
        """Download HuggingFace repository.

        Orchestrates the complete repository download workflow for the
        specified HuggingFace model, managing directory setup, process
        execution, and error handling to ensure robust model acquisition.

        Raises:
            RuntimeError: If download fails.
        """
self._setup_download_directories(model_dir)
try:
logger.info(f"⬇️ Downloading full repository: {source_model}")
logger.info("📊 Progress will be shown below...")
process = self._create_download_process(source_model, model_dir)
self._stream_download_output(process)
# Wait for process to complete
return_code = process.wait()
if return_code != 0:
msg = f"Repository download failed with return code {return_code}"
raise RuntimeError(msg)
logger.info("✅ Repository download completed successfully")
except Exception as e:
self._handle_download_errors(source_model, e)

    def _convert_to_gguf(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Convert model to GGUF F16 format.

        Converts SafeTensors models to GGUF F16 format using our native
        Python converter. Processes model source information and the
        directory containing downloaded model files, handling architecture
        detection and tensor mapping for optimal compatibility.

        Returns:
            Path to F16 GGUF model.

        Raises:
            RuntimeError: If conversion fails.
        """
logger.info("🔄 Converting to GGUF F16 format...")
f16_model = model_dir / f"{model_source.original_author}-{model_source.model_name}-f16.gguf"
if f16_model.exists():
logger.info("✅ F16 model already exists")
return f16_model
# Check for SafeTensors files
safetensor_files = list(model_dir.glob("*.safetensors"))
if not safetensor_files:
logger.error("❌ Model format not supported")
logger.info("💡 This tool supports GGUF and SafeTensors formats")
msg = "Model must be in GGUF or SafeTensors format"
raise RuntimeError(msg)
logger.info("🐍 Using native Python GGUFConverter...")
logger.info(f"✅ Found {len(safetensor_files)} SafeTensors files")
# Load model configuration
config_parser = ConfigParser()
model_config = config_parser.load_model_config(model_dir)
# Get architecture mapping
arch_name = model_config.architectures[0] if model_config.architectures else "llama"
arch = config_parser.get_architecture_mapping(arch_name)
if arch != arch_name:
logger.info(f"📝 Architecture mapping: {arch_name}{arch}")
# Check if architecture is supported by llama.cpp
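        # NB: a conservative snapshot rather than an exhaustive list; extend it
        # as upstream llama.cpp gains new architectures.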
supported_archs = {
"llama",
"qwen2",
"gemma",
"phi3",
"falcon",
"gpt2",
"gptj",
"gptneox",
"mpt",
"baichuan",
"stablelm",
}
if arch not in supported_archs:
logger.warning("=" * 70)
logger.warning(f"⚠️ Architecture '{arch_name}' may not be supported by llama.cpp")
logger.warning(f"⚠️ The GGUF will be created with architecture: '{arch}'")
logger.warning("⚠️ Check if your inference software supports this architecture.")
logger.warning("=" * 70)
# Convert using GGUFConverter
tensor_mapper = TensorMapper()
success = GGUFConverter.convert_safetensors(
model_dir, f16_model, model_config, arch, tensor_mapper
)
if not success:
logger.error("❌ Native Python conversion failed")
msg = "Failed to convert SafeTensors model to GGUF"
raise RuntimeError(msg)
logger.info("✅ Native Python conversion successful")
return f16_model