422 lines
16 KiB
Python
422 lines
16 KiB
Python
"""Model acquisition and preparation management.
|
|
|
|
Handles model downloading from HuggingFace and preparation for quantisation,
|
|
including format detection and conversion.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import shutil
|
|
import subprocess
|
|
import traceback
|
|
from typing import TYPE_CHECKING
|
|
|
|
from helpers.filesystem import FilesystemService
|
|
from helpers.gguf import GGUFConverter
|
|
from helpers.logger import logger
|
|
from helpers.models.quantisation import ModelSource
|
|
from helpers.utils.config_parser import ConfigParser
|
|
from helpers.utils.tensor_mapping import TensorMapper
|
|
|
|
if TYPE_CHECKING:
|
|
from pathlib import Path
|
|
|
|
|
|
class ModelManager:
    """Handles model downloading and preparation for quantisation.

    Manages both GGUF repository downloads and HuggingFace model conversions,
    providing unified interface for model acquisition and preparation.
    """

    def __init__(self, models_dir: Path) -> None:
        """Initialise model manager with storage configuration.

        Creates a new model manager instance that will handle model downloading,
        format detection, and preparation for quantisation workflows using the
        specified directory as the base storage location.
        """
        # Base directory; each model gets its own subdirectory named after
        # the model (see prepare_model).
        self.models_dir = models_dir
        # Filesystem helper used throughout for locating GGUF files on disk.
        self.fs = FilesystemService()
|
|
|
|
def prepare_model(self, model_source: ModelSource) -> Path:
|
|
"""Prepare model for quantisation and return F16 model path.
|
|
|
|
Handles both GGUF repository downloads and regular HuggingFace model
|
|
conversion workflows with automatic format detection. Processes the
|
|
provided model source information to determine the optimal acquisition
|
|
strategy and ensures the model is in F16 GGUF format.
|
|
|
|
Returns:
|
|
Path to F16 GGUF model ready for quantisation.
|
|
"""
|
|
model_dir = self.models_dir / model_source.model_name
|
|
|
|
if model_source.is_gguf_repo:
|
|
return self._handle_gguf_repo(model_source, model_dir)
|
|
return self._handle_regular_repo(model_source, model_dir)
|
|
|
|
    def _handle_gguf_repo(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Handle GGUF repository download with pattern matching.

        Downloads GGUF files matching specified patterns, prioritising
        multi-part files and F16 variants. Uses the model source information
        and target directory to efficiently locate and download appropriate
        GGUF files from HuggingFace repositories.

        Returns:
            Path to downloaded or existing GGUF file.

        """
        logger.info(f"⬇️ Downloading GGUF file from repository: {model_source.source_model}")
        logger.info(f"🔍 Looking for file pattern: *{model_source.gguf_file_pattern}*")

        # Canonical local name: <author>-<model>-f16.gguf
        f16_model = model_dir / f"{model_source.original_author}-{model_source.model_name}-f16.gguf"

        # Fast path: the canonical F16 file already exists from a prior run.
        if f16_model.exists():
            logger.info(f"✅ Found existing F16 file: {f16_model.name}")
            return f16_model

        # Check for existing GGUF files
        model_dir.mkdir(parents=True, exist_ok=True)
        existing_gguf = self.fs.find_gguf_files(model_dir)

        if existing_gguf:
            # Reuse the first GGUF found in a previous (possibly partial) run.
            logger.info(f"✅ Found existing GGUF file: {existing_gguf[0].name}")
            return existing_gguf[0]

        # Download with patterns
        downloaded_file = self._download_gguf_with_patterns(
            model_source.source_model, model_source.gguf_file_pattern, model_dir
        )

        if downloaded_file:
            # Handle multi-part files
            # If we got the first shard, it is the entry point for loading.
            if "00001-of-" in downloaded_file.name:
                return downloaded_file
            # If we got a later shard (2 or 3), derive the first shard's name
            # and prefer it when it is present on disk.
            if "-00002-of-" in downloaded_file.name or "-00003-of-" in downloaded_file.name:
                base_name = downloaded_file.name.replace("-00002-of-", "-00001-of-").replace(
                    "-00003-of-", "-00001-of-"
                )
                first_part = downloaded_file.parent / base_name
                if first_part.exists():
                    logger.info(f"🔄 Using first part: {first_part.name}")
                    return first_part

            # Rename single file to standard name
            downloaded_file.rename(f16_model)
            return f16_model

        # Fallback to regular conversion
        # No GGUF matched any pattern: retry as a regular repository by
        # cloning the source description with is_gguf_repo forced off.
        logger.info("💡 Falling back to downloading full repository and converting...")
        return self._handle_regular_repo(
            ModelSource(**{**model_source.dict(), "is_gguf_repo": False}),
            model_dir,
        )
|
|
|
|
def _download_gguf_with_patterns(
|
|
self, source_model: str, pattern: str | None, model_dir: Path
|
|
) -> Path | None:
|
|
"""Download GGUF file using various pattern strategies.
|
|
|
|
Tries multiple pattern variations to find and download appropriate
|
|
GGUF files, handling timeouts and temporary directories. Uses the
|
|
HuggingFace model identifier with an optional pattern to search for
|
|
specific files and downloads them to the target directory.
|
|
|
|
Returns:
|
|
Path to downloaded file, or None if all patterns fail.
|
|
"""
|
|
if pattern:
|
|
patterns = [
|
|
f"*{pattern}*",
|
|
f"*{pattern.lower()}*",
|
|
f"*{pattern.upper()}*",
|
|
"*f16*",
|
|
"*F16*",
|
|
"*fp16*",
|
|
]
|
|
else:
|
|
patterns = ["*f16*", "*F16*", "*fp16*"]
|
|
|
|
temp_dir = model_dir / "gguf_temp"
|
|
|
|
for search_pattern in patterns:
|
|
logger.info(f"🔍 Trying pattern: {search_pattern}")
|
|
temp_dir.mkdir(exist_ok=True)
|
|
|
|
try:
|
|
logger.debug(
|
|
f"DEBUG: Running huggingface-cli download for pattern {search_pattern}"
|
|
)
|
|
result = subprocess.run(
|
|
[
|
|
"timeout",
|
|
"300",
|
|
"huggingface-cli",
|
|
"download",
|
|
source_model,
|
|
"--include",
|
|
search_pattern,
|
|
"--local-dir",
|
|
str(temp_dir),
|
|
],
|
|
check=True,
|
|
capture_output=True,
|
|
text=True,
|
|
)
|
|
logger.debug(
|
|
f"DEBUG: Download command completed with return code {result.returncode}"
|
|
)
|
|
|
|
# Find downloaded GGUF files
|
|
gguf_files = self.fs.find_gguf_files(temp_dir, pattern)
|
|
if gguf_files:
|
|
found_file = gguf_files[0]
|
|
logger.info(f"✅ Found GGUF file: {found_file.name}")
|
|
|
|
# Move to parent directory
|
|
final_path = model_dir / found_file.name
|
|
shutil.move(str(found_file), str(final_path))
|
|
shutil.rmtree(temp_dir)
|
|
return final_path
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
logger.debug(
|
|
f"DEBUG: Pattern {search_pattern} failed with return code {e.returncode}"
|
|
)
|
|
if e.stderr:
|
|
logger.debug(f"DEBUG: stderr: {e.stderr}")
|
|
if e.stdout:
|
|
logger.debug(f"DEBUG: stdout: {e.stdout}")
|
|
logger.info(f"⚠️ Pattern {search_pattern} failed or timed out")
|
|
continue
|
|
except Exception as e:
|
|
logger.error(f"❌ Unexpected error during download: {e}")
|
|
logger.error("Exception traceback:")
|
|
for line in traceback.format_exc().splitlines():
|
|
logger.error(f" {line}")
|
|
continue
|
|
finally:
|
|
if temp_dir.exists():
|
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
return None
|
|
|
|
def _handle_regular_repo(
|
|
self,
|
|
model_source: ModelSource,
|
|
model_dir: Path,
|
|
) -> Path:
|
|
"""Handle regular HuggingFace repository conversion.
|
|
|
|
Downloads full model repository and converts to F16 GGUF format
|
|
using our native Python-based GGUFConverter for SafeTensors models.
|
|
Processes the model source information and uses the local directory
|
|
for storage during the download and conversion workflow.
|
|
|
|
Returns:
|
|
Path to converted F16 GGUF model.
|
|
"""
|
|
logger.info(f"⬇️ Downloading source model: {model_source.source_model}")
|
|
|
|
# Download model if needed
|
|
if not model_dir.exists():
|
|
self._download_repository(model_source.source_model, model_dir)
|
|
else:
|
|
logger.info("✅ Model already downloaded")
|
|
|
|
# Convert to GGUF
|
|
return self._convert_to_gguf(model_source, model_dir)
|
|
|
|
def _setup_download_directories(self, model_dir: Path) -> None:
|
|
"""Set up directories for model download.
|
|
|
|
Creates the necessary directory structure for model downloads,
|
|
including the base model directory and HuggingFace metadata
|
|
directory to ensure proper organisation of downloaded assets.
|
|
"""
|
|
model_dir.mkdir(parents=True, exist_ok=True)
|
|
huggingface_dir = model_dir / ".huggingface"
|
|
huggingface_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
def _create_download_process(self, source_model: str, model_dir: Path) -> subprocess.Popen:
|
|
"""Create subprocess for downloading repository.
|
|
|
|
Initiates a HuggingFace CLI download process for the specified model
|
|
identifier, configuring it to download to the local directory whilst
|
|
excluding existing GGUF files to avoid conflicts.
|
|
|
|
Returns:
|
|
Subprocess for downloading.
|
|
"""
|
|
return subprocess.Popen(
|
|
[
|
|
"huggingface-cli",
|
|
"download",
|
|
source_model,
|
|
"--local-dir",
|
|
str(model_dir),
|
|
"--exclude",
|
|
"*.gguf",
|
|
],
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.STDOUT,
|
|
text=True,
|
|
bufsize=1, # Line buffered
|
|
universal_newlines=True,
|
|
)
|
|
|
|
def _stream_download_output(self, process: subprocess.Popen) -> None:
|
|
"""Stream download process output with appropriate logging levels.
|
|
|
|
Monitors the download subprocess output and routes progress information
|
|
to appropriate log levels, providing real-time feedback on download
|
|
progress whilst filtering debug information appropriately.
|
|
"""
|
|
if process.stdout:
|
|
for line in process.stdout:
|
|
# Log download progress lines
|
|
if line.strip():
|
|
# Check if it's a progress line (contains %)
|
|
if "%" in line or "Downloading" in line or "Fetching" in line:
|
|
# Use info level for progress lines
|
|
logger.info(f" {line.strip()}")
|
|
else:
|
|
# Use debug for other output
|
|
logger.debug(f" {line.strip()}")
|
|
|
|
def _handle_download_errors(self, source_model: str, e: Exception) -> None:
|
|
"""Handle download errors with detailed logging.
|
|
|
|
Processes download exceptions for the specified model, providing
|
|
comprehensive error logging including return codes, stderr, and
|
|
stdout information to aid in debugging download failures.
|
|
|
|
Raises:
|
|
TypeError: Always raised with appropriate error message.
|
|
"""
|
|
if isinstance(e, subprocess.CalledProcessError):
|
|
logger.error(f"❌ Failed to download repository {source_model}")
|
|
logger.error(f"Return code: {e.returncode}")
|
|
if e.stderr:
|
|
logger.error(f"stderr: {e.stderr}")
|
|
if e.stdout:
|
|
logger.error(f"stdout: {e.stdout}")
|
|
msg = f"Repository download failed: {e}"
|
|
raise TypeError(msg) from e
|
|
logger.error(f"❌ Unexpected error during repository download: {e}")
|
|
logger.error("Exception traceback:")
|
|
for line in traceback.format_exc().splitlines():
|
|
logger.error(f" {line}")
|
|
msg = f"Repository download failed: {e}"
|
|
raise TypeError(msg) from e
|
|
|
|
    def _download_repository(self, source_model: str, model_dir: Path) -> None:
        """Download HuggingFace repository.

        Orchestrates the complete repository download workflow for the
        specified HuggingFace model, managing directory setup, process
        execution, and error handling to ensure robust model acquisition.

        Raises:
            RuntimeError: If download fails.

        """
        self._setup_download_directories(model_dir)

        try:
            logger.info(f"⬇️ Downloading full repository: {source_model}")
            logger.info("📊 Progress will be shown below...")

            # Spawn the CLI download and stream its output until EOF.
            process = self._create_download_process(source_model, model_dir)
            self._stream_download_output(process)

            # Wait for process to complete
            return_code = process.wait()

            if return_code != 0:
                msg = f"Repository download failed with return code {return_code}"
                raise RuntimeError(msg)

            logger.info("✅ Repository download completed successfully")

        except Exception as e:
            # Delegates logging and re-raising; _handle_download_errors never
            # returns normally, so failures always propagate to the caller.
            # NOTE: the RuntimeError raised above for a non-zero return code is
            # also routed through this handler and re-wrapped.
            self._handle_download_errors(source_model, e)
|
|
|
|
    def _convert_to_gguf(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Convert model to GGUF F16 format.

        Converts SafeTensors models to GGUF F16 format using our native
        Python converter. Processes model source information and the
        directory containing downloaded model files, handling architecture
        detection and tensor mapping for optimal compatibility.

        Returns:
            Path to F16 GGUF model.

        Raises:
            RuntimeError: If conversion fails.

        """
        logger.info("🔄 Converting to GGUF F16 format...")
        # Output path follows the same canonical naming as _handle_gguf_repo.
        f16_model = model_dir / f"{model_source.original_author}-{model_source.model_name}-f16.gguf"

        # Idempotent: skip conversion when the output already exists.
        if f16_model.exists():
            logger.info("✅ F16 model already exists")
            return f16_model

        # Check for SafeTensors files
        safetensor_files = list(model_dir.glob("*.safetensors"))
        if not safetensor_files:
            # No convertible weights found: only GGUF and SafeTensors inputs
            # are supported by this tool.
            logger.error("❌ Model format not supported")
            logger.info("💡 This tool supports GGUF and SafeTensors formats")
            msg = "Model must be in GGUF or SafeTensors format"
            raise RuntimeError(msg)

        logger.info("🐍 Using native Python GGUFConverter...")
        logger.info(f"✅ Found {len(safetensor_files)} SafeTensors files")

        # Load model configuration
        config_parser = ConfigParser()
        model_config = config_parser.load_model_config(model_dir)

        # Get architecture mapping
        # Default to "llama" when config.json declares no architectures.
        arch_name = model_config.architectures[0] if model_config.architectures else "llama"
        arch = config_parser.get_architecture_mapping(arch_name)

        if arch != arch_name:
            logger.info(f"📝 Architecture mapping: {arch_name} → {arch}")

        # Check if architecture is supported by llama.cpp
        # NOTE(review): static allow-list — may lag behind llama.cpp's actual
        # supported set; unsupported archs still convert, with a warning.
        supported_archs = {
            "llama",
            "qwen2",
            "gemma",
            "phi3",
            "falcon",
            "gpt2",
            "gptj",
            "gptneox",
            "mpt",
            "baichuan",
            "stablelm",
        }

        if arch not in supported_archs:
            logger.warning("=" * 70)
            logger.warning(f"⚠️ Architecture '{arch_name}' may not be supported by llama.cpp")
            logger.warning(f"⚠️ The GGUF will be created with architecture: '{arch}'")
            logger.warning("⚠️ Check if your inference software supports this architecture.")
            logger.warning("=" * 70)

        # Convert using GGUFConverter
        tensor_mapper = TensorMapper()
        success = GGUFConverter.convert_safetensors(
            model_dir, f16_model, model_config, arch, tensor_mapper
        )

        if not success:
            logger.error("❌ Native Python conversion failed")
            msg = "Failed to convert SafeTensors model to GGUF"
            raise RuntimeError(msg)

        logger.info("✅ Native Python conversion successful")
        return f16_model
|