llm-gguf-tools/helpers/quantisation/model_manager.py

"""Model acquisition and preparation management.

Handles model downloading from HuggingFace and preparation for quantisation,
including format detection and conversion.
"""

from __future__ import annotations

import shutil
import subprocess
import traceback
from typing import TYPE_CHECKING

from helpers.filesystem import FilesystemService
from helpers.gguf import GGUFConverter
from helpers.logger import logger
from helpers.models.quantisation import ModelSource
from helpers.utils.config_parser import ConfigParser
from helpers.utils.tensor_mapping import TensorMapper

if TYPE_CHECKING:
    from pathlib import Path


class ModelManager:
    """Handles model downloading and preparation for quantisation.

    Manages both GGUF repository downloads and HuggingFace model conversions,
    providing unified interface for model acquisition and preparation.
    """

    def __init__(self, models_dir: Path) -> None:
        """Initialise model manager with storage configuration.

        Creates a new model manager instance that will handle model downloading,
        format detection, and preparation for quantisation workflows using the
        specified directory as the base storage location.
        """
        self.models_dir = models_dir
        self.fs = FilesystemService()

    def prepare_model(self, model_source: ModelSource) -> Path:
        """Prepare model for quantisation and return F16 model path.

        Handles both GGUF repository downloads and regular HuggingFace model
        conversion workflows with automatic format detection. Processes the
        provided model source information to determine the optimal acquisition
        strategy and ensures the model is in F16 GGUF format.

        Returns:
            Path to F16 GGUF model ready for quantisation.
        """
        model_dir = self.models_dir / model_source.model_name

        if model_source.is_gguf_repo:
            return self._handle_gguf_repo(model_source, model_dir)
        return self._handle_regular_repo(model_source, model_dir)

    def _handle_gguf_repo(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Handle GGUF repository download with pattern matching.

        Downloads GGUF files matching specified patterns, prioritising
        multi-part files and F16 variants. Uses the model source information
        and target directory to efficiently locate and download appropriate
        GGUF files from HuggingFace repositories.

        Returns:
            Path to downloaded or existing GGUF file.
        """
        logger.info(f"⬇️ Downloading GGUF file from repository: {model_source.source_model}")
        logger.info(f"🔍 Looking for file pattern: *{model_source.gguf_file_pattern}*")

        f16_model = model_dir / f"{model_source.original_author}-{model_source.model_name}-f16.gguf"

        if f16_model.exists():
            logger.info(f"✅ Found existing F16 file: {f16_model.name}")
            return f16_model

        # Check for existing GGUF files
        model_dir.mkdir(parents=True, exist_ok=True)
        existing_gguf = self.fs.find_gguf_files(model_dir)

        if existing_gguf:
            logger.info(f"✅ Found existing GGUF file: {existing_gguf[0].name}")
            return existing_gguf[0]

        # Download with patterns
        downloaded_file = self._download_gguf_with_patterns(
            model_source.source_model, model_source.gguf_file_pattern, model_dir
        )

        if downloaded_file:
            # Handle multi-part files
            if "00001-of-" in downloaded_file.name:
                return downloaded_file
            if "-00002-of-" in downloaded_file.name or "-00003-of-" in downloaded_file.name:
                base_name = downloaded_file.name.replace("-00002-of-", "-00001-of-").replace(
                    "-00003-of-", "-00001-of-"
                )
                first_part = downloaded_file.parent / base_name
                if first_part.exists():
                    logger.info(f"🔄 Using first part: {first_part.name}")
                    return first_part

            # Rename single file to standard name
            downloaded_file.rename(f16_model)
            return f16_model

        # Fallback to regular conversion
        logger.info("💡 Falling back to downloading full repository and converting...")
        return self._handle_regular_repo(
            ModelSource(**{**model_source.dict(), "is_gguf_repo": False}),
            model_dir,
        )

    def _download_gguf_with_patterns(
        self, source_model: str, pattern: str | None, model_dir: Path
    ) -> Path | None:
        """Download GGUF file using various pattern strategies.

        Tries multiple pattern variations to find and download appropriate
        GGUF files, handling timeouts and temporary directories. Uses the
        HuggingFace model identifier with an optional pattern to search for
        specific files and downloads them to the target directory.

        Returns:
            Path to downloaded file, or None if all patterns fail.
        """
        if pattern:
            patterns = [
                f"*{pattern}*",
                f"*{pattern.lower()}*",
                f"*{pattern.upper()}*",
                "*f16*",
                "*F16*",
                "*fp16*",
            ]
        else:
            patterns = ["*f16*", "*F16*", "*fp16*"]

        temp_dir = model_dir / "gguf_temp"

        for search_pattern in patterns:
            logger.info(f"🔍 Trying pattern: {search_pattern}")
            temp_dir.mkdir(exist_ok=True)

            try:
                logger.debug(
                    f"DEBUG: Running huggingface-cli download for pattern {search_pattern}"
                )
                result = subprocess.run(
                    [
                        "timeout",
                        "300",
                        "huggingface-cli",
                        "download",
                        source_model,
                        "--include",
                        search_pattern,
                        "--local-dir",
                        str(temp_dir),
                    ],
                    check=True,
                    capture_output=True,
                    text=True,
                )
                logger.debug(
                    f"DEBUG: Download command completed with return code {result.returncode}"
                )

                # Find downloaded GGUF files
                gguf_files = self.fs.find_gguf_files(temp_dir, pattern)
                if gguf_files:
                    found_file = gguf_files[0]
                    logger.info(f"✅ Found GGUF file: {found_file.name}")

                    # Move to parent directory
                    final_path = model_dir / found_file.name
                    shutil.move(str(found_file), str(final_path))
                    shutil.rmtree(temp_dir)
                    return final_path

            except subprocess.CalledProcessError as e:
                logger.debug(
                    f"DEBUG: Pattern {search_pattern} failed with return code {e.returncode}"
                )
                if e.stderr:
                    logger.debug(f"DEBUG: stderr: {e.stderr}")
                if e.stdout:
                    logger.debug(f"DEBUG: stdout: {e.stdout}")
                logger.info(f"⚠️ Pattern {search_pattern} failed or timed out")
                continue
            except Exception as e:
                logger.error(f"❌ Unexpected error during download: {e}")
                logger.error("Exception traceback:")
                for line in traceback.format_exc().splitlines():
                    logger.error(f"  {line}")
                continue
            finally:
                if temp_dir.exists():
                    shutil.rmtree(temp_dir, ignore_errors=True)

        return None

    def _handle_regular_repo(
        self,
        model_source: ModelSource,
        model_dir: Path,
    ) -> Path:
        """Handle regular HuggingFace repository conversion.

        Downloads full model repository and converts to F16 GGUF format
        using our native Python-based GGUFConverter for SafeTensors models.
        Processes the model source information and uses the local directory
        for storage during the download and conversion workflow.

        Returns:
            Path to converted F16 GGUF model.
        """
        logger.info(f"⬇️ Downloading source model: {model_source.source_model}")

        # Download model if needed
        if not model_dir.exists():
            self._download_repository(model_source.source_model, model_dir)
        else:
            logger.info("✅ Model already downloaded")

        # Convert to GGUF
        return self._convert_to_gguf(model_source, model_dir)

    def _setup_download_directories(self, model_dir: Path) -> None:
        """Set up directories for model download.

        Creates the necessary directory structure for model downloads,
        including the base model directory and HuggingFace metadata
        directory to ensure proper organisation of downloaded assets.
        """
        model_dir.mkdir(parents=True, exist_ok=True)
        huggingface_dir = model_dir / ".huggingface"
        huggingface_dir.mkdir(parents=True, exist_ok=True)

    def _create_download_process(self, source_model: str, model_dir: Path) -> subprocess.Popen:
        """Create subprocess for downloading repository.

        Initiates a HuggingFace CLI download process for the specified model
        identifier, configuring it to download to the local directory whilst
        excluding existing GGUF files to avoid conflicts.

        Returns:
            Subprocess for downloading.
        """
        return subprocess.Popen(
            [
                "huggingface-cli",
                "download",
                source_model,
                "--local-dir",
                str(model_dir),
                "--exclude",
                "*.gguf",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # Line buffered
            universal_newlines=True,
        )

    def _stream_download_output(self, process: subprocess.Popen) -> None:
        """Stream download process output with appropriate logging levels.

        Monitors the download subprocess output and routes progress information
        to appropriate log levels, providing real-time feedback on download
        progress whilst filtering debug information appropriately.
        """
        if process.stdout:
            for line in process.stdout:
                # Log download progress lines
                if line.strip():
                    # Check if it's a progress line (contains %)
                    if "%" in line or "Downloading" in line or "Fetching" in line:
                        # Use info level for progress lines
                        logger.info(f"  {line.strip()}")
                    else:
                        # Use debug for other output
                        logger.debug(f"  {line.strip()}")

    def _handle_download_errors(self, source_model: str, e: Exception) -> None:
        """Handle download errors with detailed logging.

        Processes download exceptions for the specified model, providing
        comprehensive error logging including return codes, stderr, and
        stdout information to aid in debugging download failures.

        Raises:
            TypeError: Always raised with appropriate error message.
        """
        if isinstance(e, subprocess.CalledProcessError):
            logger.error(f"❌ Failed to download repository {source_model}")
            logger.error(f"Return code: {e.returncode}")
            if e.stderr:
                logger.error(f"stderr: {e.stderr}")
            if e.stdout:
                logger.error(f"stdout: {e.stdout}")
            msg = f"Repository download failed: {e}"
            raise TypeError(msg) from e
        logger.error(f"❌ Unexpected error during repository download: {e}")
        logger.error("Exception traceback:")
        for line in traceback.format_exc().splitlines():
            logger.error(f"  {line}")
        msg = f"Repository download failed: {e}"
        raise TypeError(msg) from e

    def _download_repository(self, source_model: str, model_dir: Path) -> None:
        """Download HuggingFace repository.

        Orchestrates the complete repository download workflow for the
        specified HuggingFace model, managing directory setup, process
        execution, and error handling to ensure robust model acquisition.

        Raises:
            RuntimeError: If download fails.
        """
        self._setup_download_directories(model_dir)

        try:
            logger.info(f"⬇️ Downloading full repository: {source_model}")
            logger.info("📊 Progress will be shown below...")

            process = self._create_download_process(source_model, model_dir)
            self._stream_download_output(process)

            # Wait for process to complete
            return_code = process.wait()

            if return_code != 0:
                msg = f"Repository download failed with return code {return_code}"
                raise RuntimeError(msg)

            logger.info("✅ Repository download completed successfully")

        except Exception as e:
            self._handle_download_errors(source_model, e)

    def _convert_to_gguf(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Convert model to GGUF F16 format.

        Converts SafeTensors models to GGUF F16 format using our native
        Python converter. Processes model source information and the
        directory containing downloaded model files, handling architecture
        detection and tensor mapping for optimal compatibility.

        Returns:
            Path to F16 GGUF model.

        Raises:
            RuntimeError: If conversion fails.
        """
        logger.info("🔄 Converting to GGUF F16 format...")
        f16_model = model_dir / f"{model_source.original_author}-{model_source.model_name}-f16.gguf"

        if f16_model.exists():
            logger.info("✅ F16 model already exists")
            return f16_model

        # Check for SafeTensors files
        safetensor_files = list(model_dir.glob("*.safetensors"))
        if not safetensor_files:
            logger.error("❌ Model format not supported")
            logger.info("💡 This tool supports GGUF and SafeTensors formats")
            msg = "Model must be in GGUF or SafeTensors format"
            raise RuntimeError(msg)

        logger.info("🐍 Using native Python GGUFConverter...")
        logger.info(f"✅ Found {len(safetensor_files)} SafeTensors files")

        # Load model configuration
        config_parser = ConfigParser()
        model_config = config_parser.load_model_config(model_dir)

        # Get architecture mapping
        arch_name = model_config.architectures[0] if model_config.architectures else "llama"
        arch = config_parser.get_architecture_mapping(arch_name)

        if arch != arch_name:
            logger.info(f"📝 Architecture mapping: {arch_name} → {arch}")

        # Check if architecture is supported by llama.cpp
        supported_archs = {
            "llama",
            "qwen2",
            "gemma",
            "phi3",
            "falcon",
            "gpt2",
            "gptj",
            "gptneox",
            "mpt",
            "baichuan",
            "stablelm",
        }

        if arch not in supported_archs:
            logger.warning("=" * 70)
            logger.warning(f"⚠️  Architecture '{arch_name}' may not be supported by llama.cpp")
            logger.warning(f"⚠️  The GGUF will be created with architecture: '{arch}'")
            logger.warning("⚠️  Check if your inference software supports this architecture.")
            logger.warning("=" * 70)

        # Convert using GGUFConverter
        tensor_mapper = TensorMapper()
        success = GGUFConverter.convert_safetensors(
            model_dir, f16_model, model_config, arch, tensor_mapper
        )

        if not success:
            logger.error("❌ Native Python conversion failed")
            msg = "Failed to convert SafeTensors model to GGUF"
            raise RuntimeError(msg)

        logger.info("✅ Native Python conversion successful")
        return f16_model