# llm-gguf-tools/helpers/services/quantisation.py

"""Quantisation operations service.

Provides modular quantisation engine, model management, and upload capabilities
for GGUF model processing. Consolidates quantisation logic from various tools
into reusable components following SOLID principles.
"""

from __future__ import annotations

import re
import shutil
import subprocess
import tempfile
import traceback
from pathlib import Path

from helpers.logger import logger
from helpers.models.quantisation import (
    ModelSource,
    QuantisationContext,
    QuantisationResult,
    QuantisationType,
)
from helpers.services.filesystem import FilesystemService
from helpers.services.gguf import GGUFConverter
from helpers.services.llama_python import LlamaCppPythonAPI
from helpers.utils.config_parser import ConfigParser
from helpers.utils.tensor_mapping import TensorMapper


class QuantisationEngine:
    """Handles the actual quantisation process with configurable methods.

    Provides flexible quantisation execution supporting multiple tensor
    precision configurations, importance matrices, and fallback strategies.
    Uses llama-cpp-python API for direct quantisation without subprocess overhead.

    Problems detected while quantising (missing input file, a falsy return
    from the Python API, or an exception raised by it) are reported through
    the returned ``QuantisationResult`` with ``success=False``.
    """

    def __init__(self) -> None:
        """Initialise quantisation engine."""
        self.fs = FilesystemService()
        self.python_api = LlamaCppPythonAPI()

    def quantise(self, context: QuantisationContext) -> QuantisationResult:
        """Perform quantisation using the specified configuration.

        Executes quantisation using Python API. Since llama-cpp-python is a
        required dependency, we can rely on it being available.

        Args:
            context: Quantisation request; this method reads ``config``,
                ``f16_model_path``, ``imatrix_path`` and ``get_output_path()``.

        Returns:
            QuantisationResult with success status and file information.
        """
        quant_name = context.config.name
        logger.debug(f"DEBUG: Starting quantisation for {quant_name}")
        logger.info(f"⚙️ Creating {quant_name} quantisation ({context.config.description})...")

        output_path = context.get_output_path()
        logger.debug(f"DEBUG: Output path: {output_path}")

        # Fail fast when the input model is missing.
        if not context.f16_model_path.exists():
            error_msg = f"Input model file does not exist: {context.f16_model_path}"
            logger.error(f"❌ {error_msg}")
            return self._create_failure_result(quant_name, error_msg)

        # Advisory only: never blocks quantisation.
        self._log_disk_space(context.f16_model_path, output_path)

        logger.info(f"🎯 Attempting {quant_name} quantisation...")
        logger.debug(f"DEBUG: Source: {context.f16_model_path}")
        logger.debug(f"DEBUG: Target: {output_path}")
        logger.debug(f"DEBUG: imatrix: {context.imatrix_path}")

        try:
            # Use Python API for quantisation
            logger.info("🐍 Using Python API for quantisation...")
            logger.debug("DEBUG: Calling python_api.quantise_model...")

            success = self.python_api.quantise_model(
                context.f16_model_path, output_path, context.config, context.imatrix_path
            )

            logger.debug(f"DEBUG: Python API returned: {success}")

            if success:
                logger.debug("DEBUG: Quantisation successful, creating success result")
                return self._create_success_result(quant_name, output_path, "Python API")

            logger.error(f"❌ {quant_name} quantisation failed")
            return self._create_failure_result(quant_name, "Quantisation failed via Python API")

        except Exception as e:
            # Boundary handler: convert any API failure into a failed result.
            logger.error(f"❌ Exception during {quant_name} quantisation: {e}")
            logger.error("Exception traceback:")
            for line in traceback.format_exc().splitlines():
                logger.error(f"  {line}")

            return self._create_failure_result(
                quant_name, f"Exception during quantisation: {e!s}"
            )

    def _log_disk_space(self, input_path: Path, output_path: Path) -> None:
        """Log input size and free space, warning when space looks insufficient.

        The check is a rough heuristic: it compares free space in the output
        directory against the size of the input model. It only logs; it never
        raises and never prevents quantisation from being attempted.
        """
        gib = 1024**3
        try:
            input_size = input_path.stat().st_size
            logger.debug(f"DEBUG: Input file size: {input_size / gib:.2f} GB")
            logger.debug(f"DEBUG: Output directory: {output_path.parent}")
            free_bytes = shutil.disk_usage(output_path.parent).free
        except OSError as e:
            # e.g. the output directory does not exist yet
            logger.warning(f"⚠️ Could not check disk space: {e}")
            return

        logger.debug(f"DEBUG: Free space in output directory: {free_bytes / gib:.2f} GB")
        if free_bytes < input_size:
            logger.warning(
                f"⚠️ Low disk space: {free_bytes / gib:.2f} GB free in {output_path.parent}, "
                f"input model is {input_size / gib:.2f} GB"
            )

    def _create_failure_result(self, quant_type: str, error_message: str) -> QuantisationResult:
        """Create a failed quantisation result carrying the given error message.

        Returns:
            QuantisationResult with ``success=False`` and ``error_message`` set.
        """
        return QuantisationResult(
            quantisation_type=QuantisationType(quant_type),
            success=False,
            error_message=error_message,
        )

    def _create_success_result(
        self, quant_type: str, output_path: Path, method_used: str
    ) -> QuantisationResult:
        """Create successful quantisation result with file metadata.

        Returns:
            QuantisationResult with file path and size information.
        """
        file_size = self.fs.get_file_size(output_path)
        return QuantisationResult(
            quantisation_type=QuantisationType(quant_type),
            success=True,
            file_path=output_path,
            file_size=file_size,
            method_used=method_used,
        )


class ModelManager:
    """Handles model downloading and preparation for quantisation.

    Manages both GGUF repository downloads and HuggingFace model conversions,
    providing unified interface for model acquisition and preparation.
    """

    # Shard marker of split GGUF files, e.g. "model-00002-of-00005.gguf".
    _SHARD_PATTERN = re.compile(r"(?P<index>\d{5})-of-(?P<total>\d{5})")
    # Seconds allowed for each pattern-based download attempt.
    _DOWNLOAD_TIMEOUT_SECONDS = 300

    def __init__(self, models_dir: Path) -> None:
        """Initialise model manager with storage configuration.

        Sets up model storage directory for model downloads and conversions.
        """
        self.models_dir = models_dir
        self.fs = FilesystemService()

    def prepare_model(self, model_source: ModelSource) -> Path:
        """Prepare model for quantisation and return F16 model path.

        Dispatches on ``model_source.is_gguf_repo`` to either the GGUF download
        workflow or the regular repository download-and-convert workflow.

        Returns:
            Path to F16 GGUF model ready for quantisation.
        """
        model_dir = self.models_dir / model_source.model_name

        if model_source.is_gguf_repo:
            return self._handle_gguf_repo(model_source, model_dir)
        return self._handle_regular_repo(model_source, model_dir)

    def _handle_gguf_repo(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Handle GGUF repository download with pattern matching.

        Reuses an existing ``<model_name>-f16.gguf`` or any existing GGUF file
        in ``model_dir``; otherwise downloads by pattern. A single-file
        download is renamed to the standard F16 name, a split download
        resolves to its first shard. If nothing can be downloaded, falls back
        to the regular repository workflow.

        Returns:
            Path to downloaded or existing GGUF file.

        Raises:
            RuntimeError: If a non-first shard was downloaded but the first
                shard of the set is not present.
        """
        logger.info(f"⬇️ Downloading GGUF file from repository: {model_source.source_model}")
        logger.info(f"🔍 Looking for file pattern: *{model_source.gguf_file_pattern}*")

        f16_model = model_dir / f"{model_source.model_name}-f16.gguf"

        if f16_model.exists():
            logger.info(f"✅ Found existing F16 file: {f16_model.name}")
            return f16_model

        # Check for existing GGUF files
        model_dir.mkdir(parents=True, exist_ok=True)
        existing_gguf = self.fs.find_gguf_files(model_dir)

        if existing_gguf:
            logger.info(f"✅ Found existing GGUF file: {existing_gguf[0].name}")
            return existing_gguf[0]

        # Download with patterns
        downloaded_file = self._download_gguf_with_patterns(
            model_source.source_model, model_source.gguf_file_pattern, model_dir
        )

        if downloaded_file:
            shard = self._SHARD_PATTERN.search(downloaded_file.name)
            if shard is None:
                # Rename single file to standard name
                downloaded_file.rename(f16_model)
                return f16_model

            # Split model: the first shard is the entry point for the whole set.
            if int(shard["index"]) == 1:
                return downloaded_file

            name = downloaded_file.name
            first_name = f"{name[: shard.start('index')]}00001{name[shard.end('index') :]}"
            first_part = downloaded_file.parent / first_name
            if first_part.exists():
                logger.info(f"🔄 Using first part: {first_part.name}")
                return first_part

            # Renaming a lone later shard to the F16 name would yield a broken model.
            msg = f"First part {first_name} of multi-part GGUF is missing"
            logger.error(f"❌ {msg}")
            raise RuntimeError(msg)

        # Fallback to regular conversion
        logger.info("💡 Falling back to downloading full repository and converting...")
        return self._handle_regular_repo(
            ModelSource(**{**model_source.dict(), "is_gguf_repo": False}),
            model_dir,
        )

    def _download_gguf_with_patterns(
        self, source_model: str, pattern: str | None, model_dir: Path
    ) -> Path | None:
        """Download GGUF file using various pattern strategies.

        Tries each distinct include-pattern in turn (the requested pattern in
        original/lower/upper case, then generic F16 patterns), downloading
        into a temporary ``gguf_temp`` directory that is removed after every
        attempt. Each attempt is limited to ``_DOWNLOAD_TIMEOUT_SECONDS``.

        Returns:
            Path to downloaded file, or None if all patterns fail.
        """
        candidates = (
            [f"*{pattern}*", f"*{pattern.lower()}*", f"*{pattern.upper()}*"] if pattern else []
        )
        candidates += ["*f16*", "*F16*", "*fp16*"]
        # Drop duplicates (order preserved) so no pattern is downloaded twice.
        patterns = list(dict.fromkeys(candidates))

        temp_dir = model_dir / "gguf_temp"

        for search_pattern in patterns:
            logger.info(f"🔍 Trying pattern: {search_pattern}")
            temp_dir.mkdir(parents=True, exist_ok=True)

            try:
                logger.debug(
                    f"DEBUG: Running huggingface-cli download for pattern {search_pattern}"
                )
                # Use subprocess's own timeout instead of the external `timeout`
                # binary, which is not available on every platform.
                result = subprocess.run(
                    [
                        "huggingface-cli",
                        "download",
                        source_model,
                        "--include",
                        search_pattern,
                        "--local-dir",
                        str(temp_dir),
                    ],
                    check=True,
                    capture_output=True,
                    text=True,
                    timeout=self._DOWNLOAD_TIMEOUT_SECONDS,
                )
                logger.debug(
                    f"DEBUG: Download command completed with return code {result.returncode}"
                )

                # Find downloaded GGUF files
                gguf_files = self.fs.find_gguf_files(temp_dir, pattern)
                if gguf_files:
                    found_file = gguf_files[0]
                    logger.info(f"✅ Found GGUF file: {found_file.name}")
                    return self._move_download(found_file, model_dir)

            except subprocess.TimeoutExpired:
                logger.debug(
                    f"DEBUG: Pattern {search_pattern} exceeded "
                    f"{self._DOWNLOAD_TIMEOUT_SECONDS}s timeout"
                )
                logger.info(f"⚠️ Pattern {search_pattern} failed or timed out")
            except subprocess.CalledProcessError as e:
                logger.debug(
                    f"DEBUG: Pattern {search_pattern} failed with return code {e.returncode}"
                )
                if e.stderr:
                    logger.debug(f"DEBUG: stderr: {e.stderr}")
                if e.stdout:
                    logger.debug(f"DEBUG: stdout: {e.stdout}")
                logger.info(f"⚠️ Pattern {search_pattern} failed or timed out")
            except Exception as e:
                # Best effort: log and move on to the next pattern.
                logger.error(f"❌ Unexpected error during download: {e}")
                logger.error("Exception traceback:")
                for line in traceback.format_exc().splitlines():
                    logger.error(f"  {line}")
            finally:
                # Always discard the temporary download directory.
                shutil.rmtree(temp_dir, ignore_errors=True)

        return None

    def _move_download(self, found_file: Path, model_dir: Path) -> Path:
        """Move a downloaded GGUF file, and any sibling shards, into ``model_dir``.

        For a split model every file in the same directory that differs from
        ``found_file`` only in its five-digit shard index is moved as well, so
        the set stays complete once the temporary directory is deleted.

        Returns:
            New path of ``found_file`` inside ``model_dir``.
        """
        shard = self._SHARD_PATTERN.search(found_file.name)
        if shard is None:
            parts = [found_file]
        else:
            prefix = found_file.name[: shard.start("index")]
            suffix = found_file.name[shard.end("index") :]
            parts = sorted(
                candidate
                for candidate in found_file.parent.iterdir()
                if candidate.is_file()
                and len(candidate.name) == len(found_file.name)
                and candidate.name.startswith(prefix)
                and candidate.name.endswith(suffix)
                and candidate.name[len(prefix) : len(prefix) + 5].isdigit()
            )

        for part in parts:
            shutil.move(str(part), str(model_dir / part.name))
        return model_dir / found_file.name

    def _handle_regular_repo(
        self,
        model_source: ModelSource,
        model_dir: Path,
    ) -> Path:
        """Handle regular HuggingFace repository conversion.

        Downloads full model repository and converts to F16 GGUF format
        using our native Python-based GGUFConverter for SafeTensors models.

        Returns:
            Path to converted F16 GGUF model.
        """
        logger.info(f"⬇️ Downloading source model: {model_source.source_model}")

        # Download model if needed. An existing but empty directory (left by
        # the GGUF workflow before it falls back here) counts as not downloaded.
        if not model_dir.is_dir() or not any(model_dir.iterdir()):
            self._download_repository(model_source.source_model, model_dir)
        else:
            logger.info("✅ Model already downloaded")

        # Convert to GGUF
        return self._convert_to_gguf(model_source, model_dir)

    def _download_repository(self, source_model: str, model_dir: Path) -> None:
        """Download HuggingFace repository.

        Args:
            source_model: HuggingFace model identifier.
            model_dir: Local directory for download.

        Raises:
            RuntimeError: If download fails.
        """
        try:
            logger.debug(f"DEBUG: Downloading full repository: {source_model}")
            result = subprocess.run(
                [
                    "huggingface-cli",
                    "download",
                    source_model,
                    "--local-dir",
                    str(model_dir),
                ],
                check=True,
                capture_output=True,
                text=True,
            )
            logger.debug(
                f"DEBUG: Repository download completed with return code {result.returncode}"
            )
        except subprocess.CalledProcessError as e:
            logger.error(f"❌ Failed to download repository {source_model}")
            logger.error(f"Return code: {e.returncode}")
            if e.stderr:
                logger.error(f"stderr: {e.stderr}")
            if e.stdout:
                logger.error(f"stdout: {e.stdout}")
            msg = f"Repository download failed: {e}"
            raise RuntimeError(msg) from e
        except Exception as e:
            # Log with traceback, then let the original exception propagate.
            logger.error(f"❌ Unexpected error during repository download: {e}")
            logger.error("Exception traceback:")
            for line in traceback.format_exc().splitlines():
                logger.error(f"  {line}")
            raise

    def _convert_to_gguf(self, model_source: ModelSource, model_dir: Path) -> Path:
        """Convert model to GGUF F16 format.

        Args:
            model_source: Model source information.
            model_dir: Directory containing model files.

        Returns:
            Path to F16 GGUF model.

        Raises:
            RuntimeError: If conversion fails.
        """
        logger.info("🔄 Converting to GGUF F16 format...")
        f16_model = model_dir / f"{model_source.model_name}-f16.gguf"

        if f16_model.exists():
            logger.info("✅ F16 model already exists")
            return f16_model

        # Check for SafeTensors files
        safetensor_files = list(model_dir.glob("*.safetensors"))
        if not safetensor_files:
            logger.error("❌ Model format not supported")
            logger.info("💡 This tool supports GGUF and SafeTensors formats")
            msg = "Model must be in GGUF or SafeTensors format"
            raise RuntimeError(msg)

        logger.info("🐍 Using native Python GGUFConverter...")
        logger.info(f"✅ Found {len(safetensor_files)} SafeTensors files")

        # Load model configuration
        config_parser = ConfigParser()
        model_config = config_parser.load_model_config(model_dir)

        # Get architecture mapping; "llama" is used when the config lists none
        arch_name = model_config.architectures[0] if model_config.architectures else "llama"
        arch = config_parser.get_architecture_mapping(arch_name)

        if arch != arch_name:
            logger.info(f"📝 Architecture mapping: {arch_name} → {arch}")

        # Convert using GGUFConverter
        tensor_mapper = TensorMapper()
        success = GGUFConverter.convert_safetensors(
            model_dir, f16_model, model_config, arch, tensor_mapper
        )

        if not success:
            logger.error("❌ Native Python conversion failed")
            msg = "Failed to convert SafeTensors model to GGUF"
            raise RuntimeError(msg)

        logger.info("✅ Native Python conversion successful")
        return f16_model


class HuggingFaceUploader:
    """Handles uploading models and documentation to HuggingFace.

    Provides methods for repository creation, file uploads, and README
    updates, all driven through the ``huggingface-cli`` executable (plus an
    optional git-based upload helper).
    """

    @staticmethod
    def get_username() -> str:
        """Get authenticated HuggingFace username.

        Runs ``huggingface-cli whoami`` and returns the first non-empty line
        of its output; later lines (such as the organisation list) are ignored.

        Returns:
            HuggingFace username from CLI authentication.

        Raises:
            RuntimeError: If not authenticated.
        """
        msg = "Please log in to HuggingFace first: huggingface-cli login"
        try:
            result = subprocess.run(
                ["huggingface-cli", "whoami"],
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as err:
            raise RuntimeError(msg) from err

        lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
        username = lines[0] if lines else ""
        # The CLI can report "Not logged in" with a zero exit status.
        if not username or username.lower() == "not logged in":
            raise RuntimeError(msg)
        return username

    @staticmethod
    def _run_upload(
        command: list[str], what: str, target: str, output_repo: str
    ) -> subprocess.CompletedProcess:
        """Run an upload command, logging failures and normalising CLI errors.

        Args:
            command: Full argument vector to execute.
            what: Kind of upload for messages ("README" or "model file").
            target: Description of the uploaded item for the failure log.
            output_repo: Destination repository identifier.

        Returns:
            The completed process, with captured text output.

        Raises:
            RuntimeError: If the command exits with a non-zero status.
        """
        try:
            return subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"❌ Failed to upload {target} to {output_repo}")
            logger.error(f"Return code: {e.returncode}")
            if e.stderr:
                logger.error(f"stderr: {e.stderr}")
            if e.stdout:
                logger.error(f"stdout: {e.stdout}")
            msg = f"{what[0].upper()}{what[1:]} upload failed: {e}"
            raise RuntimeError(msg) from e
        except Exception as e:
            # Log with traceback, then let the original exception propagate.
            logger.error(f"❌ Unexpected error during {what} upload: {e}")
            logger.error("Exception traceback:")
            for line in traceback.format_exc().splitlines():
                logger.error(f"  {line}")
            raise

    def upload_readme(self, output_repo: str, readme_path: Path) -> None:
        """Upload or update README file to repository.

        Creates repository if needed, handles existing repository updates.

        Raises:
            RuntimeError: If the README upload fails.
        """
        logger.info("Uploading README...")

        # First ensure the repository exists
        self._ensure_repo_exists(output_repo)

        # Upload without --create flag to avoid PR creation
        logger.debug(f"DEBUG: Uploading README to {output_repo}")
        result = self._run_upload(
            [
                "huggingface-cli",
                "upload",
                output_repo,
                str(readme_path),
                "README.md",
                "--commit-message",
                "Update README.md",
            ],
            "README",
            "README",
            output_repo,
        )
        logger.debug(f"DEBUG: README upload completed with return code {result.returncode}")
        logger.info("README uploaded")

    def _ensure_repo_exists(self, repo_id: str) -> None:
        """Ensure the repository exists, creating it if necessary.

        Creation is best effort: a failing ``repo create`` command is taken to
        mean the repository already exists, and its output is logged at debug
        level so other causes can still be diagnosed.
        """
        try:
            # Try to create the repo - will fail if it already exists
            subprocess.run(
                [
                    "huggingface-cli",
                    "repo",
                    "create",
                    repo_id,
                    "--type",
                    "model",
                    "-y",
                ],
                check=True,
                capture_output=True,
                text=True,
            )
            logger.info(f"Created repository: {repo_id}")
        except subprocess.CalledProcessError as e:
            # Most likely the repository already exists, that's fine
            logger.debug(
                f"DEBUG: Repository creation skipped for {repo_id} "
                f"(return code {e.returncode}): {(e.stderr or '').strip()}"
            )

    def upload_model_file(self, output_repo: str, model_path: Path) -> None:
        """Upload model file to repository.

        Uploads GGUF model file to specified repository path.
        Always uses huggingface-cli to ensure proper handling of large files
        via HuggingFace's xet backend.

        Raises:
            RuntimeError: If the model file upload fails.
        """
        logger.info(f"Uploading {model_path.name}...")

        # Always use huggingface-cli for model files to ensure xet backend is used
        logger.debug(f"DEBUG: Uploading model file {model_path.name} to {output_repo}")
        result = self._run_upload(
            [
                "huggingface-cli",
                "upload",
                output_repo,
                str(model_path),
                model_path.name,
                "--revision",
                "main",  # Explicitly push to main branch
                "--commit-message",
                f"Add {model_path.name}",
            ],
            "model file",
            f"model file {model_path.name}",
            output_repo,
        )
        logger.debug(f"DEBUG: Model upload completed with return code {result.returncode}")

        # Extract and log the first URL present in the CLI output, if any
        for line in (result.stdout or "").splitlines():
            if "https://huggingface.co/" in line:
                logger.info(f"Upload URL: {line.strip()}")
                break

        logger.info(f"{model_path.name} uploaded")

    def _try_git_upload_file(
        self,
        repo_id: str,
        local_path: Path,
        repo_path: str,
        *,
        create_repo: bool = False,
    ) -> bool:
        """Try to upload file using git directly to avoid PR creation.

        Clones the repository into a temporary directory, copies the file to
        ``repo_path`` and, if git reports a change, commits and pushes it.

        Args:
            repo_id: Repository identifier appended to https://huggingface.co/.
            local_path: File to upload.
            repo_path: Destination path inside the repository.
            create_repo: When True, a failed clone is not logged as a warning.

        Returns:
            bool: True if upload successful, False if should fallback to CLI.
        """
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                repo_dir = Path(temp_dir) / "repo"
                repo_url = f"https://huggingface.co/{repo_id}"

                # Clone repository
                logger.info(f"Cloning {repo_url}...")
                result = subprocess.run(
                    ["git", "clone", repo_url, str(repo_dir)],
                    check=False,
                    capture_output=True,
                    text=True,
                )

                if result.returncode != 0:
                    # With create_repo the repository may simply not exist yet;
                    # either way the caller should fall back to huggingface-cli.
                    if not create_repo:
                        logger.warning(f"Clone failed: {result.stderr}")
                    return False

                target_file = repo_dir / repo_path

                # Ensure target directory exists
                target_file.parent.mkdir(parents=True, exist_ok=True)

                # Copy file
                shutil.copy2(local_path, target_file)

                # Check if there are any changes
                status_result = subprocess.run(
                    ["git", "status", "--porcelain"],
                    cwd=repo_dir,
                    capture_output=True,
                    text=True,
                    check=True,
                )

                if not status_result.stdout.strip():
                    logger.info(f"No changes detected for {repo_path}, file already up-to-date")
                    return True  # File is already up-to-date, no need to push

                # Git add, commit, push
                for git_command in (
                    ["git", "add", repo_path],
                    ["git", "commit", "-m", f"Update {repo_path}"],
                    ["git", "push"],
                ):
                    subprocess.run(
                        git_command,
                        cwd=repo_dir,
                        check=True,
                        capture_output=True,
                        text=True,
                    )

                return True

        except subprocess.CalledProcessError as e:
            logger.warning(f"Git upload failed: {e}")
            return False
        except Exception as e:
            # Any other failure also means "fall back to the CLI".
            logger.warning(f"Git upload error: {e}")
            return False