llm-gguf-tools/helpers/services/huggingface.py

"""HuggingFace operations service.

Handles all interactions with HuggingFace including model downloads,
uploads, README generation, and repository management. Uses UK English
spelling conventions throughout.
"""

from __future__ import annotations

import re
import subprocess
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING

from helpers.logger import logger
from helpers.models.quantisation import QuantisationType

if TYPE_CHECKING:
    from helpers.models.quantisation import ModelSource, QuantisationResult


class HuggingFaceService:
    """Manages HuggingFace repository operations.

    Provides methods for downloading models, uploading files, and managing
    repositories. Handles authentication, error recovery, and progress tracking
    for robust interaction with HuggingFace services.
    """

    @staticmethod
    def get_username() -> str:
        """Get authenticated HuggingFace username.

        Retrieves the current user's HuggingFace username using the CLI.
        Requires prior authentication via `huggingface-cli login`.

        Returns:
            HuggingFace username.

        Raises:
            RuntimeError: If not authenticated or CLI not available.
        """
        try:
            result = subprocess.run(
                ["huggingface-cli", "whoami"],
                capture_output=True,
                text=True,
                check=True,
            )
            return result.stdout.strip()
        except (subprocess.CalledProcessError, FileNotFoundError) as err:
            msg = "Please log in to HuggingFace first: huggingface-cli login"
            raise RuntimeError(msg) from err

    @staticmethod
    def download_model(
        model_name: str, output_dir: Path, include_pattern: str | None = None
    ) -> None:
        """Download model from HuggingFace.

        Downloads a complete model or specific files matching a pattern.
        Creates the output directory if it doesn't exist. Supports filtered
        downloads for efficient bandwidth usage when only certain files are needed.
        """
        logger.info(f"Downloading {model_name} to {output_dir}")

        cmd = [
            "huggingface-cli",
            "download",
            model_name,
            "--local-dir",
            str(output_dir),
        ]

        if include_pattern:
            cmd.extend(["--include", include_pattern])

        subprocess.run(cmd, check=True)
        logger.info("Download complete")

    @staticmethod
    def upload_file(
        repo_id: str,
        local_path: Path,
        repo_path: str | None = None,
        create_repo: bool = False,
    ) -> None:
        """Upload a file to HuggingFace repository.

        Uploads a single file to the specified repository path. Can create
        the repository if it doesn't exist. Handles repository creation conflicts
        gracefully by retrying without the create flag when needed.

        Raises:
            CalledProcessError: If upload fails.
        """
        repo_path = repo_path or local_path.name
        logger.info(f"Uploading {local_path.name} to {repo_id}/{repo_path}")

        cmd = [
            "huggingface-cli",
            "upload",
            repo_id,
            str(local_path),
            repo_path,
        ]

        if create_repo:
            cmd.append("--create")

        try:
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info(f"Uploaded {repo_path}")
        except subprocess.CalledProcessError:
            if create_repo:
                # Repository might already exist, retry without --create
                cmd = cmd[:-1]  # Remove --create flag
                subprocess.run(cmd, check=True)
                logger.info(f"Updated {repo_path}")
            else:
                raise


class ReadmeGenerator:
    """Generates README files for quantised models.

    Creates comprehensive README documentation including model cards,
    quantisation details, and status tracking. Supports both initial
    planning documentation and final result summaries.
    """

    def generate(
        self,
        model_source: ModelSource,
        results: dict[QuantisationType, QuantisationResult],
        models_dir: Path,
        output_repo: str | None = None,
    ) -> Path:
        """Generate README file for quantised model repository.

        Creates a comprehensive README with frontmatter, quantisation table,
        and original model information. Handles status tracking for planned,
        processing, and completed quantisations.

        Returns:
            Path to generated README file.
        """
        logger.info("Creating model card...")

        model_dir = models_dir / model_source.model_name
        readme_path = model_dir / "README.md"

        # Get original README content
        original_content = self._get_original_readme(model_source, model_dir)

        # Generate new README
        readme_content = self._generate_readme_content(
            model_source, results, original_content, output_repo
        )

        readme_path.write_text(readme_content)
        return readme_path

    def _get_original_readme(self, model_source: ModelSource, model_dir: Path) -> dict[str, str]:
        """Extract original README and metadata.

        Downloads or reads the original model's README for inclusion in the
        quantised model documentation. Parses YAML frontmatter if present.

        Returns:
            Dictionary with readme content, licence, tags, and frontmatter.
        """
        content = {"readme": "", "licence": "apache-2.0", "tags": "", "frontmatter": ""}

        # Try local file first
        readme_path = model_dir / "README.md"
        if readme_path.exists():
            content["readme"] = readme_path.read_text(encoding="utf-8")
            logger.info(f"Found original README ({len(content['readme'])} characters)")
        else:
            # Download separately
            content = self._download_readme(model_source)

        # Parse frontmatter if present
        if content["readme"].startswith("---\n"):
            content = self._parse_frontmatter(content["readme"])

        return content

    def _download_readme(self, model_source: ModelSource) -> dict[str, str]:
        """Download README from HuggingFace repository.

        Attempts to download just the README.md file from the source repository
        for efficient documentation extraction.

        Returns:
            Dictionary with readme content and default metadata.
        """
        content = {"readme": "", "licence": "apache-2.0", "tags": "", "frontmatter": ""}

        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                logger.info(f"Downloading README from {model_source.source_model}...")
                subprocess.run(
                    [
                        "huggingface-cli",
                        "download",
                        model_source.source_model,
                        "--include",
                        "README.md",
                        "--local-dir",
                        temp_dir,
                    ],
                    check=True,
                    capture_output=True,
                )

                readme_path = Path(temp_dir) / "README.md"
                if readme_path.exists():
                    content["readme"] = readme_path.read_text(encoding="utf-8")
                    logger.info(f"Downloaded README ({len(content['readme'])} characters)")
            except subprocess.CalledProcessError as e:
                logger.warning(f"Failed to download README: {e}")

        return content

    def _parse_frontmatter(self, readme_text: str) -> dict[str, str]:
        """Parse YAML frontmatter from README.

        Extracts metadata from YAML frontmatter including licence, tags,
        and other model card fields.

        Returns:
            Dictionary with separated content and metadata.
        """
        lines = readme_text.split("\n")
        if lines[0] != "---":
            return {
                "readme": readme_text,
                "licence": "apache-2.0",
                "tags": "",
                "frontmatter": "",
            }

        frontmatter_end = -1
        for i, line in enumerate(lines[1:], 1):
            if line == "---":
                frontmatter_end = i
                break

        if frontmatter_end == -1:
            return {
                "readme": readme_text,
                "licence": "apache-2.0",
                "tags": "",
                "frontmatter": "",
            }

        frontmatter = "\n".join(lines[1:frontmatter_end])
        content = "\n".join(lines[frontmatter_end + 1 :])

        # Extract licence
        licence_match = re.search(r"^license:\s*(.+)$", frontmatter, re.MULTILINE)
        licence_val = licence_match.group(1).strip().strip('"') if licence_match else "apache-2.0"

        # Extract tags
        tags = []
        in_tags = False
        for line in frontmatter.split("\n"):
            if line.startswith("tags:"):
                in_tags = True
                continue
            if in_tags:
                if line.startswith("- "):
                    tags.append(line[2:].strip())
                elif line and not line.startswith(" "):
                    break

        return {
            "readme": content,
            "licence": licence_val,
            "tags": ",".join(tags),
            "frontmatter": frontmatter,
        }

    def _generate_readme_content(
        self,
        model_source: ModelSource,
        results: dict[QuantisationType, QuantisationResult],
        original_content: dict[str, str],
        output_repo: str | None = None,
    ) -> str:
        """Generate complete README content with quantisation details.

        Creates the full README including YAML frontmatter, quantisation status
        table, and original model information.

        Returns:
            Complete README markdown content.
        """
        # Build tags
        our_tags = [
            "quantised",
            "gguf",
            "q4_k_m",
            "q4_k_l",
            "q4_k_xl",
            "q4_k_xxl",
            "bartowski-method",
        ]
        original_tags = original_content["tags"].split(",") if original_content["tags"] else []
        all_tags = sorted(set(our_tags + original_tags))

        # Build frontmatter
        frontmatter = f"""---
license: {original_content["licence"]}
library_name: gguf
base_model: {model_source.source_model}
tags:
"""
        for tag in all_tags:
            if tag.strip():
                frontmatter += f"- {tag.strip()}\n"

        frontmatter += "---\n\n"

        # Build main content
        hf_url = f"https://huggingface.co/{model_source.source_model}"
        content = f"""# {model_source.original_author}-{model_source.model_name}-GGUF

GGUF quantisations of [{model_source.source_model}]({hf_url}) using Bartowski's method.

| Quantisation | Embeddings/Output | Attention | Feed-Forward | Status |
|--------------|-------------------|-----------|--------------|--------|
"""

        # Add results table
        for quant_type in [
            QuantisationType.Q4_K_M,
            QuantisationType.Q4_K_L,
            QuantisationType.Q4_K_XL,
            QuantisationType.Q4_K_XXL,
        ]:
            result = results.get(quant_type)
            if not result:
                result = type("Result", (), {"status": "planned", "success": False})()

            layers = self._get_layers_config(quant_type)
            status = self._format_status(result, model_source, quant_type, output_repo)

            content += (
                f"| {quant_type.value} | {layers['embeddings']} | "
                f"{layers['attention']} | {layers['ffn']} | {status} |\n"
            )

        content += "\n---\n\n"

        # Add original content
        if original_content["readme"]:
            content += "# Original Model Information\n\n" + original_content["readme"]
        else:
            content += f"## Original Model\n\nQuantisation of [{model_source.source_model}](https://huggingface.co/{model_source.source_model}).\n"

        return frontmatter + content

    def _get_layers_config(self, quant_type: QuantisationType) -> dict[str, str]:
        """Get layer configuration for quantisation type.

        Returns layer precision specifications for the quantisation table.

        Returns:
            Dictionary with embeddings, attention, and ffn precision labels.
        """
        configs = {
            QuantisationType.Q4_K_M: {
                "embeddings": "Q4_K_M",
                "attention": "Q4_K_M",
                "ffn": "Q4_K_M",
            },
            QuantisationType.Q4_K_L: {"embeddings": "Q6_K", "attention": "Q6_K", "ffn": "Q4_K_M"},
            QuantisationType.Q4_K_XL: {"embeddings": "Q8_0", "attention": "Q6_K", "ffn": "Q4_K_M"},
            QuantisationType.Q4_K_XXL: {"embeddings": "Q8_0", "attention": "Q8_0", "ffn": "Q4_K_M"},
        }
        return configs.get(
            quant_type, {"embeddings": "Unknown", "attention": "Unknown", "ffn": "Unknown"}
        )

    def _format_status(
        self,
        result: QuantisationResult,
        model_source: ModelSource,
        quant_type: QuantisationType,
        output_repo: str | None,
    ) -> str:
        """Format status indicator for README table.

        Creates appropriate status indicator based on quantisation state
        including progress indicators, file sizes, and download links.

        Returns:
            Formatted status string for table cell.
        """
        status_map = {
            "planned": "⏳ Planned",
            "processing": "🔄 Processing...",
            "uploading": "⬆️ Uploading...",
            "failed": "❌ Failed",
        }

        if hasattr(result, "status") and result.status in status_map:
            base_status = status_map[result.status]

            if result.status == "uploading" and hasattr(result, "file_size") and result.file_size:
                return f"{base_status} ({result.file_size})"
            if result.status == "completed" or (hasattr(result, "success") and result.success):
                return self._format_success_status(result, model_source, quant_type, output_repo)
            return base_status

        # Legacy support
        if hasattr(result, "success") and result.success:
            return self._format_success_status(result, model_source, quant_type, output_repo)
        return "❌ Failed"

    def _format_success_status(
        self,
        result: QuantisationResult,
        model_source: ModelSource,
        quant_type: QuantisationType,
        output_repo: str | None,
    ) -> str:
        """Format successful quantisation status with download link.

        Creates a download link if repository information is available,
        otherwise shows file size.

        Returns:
            Formatted success status string.
        """
        if not output_repo:
            return (
                f"✅ {result.file_size}"
                if hasattr(result, "file_size") and result.file_size
                else "✅ Available"
            )

        filename = (
            f"{model_source.original_author}-{model_source.model_name}-{quant_type.value}.gguf"
        )
        url = f"https://huggingface.co/{output_repo}?show_file_info={filename}"

        if hasattr(result, "file_size") and result.file_size:
            return f"[✅ {result.file_size}]({url})"
        return f"[✅ Available]({url})"