# llm-gguf-tools/helpers/readme/formatter.py

"""README formatting utilities.

Provides formatters for status indicators, tables, and other README elements.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from helpers.config.quantisation_configs import QUANTISATION_CONFIGS
from helpers.models.quantisation import QuantisationResult, QuantisationType

if TYPE_CHECKING:
    from pathlib import Path

    from helpers.models.quantisation import ModelSource

# Number of bytes in one gibibyte (2**30); used when formatting file sizes
GIBIBYTE = 2**30


class StatusFormatter:
    """Formats status indicators for README tables."""

    @staticmethod
    def format_status(
        result: QuantisationResult,
        model_source: ModelSource,
        quant_type: QuantisationType,
        output_repo: str | None,
    ) -> str:
        """Format status indicator for README table.

        Resolution order:

        1. ``status == "completed"`` is always rendered as a success cell.
        2. A known in-progress/failed status ("planned", "processing",
           "uploading", "failed") is rendered with its label, with these
           refinements: a failure whose error message mentions
           "architecture not supported" is shown as skipped; an upload with
           a known file size shows that size; a truthy ``success`` flag
           overrides the label with a success cell.
        3. Any other (or missing) status falls back to the legacy ``success``
           flag, and to a failure marker if that is not truthy either.

        Attributes are read defensively with ``getattr`` so results lacking
        ``status``, ``success``, ``file_size`` or ``error_message`` are
        tolerated.

        Args:
            result: Quantisation result to describe.
            model_source: Source model details, used to build the filename
                in the download link.
            quant_type: Quantisation type of this row.
            output_repo: Target repository identifier, or None if unknown.

        Returns:
            Formatted status string for table cell.
        """
        status_labels = {
            "planned": "⏳ Queued",
            "processing": "🔄 Processing...",
            "uploading": "⬆️ Uploading...",
            "failed": "❌ Failed",
        }

        status = getattr(result, "status", None)
        succeeded = bool(getattr(result, "success", False))

        # "completed" has no entry in status_labels, so it must be handled
        # before the label lookup; otherwise a completed result whose
        # success flag is unset would be reported as failed.
        if status == "completed":
            return StatusFormatter.format_success_status(
                result, model_source, quant_type, output_repo
            )

        label = status_labels.get(status)
        if label is None:
            # Legacy support: results without a recognised status rely on
            # the success flag alone.
            if succeeded:
                return StatusFormatter.format_success_status(
                    result, model_source, quant_type, output_repo
                )
            return "❌ Failed"

        # An unsupported architecture is reported as skipped, not failed.
        error_message = getattr(result, "error_message", None)
        if (
            status == "failed"
            and error_message
            and "architecture not supported" in str(error_message).lower()
        ):
            return "⚠️ Skipped"

        file_size = getattr(result, "file_size", None)
        if status == "uploading" and file_size:
            return f"{label} ({file_size})"

        if succeeded:
            return StatusFormatter.format_success_status(
                result, model_source, quant_type, output_repo
            )

        return label

    @staticmethod
    def format_success_status(
        result: QuantisationResult,
        model_source: ModelSource,
        quant_type: QuantisationType,
        output_repo: str | None,
    ) -> str:
        """Format successful quantisation status with download link.

        The cell text is the result's file size when known, otherwise
        "Available". When ``output_repo`` is given the text is wrapped in a
        Markdown link to that repository on huggingface.co, with the
        ``show_file_info`` query parameter set to the expected GGUF filename
        (``<author>-<model>-<quant>.gguf``).

        Args:
            result: Quantisation result; only ``file_size`` is read.
            model_source: Provides ``original_author`` and ``model_name``
                for the filename (only read when ``output_repo`` is set).
            quant_type: Quantisation type whose ``value`` is the filename
                suffix.
            output_repo: Repository identifier, or None/empty for no link.

        Returns:
            Formatted success status string.
        """
        file_size = getattr(result, "file_size", None)
        label = f"✅ {file_size}" if file_size else "✅ Available"

        if not output_repo:
            return label

        filename = (
            f"{model_source.original_author}-{model_source.model_name}-{quant_type.value}.gguf"
        )
        url = f"https://huggingface.co/{output_repo}?show_file_info={filename}"
        return f"[{label}]({url})"


class TableFormatter:
    """Builds the quantisation table shown in the README."""

    @staticmethod
    def get_ordered_quantisation_types() -> list[QuantisationType]:
        """Return the quantisation types in the order they are displayed.

        Types are grouped by bit width (Q3 through Q8); within each group
        the listing follows the sequence given below.

        Returns:
            New list of quantisation types in display order.
        """
        display_order = (
            # Q3 K-quants
            QuantisationType.Q3_K_M,
            QuantisationType.Q3_K_L,
            QuantisationType.Q3_K_XL,
            # Q4 types
            QuantisationType.Q4_0,  # Basic
            QuantisationType.Q4_K_M,
            QuantisationType.Q4_K_L,
            # Q5 types
            QuantisationType.Q5_0,  # Basic
            QuantisationType.Q5_K_M,
            QuantisationType.Q5_K_L,
            # Q6 types
            QuantisationType.Q6_0,  # Basic
            QuantisationType.Q6_K,
            QuantisationType.Q6_K_L,
            # Q8 types
            QuantisationType.Q8_0,  # Basic
            QuantisationType.Q8_K,
        )
        return list(display_order)

    @staticmethod
    def format_quantisation_row(
        quant_type: QuantisationType,
        result: QuantisationResult | None,
        model_source: ModelSource,
        output_repo: str | None,
    ) -> str:
        """Render one Markdown table row for a quantisation type.

        The row has three cells: the type's value in bold, a compact
        description of its configuration, and its status. A missing result
        is treated as a not-yet-started ("planned") quantisation.

        Returns:
            Table row string terminated by a newline.
        """
        # No recorded result yet: stand in a placeholder marked as planned
        effective_result = (
            result
            if result is not None
            else QuantisationResult(quantisation_type=quant_type, success=False, status="planned")
        )

        type_config = QUANTISATION_CONFIGS.get(quant_type)

        status_cell = StatusFormatter.format_status(
            effective_result, model_source, quant_type, output_repo
        )

        # Fall back to a generic description for types without a config entry
        if type_config:
            description_cell = type_config.get_compact_config(QUANTISATION_CONFIGS)
        else:
            description_cell = f"{quant_type} all layers"

        cells = (f"**{quant_type.value}**", description_cell, status_cell)
        return "| " + " | ".join(cells) + " |\n"


class TagFormatter:
    """Formats tags for README frontmatter."""

    @staticmethod
    def build_tags(
        results: dict[QuantisationType, QuantisationResult],
        original_tags: list[str] | None = None,
    ) -> list[str]:
        """Build tags based on quantisation results.

        Always includes "gguf". Adds one tag per result whose status is
        "completed": "f16" for the F16 entry, otherwise the lowercased
        value of the result's quantisation type. If no such tag was added
        but an F16 entry exists and is completed or uploading, "f16" is
        added. The generated tags are then merged with ``original_tags``.

        Args:
            results: Quantisation results keyed by quantisation type.
            original_tags: Tags from the source model, if any. Not mutated.

        Returns:
            Sorted list of unique tags. The list is sorted and de-duplicated
            whether or not ``original_tags`` is supplied.
        """
        our_tags = ["gguf"]

        # Add tags for successful quantisations
        for quant_type, result in results.items():
            if getattr(result, "status", None) != "completed":
                continue
            if quant_type == QuantisationType.F16:
                our_tags.append("f16")
            elif hasattr(result, "quantisation_type"):
                # Convert to lowercase tag format
                our_tags.append(result.quantisation_type.value.lower())

        # Check for F16 availability when nothing else produced a tag
        if len(our_tags) == 1 and QuantisationType.F16 in results:
            f16_status = getattr(results[QuantisationType.F16], "status", None)
            if f16_status in {"completed", "uploading"}:
                our_tags.append("f16")

        # Merge with the source model's tags; sort and de-duplicate
        # unconditionally so the result matches the documented contract
        # even when there are no original tags.
        return sorted(set(our_tags).union(original_tags or ()))


class FileSizeFormatter:
    """Formats file sizes for display."""

    @staticmethod
    def format_size_bytes(size_bytes: int) -> str:
        """Format bytes to human-readable size.

        Uses 1024-based steps with the labels B, KB, MB and GB. Byte counts
        below 1024 are shown as whole numbers; larger values are shown with
        one decimal place. Values of one gibibyte or more are always
        expressed in GB.

        Args:
            size_bytes: Size in bytes.

        Returns:
            Formatted size string (e.g., "4.5GB").
        """
        if size_bytes < 1024:
            return f"{size_bytes}B"
        if size_bytes < 1024**2:
            return f"{size_bytes / 1024:.1f}KB"
        if size_bytes < GIBIBYTE:
            return f"{size_bytes / (1024**2):.1f}MB"
        return f"{size_bytes / GIBIBYTE:.1f}GB"

    @staticmethod
    def get_file_size(file_path: Path) -> str:
        """Get formatted file size from path.

        Reads the size with a single ``stat()`` call rather than checking
        for existence first, so a file that disappears while this runs
        yields the placeholder instead of raising.

        Args:
            file_path: Path of the file to measure.

        Returns:
            Formatted size string or "-" if file doesn't exist.
        """
        try:
            size_bytes = file_path.stat().st_size
        except (FileNotFoundError, NotADirectoryError):
            # Missing file, or a path component that is not a directory
            return "-"

        return FileSizeFormatter.format_size_bytes(size_bytes)