"""README templates for quantised models.
|
|
|
|
Provides template strings and builders for generating README documentation.
|
|
"""
|
|
|
|
from __future__ import annotations


def get_frontmatter_template(
    licence: str,
    base_model: str,
    tags: list[str],
) -> str:
    """Generate YAML frontmatter for README.

    Creates the YAML metadata header for HuggingFace model cards including
    licence information, library specification, base model reference, and
    tag listings formatted according to HuggingFace conventions.

    Returns:
        Formatted YAML frontmatter string.
    """
    frontmatter = f"""---
license: {licence}
library_name: gguf
base_model: {base_model}
tags:
"""
    for tag in tags:
        if tag.strip():
            frontmatter += f"- {tag.strip()}\n"

    frontmatter += "---\n\n"
    return frontmatter
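

# Illustrative only: with licence="apache-2.0", base_model="org/base" and
# tags=["gguf", "chat"], the builder above yields (values assumed, not taken
# from any real model):
#
#   ---
#   license: apache-2.0
#   library_name: gguf
#   base_model: org/base
#   tags:
#   - gguf
#   - chat
#   ---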


def get_header_template(
    original_author: str,
    model_name: str,
    source_model: str,
) -> str:
    """Generate README header section.

    Creates the main header section with model title, description of the
    quantisation process, and initial table structure for displaying
    quantisation variants and their status information.

    Returns:
        Formatted header markdown.
    """
    hf_url = f"https://huggingface.co/{source_model}"
    return f"""# {original_author}-{model_name}-GGUF

GGUF quantisations of [{source_model}]({hf_url}) using
[Bartowski](https://huggingface.co/bartowski)'s method. Created with
[llm-gguf-tools](https://git.tomfos.tr/tom/llm-gguf-tools), which replicates
Bartowski's quantisation profiles.

| Variant | Configuration | Status |
|---|---|---|
"""


def get_downloads_section(download_instruction: str | None = None) -> str:
    """Generate downloads and usage section.

    Creates comprehensive usage documentation including download instructions,
    quick start examples for various runtimes (llama.cpp, Ollama, LM Studio),
    and integration guidance with optional custom instructions.

    Returns:
        Formatted downloads section markdown.
    """
    base_section = """
## 📥 Download Links

Direct download links are available for each quantisation in the table above. Click the ✅ status to
go to the file page.

## 🚀 Quick Start

### Using llama.cpp

```bash
# Download the model (replace Q4_K_M with your chosen quantisation)
wget https://huggingface.co/YOUR_REPO/resolve/main/model-Q4_K_M.gguf

# Run with llama.cpp
./llama-cli -m model-Q4_K_M.gguf -p "Your prompt here"
```

### Using Ollama

```bash
# Create Modelfile
echo "FROM ./model-Q4_K_M.gguf" > Modelfile

# Create and run the model
ollama create mymodel -f Modelfile
ollama run mymodel
```

### Using LM Studio

1. Open LM Studio
2. Click "Download Model"
3. Paste the HuggingFace repository URL
4. Select your preferred quantisation
5. Click Download

"""

    if download_instruction:
        base_section = f"{download_instruction}\n\n{base_section}"

    return base_section
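

# Illustrative only: callers may prepend a bespoke note, e.g.
# get_downloads_section("Files over 50GB are split; download all parts.")
# emits that sentence first, followed by the standard quick-start docs.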


def get_quantisation_info() -> str:
    """Get information about quantisation types.

    Returns:
        Formatted quantisation information markdown.
    """
    return """
## 📊 Quantisation Information

### Bartowski Naming Convention

- **L variants** (Q3_K_L, Q4_K_L, Q5_K_L): Q8_0 for embeddings/output weights
- **M variants** (Q3_K_M, Q4_K_M, Q5_K_M): Standard K-quant configuration
- **XL variant** (Q3_K_XL): Q8_0 embeddings + Q6_K output weights
- **_L suffix** (Q6_K_L): Q8_0 for output.weight tensor

### Recommended Quantisations

- **Q4_K_M**: Best balance of quality and size (4.58GB for 7B model)
- **Q5_K_M**: Higher quality, larger size (5.33GB for 7B model)
- **Q3_K_L**: Smallest with good quality (3.35GB for 7B model)
- **Q6_K_L**: Near-original quality (5.65GB for 7B model)
- **Q8_0**: Highest quality quantisation (7.17GB for 7B model)

### Basic vs K-quants

- **Basic types** (Q4_0, Q5_0, Q8_0): Simple quantisation, universally compatible
- **K-quants** (Q#_K_*): Advanced quantisation with better quality/size ratios

Choose K-quants when available for better performance. Basic types are fallbacks for unsupported
architectures.
"""


def get_original_model_section(
    original_readme: str,
    separator: str = "---",
) -> str:
    """Format original model documentation section.

    Formats the original model's documentation for inclusion in the
    quantised model's README, preserving important context whilst
    clearly separating it from the quantisation-specific information.

    Returns:
        Formatted original model section.
    """
    if not original_readme:
        return ""

    return f"""
{separator}

## Original Model Information

{original_readme}
"""


def get_f16_row_template(
    original_author: str,
    model_name: str,
    output_repo: str,
    file_size: str = "-",
    status: str = "completed",
) -> str:
    """Generate F16 GGUF row for the table.

    Creates a formatted table row for the F16 reference model with
    appropriate status indicators, download links, and file size
    information based on upload status and availability.

    Returns:
        Formatted table row for F16.
    """
    filename = f"{original_author}-{model_name}-f16.gguf"
    url = f"https://huggingface.co/{output_repo}/blob/main/{filename}"

    if status == "uploading":
        status_text = f"⬆️ Uploading... ({file_size})"
    elif status == "completed":
        status_text = f"[✅ {file_size}]({url})"
    else:
        status_text = "⏳ Queued"

    return f"| **F16** | Full precision reference | {status_text} |\n"


def get_troubleshooting_section() -> str:
    """Get troubleshooting section for README.

    Returns:
        Formatted troubleshooting markdown.
    """
    return """
## 🔧 Troubleshooting

### File Not Found

- Ensure you're using the correct repository URL
- Check that the quantisation has completed (✅ status)
- Try refreshing the page if recently uploaded

### Performance Issues

- Use smaller quantisations for limited RAM/VRAM
- Q4_K_M offers the best balance for most users
- Enable GPU acceleration if available

### Compatibility

- K-quants require llama.cpp or a compatible runtime
- Basic types (Q4_0, Q5_0, etc.) work with all runtimes
- Check your runtime's documentation for supported types
"""
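

if __name__ == "__main__":
    # Minimal sketch of how the builders compose into a full model card.
    # Every value below (author, model, repos, licence, tags) is an assumed
    # placeholder, not a real model or repository.
    card = (
        get_frontmatter_template("apache-2.0", "acme/tiny-7b", ["gguf", "quantised"])
        + get_header_template("acme", "tiny-7b", "acme/tiny-7b")
        + get_f16_row_template("acme", "tiny-7b", "acme/tiny-7b-GGUF", file_size="14.5GB")
        + get_downloads_section()
        + get_quantisation_info()
        + get_troubleshooting_section()
    )
    print(card)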