"""README templates for quantised models.
|
|
|
|
Provides template strings and builders for generating README documentation.
|
|
"""
|
|
|
|
from __future__ import annotations


def get_frontmatter_template(
    licence: str,
    base_model: str,
    tags: list[str],
) -> str:
    """Generate YAML frontmatter for README.

    Creates the YAML metadata header for HuggingFace model cards including
    licence information, library specification, base model reference, and
    tag listings formatted according to HuggingFace conventions.

    Returns:
        Formatted YAML frontmatter string.
    """
    frontmatter = f"""---
license: {licence}
library_name: gguf
base_model: {base_model}
tags:
"""
    for tag in tags:
        if tag.strip():
            frontmatter += f"- {tag.strip()}\n"

    frontmatter += "---\n\n"
    return frontmatter
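

# Illustrative only: with licence="apache-2.0", base_model="org/base" and
# tags=["gguf", "chat"], the builder above yields (values assumed, not taken
# from any real model):
#
#   ---
#   license: apache-2.0
#   library_name: gguf
#   base_model: org/base
#   tags:
#   - gguf
#   - chat
#   ---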


def get_header_template(
    original_author: str,
    model_name: str,
    source_model: str,
) -> str:
    """Generate README header section.

    Creates the main header section with model title, description of the
    quantisation process, and initial table structure for displaying
    quantisation variants and their status information.

    Returns:
        Formatted header markdown.
    """
    hf_url = f"https://huggingface.co/{source_model}"
    return f"""# {original_author}-{model_name}-GGUF

GGUF quantisations of [{source_model}]({hf_url}) using
[Bartowski](https://huggingface.co/bartowski)'s method. Created with
[llm-gguf-tools](https://git.tomfos.tr/tom/llm-gguf-tools), which replicates
Bartowski's quantisation profiles.

| Variant | Configuration | Status |
|---|---|---|
"""


def get_downloads_section(download_instruction: str | None = None) -> str:
    """Generate downloads and usage section.

    Creates comprehensive usage documentation including download instructions,
    quick start examples for various runtimes (llama.cpp, Ollama, LM Studio),
    and integration guidance with optional custom instructions.

    Returns:
        Formatted downloads section markdown.
    """
    base_section = """
## 📥 Download Links

Direct download links are available for each quantisation in the table above. Click the ✅ status to
go to the file page.

## 🚀 Quick Start

### Using llama.cpp

```bash
# Download the model (replace Q4_K_M with your chosen quantisation)
wget https://huggingface.co/YOUR_REPO/resolve/main/model-Q4_K_M.gguf

# Run with llama.cpp
./llama-cli -m model-Q4_K_M.gguf -p "Your prompt here"
```

### Using Ollama

```bash
# Create Modelfile
echo "FROM ./model-Q4_K_M.gguf" > Modelfile

# Create and run the model
ollama create mymodel -f Modelfile
ollama run mymodel
```

### Using LM Studio

1. Open LM Studio
2. Click "Download Model"
3. Paste the HuggingFace repository URL
4. Select your preferred quantisation
5. Click Download

"""

    if download_instruction:
        base_section = f"{download_instruction}\n\n{base_section}"

    return base_section
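

# Illustrative only: callers may prepend a bespoke note, e.g.
# get_downloads_section("Files over 50GB are split; download all parts.")
# emits that sentence first, followed by the standard quick-start docs.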


def get_quantisation_info() -> str:
    """Get information about quantisation types.

    Returns:
        Formatted quantisation information markdown.
    """
    return """
## 📊 Quantisation Information

### Bartowski Naming Convention

- **L variants** (Q3_K_L, Q4_K_L, Q5_K_L): Q8_0 for embeddings/output weights
- **M variants** (Q3_K_M, Q4_K_M, Q5_K_M): Standard K-quant configuration
- **XL variant** (Q3_K_XL): Q8_0 embeddings + Q6_K output weights
- **_L suffix** (Q6_K_L): Q8_0 for output.weight tensor

### Recommended Quantisations

- **Q4_K_M**: Best balance of quality and size (4.58GB for 7B model)
- **Q5_K_M**: Higher quality, larger size (5.33GB for 7B model)
- **Q3_K_L**: Smallest with good quality (3.35GB for 7B model)
- **Q6_K_L**: Near-original quality (5.65GB for 7B model)
- **Q8_0**: Highest quality quantisation (7.17GB for 7B model)

### Basic vs K-quants

- **Basic types** (Q4_0, Q5_0, Q8_0): Simple quantisation, universally compatible
- **K-quants** (Q#_K_*): Advanced quantisation with better quality/size ratios

Choose K-quants when available for better performance. Basic types are fallbacks for unsupported
architectures.
"""


def get_original_model_section(
    original_readme: str,
    separator: str = "---",
) -> str:
    """Format original model documentation section.

    Formats the original model's documentation for inclusion in the
    quantised model's README, preserving important context whilst
    clearly separating it from the quantisation-specific information.

    Returns:
        Formatted original model section.
    """
    if not original_readme:
        return ""

    return f"""
{separator}

## Original Model Information

{original_readme}
"""


def get_f16_row_template(
    original_author: str,
    model_name: str,
    output_repo: str,
    file_size: str = "-",
    status: str = "completed",
) -> str:
    """Generate F16 GGUF row for the table.

    Creates a formatted table row for the F16 reference model with
    appropriate status indicators, download links, and file size
    information based on upload status and availability.

    Returns:
        Formatted table row for F16.
    """
    filename = f"{original_author}-{model_name}-f16.gguf"
    url = f"https://huggingface.co/{output_repo}/blob/main/{filename}"

    if status == "uploading":
        status_text = f"⬆️ Uploading... ({file_size})"
    elif status == "completed":
        status_text = f"[✅ {file_size}]({url})"
    else:
        status_text = "⏳ Queued"

    return f"| **F16** | Full precision reference | {status_text} |\n"


def get_troubleshooting_section() -> str:
    """Get troubleshooting section for README.

    Returns:
        Formatted troubleshooting markdown.
    """
    return """
## 🔧 Troubleshooting

### File Not Found

- Ensure you're using the correct repository URL
- Check that the quantisation has completed (✅ status)
- Try refreshing the page if recently uploaded

### Performance Issues

- Use smaller quantisations for limited RAM/VRAM
- Q4_K_M offers the best balance for most users
- Enable GPU acceleration if available

### Compatibility

- K-quants require llama.cpp or a compatible runtime
- Basic types (Q4_0, Q5_0, etc.) work with all runtimes
- Check your runtime's documentation for supported types
"""
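

if __name__ == "__main__":
    # Minimal sketch of how the builders compose into a full model card.
    # Every value below (author, model, repos, licence, tags) is an assumed
    # placeholder, not a real model or repository.
    card = (
        get_frontmatter_template("apache-2.0", "acme/tiny-7b", ["gguf", "quantised"])
        + get_header_template("acme", "tiny-7b", "acme/tiny-7b")
        + get_f16_row_template("acme", "tiny-7b", "acme/tiny-7b-GGUF", file_size="14.5GB")
        + get_downloads_section()
        + get_quantisation_info()
        + get_troubleshooting_section()
    )
    print(card)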