"""HuggingFace operations service.
|
|
|
|
Handles all interactions with HuggingFace including model downloads,
|
|
uploads, README generation, and repository management. Uses UK English
|
|
spelling conventions throughout.
|
|
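
Example:
    Illustrative flow (repository, path, and variable names are placeholders):

        username = HuggingFaceService.get_username()
        HuggingFaceService.download_model("org/model", Path("./models/model"))
        readme = ReadmeGenerator().generate(source, results, Path("./models"), repo)
        HuggingFaceService.upload_file(repo, readme, create_repo=True)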
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import subprocess
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
from helpers.logger import logger
|
|
from helpers.models.quantisation import QuantisationType
|
|
|
|
if TYPE_CHECKING:
|
|
from helpers.models.quantisation import ModelSource, QuantisationResult
|
|
|
|
|
|
class HuggingFaceService:
|
|
"""Manages HuggingFace repository operations.
|
|
|
|
Provides methods for downloading models, uploading files, and managing
|
|
repositories. Handles authentication, error recovery, and progress tracking
|
|
for robust interaction with HuggingFace services.
|
|
"""
|
|
|
|
@staticmethod
|
|
def get_username() -> str:
|
|
"""Get authenticated HuggingFace username.
|
|
|
|
Retrieves the current user's HuggingFace username using the CLI.
|
|
Requires prior authentication via `huggingface-cli login`.
|
|
|
|
Returns:
|
|
HuggingFace username.
|
|
|
|
Raises:
|
|
RuntimeError: If not authenticated or CLI not available.
|
|
"""
|
|
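        # `huggingface-cli whoami` reports the logged-in account on stdout;
        # failures (not logged in, CLI missing) are re-raised as RuntimeError below.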
        try:
            result = subprocess.run(
                ["huggingface-cli", "whoami"],
                capture_output=True,
                text=True,
                check=True,
            )
            return result.stdout.strip()
        except (subprocess.CalledProcessError, FileNotFoundError) as err:
            msg = "Please log in to HuggingFace first: huggingface-cli login"
            raise RuntimeError(msg) from err

    @staticmethod
    def download_model(
        model_name: str, output_dir: Path, include_pattern: str | None = None
    ) -> None:
        """Download model from HuggingFace.

        Downloads a complete model or specific files matching a pattern.
        Creates the output directory if it doesn't exist. Supports filtered
        downloads for efficient bandwidth usage when only certain files are needed.
        """
        logger.info(f"Downloading {model_name} to {output_dir}")

        cmd = [
            "huggingface-cli",
            "download",
            model_name,
            "--local-dir",
            str(output_dir),
        ]

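        # A glob-style include pattern (e.g. "*.gguf") restricts the download to
        # matching files rather than fetching the whole repository.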
        if include_pattern:
            cmd.extend(["--include", include_pattern])

        subprocess.run(cmd, check=True)
        logger.info("Download complete")

    @staticmethod
    def upload_file(
        repo_id: str,
        local_path: Path,
        repo_path: str | None = None,
        create_repo: bool = False,
    ) -> None:
        """Upload a file to HuggingFace repository.

        Uploads a single file to the specified repository path. Can create
        the repository if it doesn't exist. Handles repository creation conflicts
        gracefully by retrying without the create flag when needed.

        Raises:
            CalledProcessError: If upload fails.
        """
        repo_path = repo_path or local_path.name
        logger.info(f"Uploading {local_path.name} to {repo_id}/{repo_path}")

        cmd = [
            "huggingface-cli",
            "upload",
            repo_id,
            str(local_path),
            repo_path,
        ]

        if create_repo:
            cmd.append("--create")

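        # The first attempt may fail when the repository already exists; the
        # handler below retries without the creation flag and updates it instead.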
        try:
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info(f"Uploaded {repo_path}")
        except subprocess.CalledProcessError:
            if create_repo:
                # Repository might already exist, retry without --create
                cmd = cmd[:-1]  # Remove --create flag
                subprocess.run(cmd, check=True)
                logger.info(f"Updated {repo_path}")
            else:
                raise


class ReadmeGenerator:
    """Generates README files for quantised models.

    Creates comprehensive README documentation including model cards,
    quantisation details, and status tracking. Supports both initial
    planning documentation and final result summaries.
    """

    def generate(
        self,
        model_source: ModelSource,
        results: dict[QuantisationType, QuantisationResult],
        models_dir: Path,
        output_repo: str | None = None,
    ) -> Path:
        """Generate README file for quantised model repository.

        Creates a comprehensive README with frontmatter, quantisation table,
        and original model information. Handles status tracking for planned,
        processing, and completed quantisations.

        Returns:
            Path to generated README file.
        """
        logger.info("Creating model card...")

        model_dir = models_dir / model_source.model_name
        readme_path = model_dir / "README.md"

        # Get original README content
        original_content = self._get_original_readme(model_source, model_dir)

        # Generate new README
        readme_content = self._generate_readme_content(
            model_source, results, original_content, output_repo
        )

        readme_path.write_text(readme_content)
        return readme_path

    def _get_original_readme(self, model_source: ModelSource, model_dir: Path) -> dict[str, str]:
        """Extract original README and metadata.

        Downloads or reads the original model's README for inclusion in the
        quantised model documentation. Parses YAML frontmatter if present.

        Returns:
            Dictionary with readme content, licence, tags, and frontmatter.
        """
        content = {"readme": "", "licence": "apache-2.0", "tags": "", "frontmatter": ""}

        # Try local file first
        readme_path = model_dir / "README.md"
        if readme_path.exists():
            content["readme"] = readme_path.read_text(encoding="utf-8")
            logger.info(f"Found original README ({len(content['readme'])} characters)")
        else:
            # Download separately
            content = self._download_readme(model_source)

        # Parse frontmatter if present
        if content["readme"].startswith("---\n"):
            content = self._parse_frontmatter(content["readme"])

        return content

    def _download_readme(self, model_source: ModelSource) -> dict[str, str]:
        """Download README from HuggingFace repository.

        Attempts to download just the README.md file from the source repository
        for efficient documentation extraction.

        Returns:
            Dictionary with readme content and default metadata.
        """
        content = {"readme": "", "licence": "apache-2.0", "tags": "", "frontmatter": ""}

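        # Fetch into a throwaway directory; only the README text is kept once the
        # context manager cleans up.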
        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                logger.info(f"Downloading README from {model_source.source_model}...")
                subprocess.run(
                    [
                        "huggingface-cli",
                        "download",
                        model_source.source_model,
                        "--include",
                        "README.md",
                        "--local-dir",
                        temp_dir,
                    ],
                    check=True,
                    capture_output=True,
                )

                readme_path = Path(temp_dir) / "README.md"
                if readme_path.exists():
                    content["readme"] = readme_path.read_text(encoding="utf-8")
                    logger.info(f"Downloaded README ({len(content['readme'])} characters)")
            except subprocess.CalledProcessError as e:
                logger.warning(f"Failed to download README: {e}")

        return content

    def _parse_frontmatter(self, readme_text: str) -> dict[str, str]:
        """Parse YAML frontmatter from README.

        Extracts metadata from YAML frontmatter including licence, tags,
        and other model card fields.

        Returns:
            Dictionary with separated content and metadata.
        """
        lines = readme_text.split("\n")
        if lines[0] != "---":
            return {
                "readme": readme_text,
                "licence": "apache-2.0",
                "tags": "",
                "frontmatter": "",
            }

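        # Locate the closing "---" delimiter: the YAML frontmatter sits between the
        # two markers and the README body follows it.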
        frontmatter_end = -1
        for i, line in enumerate(lines[1:], 1):
            if line == "---":
                frontmatter_end = i
                break

        if frontmatter_end == -1:
            return {
                "readme": readme_text,
                "licence": "apache-2.0",
                "tags": "",
                "frontmatter": "",
            }

        frontmatter = "\n".join(lines[1:frontmatter_end])
        content = "\n".join(lines[frontmatter_end + 1 :])

        # Extract licence
        licence_match = re.search(r"^license:\s*(.+)$", frontmatter, re.MULTILINE)
        licence_val = licence_match.group(1).strip().strip('"') if licence_match else "apache-2.0"

        # Extract tags
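        # (block-list form only: a `tags:` line followed by `- item` entries,
        # stopping at the next top-level key)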
        tags = []
        in_tags = False
        for line in frontmatter.split("\n"):
            if line.startswith("tags:"):
                in_tags = True
                continue
            if in_tags:
                if line.startswith("- "):
                    tags.append(line[2:].strip())
                elif line and not line.startswith(" "):
                    break

        return {
            "readme": content,
            "licence": licence_val,
            "tags": ",".join(tags),
            "frontmatter": frontmatter,
        }

    def _generate_readme_content(
        self,
        model_source: ModelSource,
        results: dict[QuantisationType, QuantisationResult],
        original_content: dict[str, str],
        output_repo: str | None = None,
    ) -> str:
        """Generate complete README content with quantisation details.

        Creates the full README including YAML frontmatter, quantisation status
        table, and original model information.

        Returns:
            Complete README markdown content.
        """
        # Build tags
        our_tags = [
            "quantised",
            "gguf",
            "q4_k_m",
            "q4_k_l",
            "q4_k_xl",
            "q4_k_xxl",
            "bartowski-method",
        ]
        original_tags = original_content["tags"].split(",") if original_content["tags"] else []
        all_tags = sorted(set(our_tags + original_tags))

        # Build frontmatter
        frontmatter = f"""---
license: {original_content["licence"]}
library_name: gguf
base_model: {model_source.source_model}
tags:
"""
        for tag in all_tags:
            if tag.strip():
                frontmatter += f"- {tag.strip()}\n"

        frontmatter += "---\n\n"

        # Build main content
        hf_url = f"https://huggingface.co/{model_source.source_model}"
        content = f"""# {model_source.original_author}-{model_source.model_name}-GGUF

GGUF quantisations of [{model_source.source_model}]({hf_url}) using Bartowski's method.

| Quantisation | Embeddings/Output | Attention | Feed-Forward | Status |
|--------------|-------------------|-----------|--------------|--------|
"""

        # Add results table
        for quant_type in [
            QuantisationType.Q4_K_M,
            QuantisationType.Q4_K_L,
            QuantisationType.Q4_K_XL,
            QuantisationType.Q4_K_XXL,
        ]:
            result = results.get(quant_type)
            if not result:
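                # No result recorded yet: substitute a lightweight placeholder so
                # the table still renders a "planned" row for this type.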
                result = type("Result", (), {"status": "planned", "success": False})()

            layers = self._get_layers_config(quant_type)
            status = self._format_status(result, model_source, quant_type, output_repo)

            content += (
                f"| {quant_type.value} | {layers['embeddings']} | "
                f"{layers['attention']} | {layers['ffn']} | {status} |\n"
            )

        content += "\n---\n\n"

        # Add original content
        if original_content["readme"]:
            content += "# Original Model Information\n\n" + original_content["readme"]
        else:
            content += f"## Original Model\n\nQuantisation of [{model_source.source_model}](https://huggingface.co/{model_source.source_model}).\n"

        return frontmatter + content

    def _get_layers_config(self, quant_type: QuantisationType) -> dict[str, str]:
        """Get layer configuration for quantisation type.

        Returns layer precision specifications for the quantisation table.

        Returns:
            Dictionary with embeddings, attention, and ffn precision labels.
        """
        configs = {
            QuantisationType.Q4_K_M: {
                "embeddings": "Q4_K_M",
                "attention": "Q4_K_M",
                "ffn": "Q4_K_M",
            },
            QuantisationType.Q4_K_L: {"embeddings": "Q6_K", "attention": "Q6_K", "ffn": "Q4_K_M"},
            QuantisationType.Q4_K_XL: {"embeddings": "Q8_0", "attention": "Q6_K", "ffn": "Q4_K_M"},
            QuantisationType.Q4_K_XXL: {"embeddings": "Q8_0", "attention": "Q8_0", "ffn": "Q4_K_M"},
        }
        return configs.get(
            quant_type, {"embeddings": "Unknown", "attention": "Unknown", "ffn": "Unknown"}
        )

    def _format_status(
        self,
        result: QuantisationResult,
        model_source: ModelSource,
        quant_type: QuantisationType,
        output_repo: str | None,
    ) -> str:
        """Format status indicator for README table.

        Creates appropriate status indicator based on quantisation state
        including progress indicators, file sizes, and download links.

        Returns:
            Formatted status string for table cell.
        """
        status_map = {
            "planned": "⏳ Planned",
            "processing": "🔄 Processing...",
            "uploading": "⬆️ Uploading...",
            "failed": "❌ Failed",
        }

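        # Newer results carry an explicit status string; older ones only expose a
        # boolean success flag, which is handled by the legacy branch below.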
        if hasattr(result, "status") and result.status in status_map:
            base_status = status_map[result.status]

            if result.status == "uploading" and hasattr(result, "file_size") and result.file_size:
                return f"{base_status} ({result.file_size})"
            if result.status == "completed" or (hasattr(result, "success") and result.success):
                return self._format_success_status(result, model_source, quant_type, output_repo)
            return base_status

        # Legacy support
        if hasattr(result, "success") and result.success:
            return self._format_success_status(result, model_source, quant_type, output_repo)
        return "❌ Failed"

    def _format_success_status(
        self,
        result: QuantisationResult,
        model_source: ModelSource,
        quant_type: QuantisationType,
        output_repo: str | None,
    ) -> str:
        """Format successful quantisation status with download link.

        Creates a download link if repository information is available,
        otherwise shows file size.

        Returns:
            Formatted success status string.
        """
        if not output_repo:
            return (
                f"✅ {result.file_size}"
                if hasattr(result, "file_size") and result.file_size
                else "✅ Available"
            )

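        # Link each table entry to the file's details view on the repository page
        # via the show_file_info query parameter.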
        filename = (
            f"{model_source.original_author}-{model_source.model_name}-{quant_type.value}.gguf"
        )
        url = f"https://huggingface.co/{output_repo}?show_file_info={filename}"

        if hasattr(result, "file_size") and result.file_size:
            return f"[✅ {result.file_size}]({url})"
        return f"[✅ Available]({url})"