llm-gguf-tools/helpers/huggingface/uploader.py
2025-08-09 17:16:02 +01:00

330 lines
11 KiB
Python

"""HuggingFace file upload operations.
Handles uploading files to HuggingFace repositories with retry logic
and error handling.
"""
from __future__ import annotations
import shutil
import subprocess
import tempfile
import time
from pathlib import Path
from helpers.huggingface.repository import RepositoryManager
from helpers.logger import logger
class FileUploader:
    """Manages file uploads to HuggingFace repositories.

    Provides methods for uploading models, READMEs, and other files
    with proper error handling, retry logic, and git-based fallbacks.
    Every method is static; the class only acts as a namespace.
    """

    # Number of trailing CLI output lines kept for error reporting when a
    # streamed model upload fails.
    _OUTPUT_TAIL_LINES = 20

    @staticmethod
    def _build_upload_command(
        repo_id: str,
        local_path: Path,
        path_in_repo: str,
        commit_message: str,
        *,
        revision: str | None = None,
        exclude: list[str] | None = None,
    ) -> list[str]:
        """Build the ``huggingface-cli upload`` argument list.

        Args:
            repo_id: Repository identifier in "username/repo-name" form.
            local_path: Local file or folder to upload.
            path_in_repo: Destination path inside the repository.
            commit_message: Commit message passed via ``--commit-message``.
            revision: Optional branch passed via ``--revision``; omitted
                from the command when not given.
            exclude: Optional patterns, each passed as its own ``--exclude``.

        Returns:
            The command as a list suitable for ``subprocess`` (no shell).
        """
        cmd = ["huggingface-cli", "upload", repo_id, str(local_path), path_in_repo]
        if revision:
            cmd += ["--revision", revision]
        cmd += ["--commit-message", commit_message]
        for pattern in exclude or []:
            cmd += ["--exclude", pattern]
        return cmd

    @staticmethod
    def upload_file(
        repo_id: str,
        local_path: Path,
        repo_path: str | None = None,
        create_repo: bool = False,
    ) -> None:
        """Upload a file to HuggingFace repository.

        Uploads a single file to the specified repository path. Uses git
        directly when possible to avoid automatic PR creation and falls back
        to ``huggingface-cli upload`` otherwise. Repository identifiers follow
        the format "username/repo-name". Files are uploaded to the main branch.

        Args:
            repo_id: Target repository identifier.
            local_path: File to upload.
            repo_path: Destination path in the repository; defaults to the
                local file name.
            create_repo: Signals that the repository may not exist yet. A
                failed git clone is then handed to the CLI fallback without a
                warning. No ``--create`` flag is sent to the CLI: ``upload``
                has no such option (argparse-based CLI versions would expand
                it to ``--create-pr`` and open a pull request instead of
                pushing to main), and the upload command creates a missing
                repository on its own.

        Raises:
            subprocess.CalledProcessError: If the CLI fallback upload fails.
        """
        repo_path = repo_path or local_path.name
        logger.info(f"Uploading {local_path.name} to {repo_id}/{repo_path}")

        # Try git-based upload first to avoid PR creation
        if FileUploader._try_git_upload(repo_id, local_path, repo_path, create_repo=create_repo):
            logger.info(f"Uploaded {repo_path} via git")
            return

        # Fallback to huggingface-cli, explicitly pushing to the main branch
        logger.info("Git upload failed, trying huggingface-cli...")
        cmd = FileUploader._build_upload_command(
            repo_id,
            local_path,
            repo_path,
            f"Add {repo_path}",
            revision="main",
        )
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            # Output is captured, so surface stderr before propagating.
            logger.error(f"Failed to upload {repo_path}: {e.stderr}")
            raise
        logger.info(f"Uploaded {repo_path}")

    @staticmethod
    def _try_git_upload(
        repo_id: str,
        local_path: Path,
        repo_path: str,
        *,
        create_repo: bool = False,
    ) -> bool:
        """Try to upload file using git directly to avoid PR creation.

        Clones the repository into a temporary directory, copies the file to
        ``repo_path`` inside the clone, then adds, commits and pushes it. Any
        failure is logged and reported as ``False`` rather than raised, so the
        caller can fall back to the CLI.

        Args:
            repo_id: Target repository identifier.
            local_path: File to upload.
            repo_path: Destination path relative to the repository root.
            create_repo: When true, a failed clone is not logged as a warning
                because the repository is expected to be missing.

        Returns:
            bool: True if upload successful (or the file was already
            up-to-date), False if should fallback to CLI.
        """

        def run_git(*args: str, cwd: Path) -> subprocess.CompletedProcess[str]:
            # All post-clone git calls share the same options.
            return subprocess.run(
                ["git", *args],
                cwd=cwd,
                check=True,
                capture_output=True,
                text=True,
            )

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                repo_dir = Path(temp_dir) / "repo"
                repo_url = f"https://huggingface.co/{repo_id}"

                # Clone repository
                logger.info(f"Cloning {repo_url}...")
                clone_result = subprocess.run(
                    ["git", "clone", repo_url, str(repo_dir)],
                    check=False,
                    capture_output=True,
                    text=True,
                )
                if clone_result.returncode != 0:
                    # With create_repo the repository probably doesn't exist
                    # yet; let huggingface-cli handle creation quietly.
                    if not create_repo:
                        logger.warning(f"Clone failed: {clone_result.stderr}")
                    return False

                # An absolute or "../" repo_path would land outside the clone;
                # git would then see no changes and report a false success.
                target_file = repo_dir / repo_path
                try:
                    target_file.resolve().relative_to(repo_dir.resolve())
                except ValueError:
                    logger.warning(f"Git upload skipped: {repo_path} is outside the repository")
                    return False

                # Ensure target directory exists, then copy the file in
                target_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(local_path, target_file)

                # Check if there are any changes
                status_result = run_git("status", "--porcelain", cwd=repo_dir)
                if not status_result.stdout.strip():
                    logger.info(f"No changes detected for {repo_path}, file already up-to-date")
                    return True  # File is already up-to-date, no need to push

                # Git add, commit, push ("--" keeps a leading "-" in the path
                # from being parsed as an option)
                run_git("add", "--", repo_path, cwd=repo_dir)
                run_git("commit", "-m", f"Update {repo_path}", cwd=repo_dir)
                run_git("push", cwd=repo_dir)
                return True
        except subprocess.CalledProcessError as e:
            # str(e) only has the command and exit code; add git's own message.
            detail = (e.stderr or "").strip()
            suffix = f" ({detail})" if detail else ""
            logger.warning(f"Git upload failed: {e}{suffix}")
            return False
        except Exception as e:
            # Deliberately broad: this path is best-effort and the caller
            # falls back to the CLI on any failure (missing git, I/O errors).
            logger.warning(f"Git upload error: {e}")
            return False

    @staticmethod
    def upload_readme(
        repo_id: str,
        readme_path: Path,
        ensure_repo: bool = True,
    ) -> None:
        """Upload or update README file to repository.

        The README is uploaded as README.md in the repository root and will
        replace any existing README file. The upload is retried once after a
        30 second pause when the CLI error output mentions HTTP 429.

        Args:
            repo_id: Target repository identifier.
            readme_path: Local README file to upload.
            ensure_repo: When true, call
                ``RepositoryManager.ensure_repository_exists`` first.

        Raises:
            RuntimeError: If the README upload fails, including a failed
                retry after rate limiting.
        """
        logger.info("Uploading README...")

        # Add delay to prevent rate limiting
        time.sleep(2)

        # First ensure the repository exists if requested
        if ensure_repo:
            RepositoryManager.ensure_repository_exists(repo_id)

        cmd = FileUploader._build_upload_command(
            repo_id,
            readme_path,
            "README.md",
            "Update README.md",
        )

        try:
            logger.debug(f"DEBUG: Uploading README to {repo_id}")
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            if "429" not in str(e.stderr):
                msg = f"Failed to upload README: {e.stderr}"
                raise RuntimeError(msg) from e

            # Retry once with delay in case of rate limiting
            logger.warning("Rate limited, waiting 30 seconds...")
            time.sleep(30)
            try:
                subprocess.run(cmd, check=True, capture_output=True, text=True)
            except subprocess.CalledProcessError as retry_error:
                # Keep the documented contract: callers only see RuntimeError.
                msg = f"Failed to upload README: {retry_error.stderr}"
                raise RuntimeError(msg) from retry_error
            logger.info("README uploaded successfully (after retry)")
            return

        logger.info("README uploaded successfully")

    @staticmethod
    def upload_model_file(
        repo_id: str,
        model_path: Path,
        repo_filename: str | None = None,
    ) -> None:
        """Upload a model file to repository.

        Runs ``huggingface-cli upload`` and streams its combined
        stdout/stderr, logging lines that mention "upload" at debug level.
        The model file is uploaded under its own name by default or to the
        specified filename if provided.

        Args:
            repo_id: Target repository identifier.
            model_path: Local model file; must exist, its size is logged.
            repo_filename: Destination name in the repository; defaults to
                the local file name.

        Raises:
            subprocess.CalledProcessError: If the upload fails. Its
                ``output`` holds the last lines printed by the CLI.
        """
        repo_filename = repo_filename or model_path.name
        logger.info(
            f"Uploading model file {model_path.name} "
            f"({model_path.stat().st_size / (1024**3):.1f}GB)..."
        )

        cmd = FileUploader._build_upload_command(
            repo_id,
            model_path,
            repo_filename,
            f"Add {repo_filename}",
        )

        # Only the tail of the output is retained; it is attached to the
        # error so a failure can be diagnosed without debug logging.
        recent_output: list[str] = []

        # The context manager closes the pipe and reaps the process even if
        # reading the stream raises.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # line-buffered so output arrives as it is printed
        ) as process:
            if process.stdout:
                for line in process.stdout:
                    recent_output.append(line)
                    del recent_output[: -FileUploader._OUTPUT_TAIL_LINES]
                    if "upload" in line.lower():
                        logger.debug(line.strip())

        if process.returncode != 0:
            error = subprocess.CalledProcessError(
                process.returncode,
                cmd,
                output="".join(recent_output),
            )
            logger.error(f"Failed to upload model file: {error}")
            if error.output:
                logger.error(f"Last output from huggingface-cli:\n{error.output.rstrip()}")
            raise error

        logger.info(f"Successfully uploaded {repo_filename}")

    @staticmethod
    def upload_folder(
        repo_id: str,
        folder_path: Path,
        path_in_repo: str = ".",
        ignore_patterns: list[str] | None = None,
    ) -> None:
        """Upload an entire folder to repository.

        Passes the folder to ``huggingface-cli upload``; each ignore pattern
        is forwarded as an ``--exclude`` option for selective uploads.

        Args:
            repo_id: Target repository identifier.
            folder_path: Local folder to upload.
            path_in_repo: Destination directory in the repository.
            ignore_patterns: Optional patterns to exclude from the upload.

        Raises:
            subprocess.CalledProcessError: If the upload fails.
        """
        logger.info(f"Uploading folder {folder_path} to {repo_id}/{path_in_repo}")

        cmd = FileUploader._build_upload_command(
            repo_id,
            folder_path,
            path_in_repo,
            f"Upload {folder_path.name}",
            exclude=ignore_patterns,
        )

        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to upload folder: {e}")
            raise
        logger.info(f"Successfully uploaded folder {folder_path.name}")