330 lines
11 KiB
Python
330 lines
11 KiB
Python
"""HuggingFace file upload operations.
|
|
|
|
Handles uploading files to HuggingFace repositories with retry logic
|
|
and error handling.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import shutil
|
|
import subprocess
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from helpers.huggingface.repository import RepositoryManager
|
|
from helpers.logger import logger
|
|
|
|
|
|
class FileUploader:
    """Manages file uploads to HuggingFace repositories.

    Provides methods for uploading models, READMEs, and other files
    with proper error handling, retry logic, and git-based fallbacks.
    Every method is a ``@staticmethod``; the class is used purely as a
    namespace and holds no state.
    """

    @staticmethod
    def upload_file(
        repo_id: str,
        local_path: Path,
        repo_path: str | None = None,
        create_repo: bool = False,
    ) -> None:
        """Upload a single file to a HuggingFace repository.

        A git clone/commit/push is attempted first to avoid automatic PR
        creation. If that does not succeed, the file is uploaded with
        ``huggingface-cli upload`` against the ``main`` revision.

        Args:
            repo_id: Repository identifier in the form "username/repo-name".
            local_path: File on disk to upload.
            repo_path: Destination path inside the repository. Defaults to
                the file name of ``local_path``.
            create_repo: When true, ``--create`` is appended to the CLI
                command; if that invocation fails, the command is retried
                once without the flag.

        Raises:
            subprocess.CalledProcessError: If the CLI upload fails (after
                the retry without ``--create`` when ``create_repo`` is set).
        """
        repo_path = repo_path or local_path.name
        logger.info(f"Uploading {local_path.name} to {repo_id}/{repo_path}")

        # Try git-based upload first to avoid PR creation
        if FileUploader._try_git_upload(repo_id, local_path, repo_path, create_repo=create_repo):
            logger.info(f"Uploaded {repo_path} via git")
            return

        # Fallback to huggingface-cli
        logger.info("Git upload failed, trying huggingface-cli...")
        cmd = [
            "huggingface-cli",
            "upload",
            repo_id,
            str(local_path),
            repo_path,
            "--revision",
            "main",  # Explicitly push to main branch
            "--commit-message",
            f"Add {repo_path}",
        ]

        # NOTE(review): `--create` does not appear to be a documented option of
        # `huggingface-cli upload`; argparse prefix matching may resolve it to
        # `--create-pr`. Confirm against the installed CLI before relying on it.
        if create_repo:
            cmd.append("--create")

        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
            logger.info(f"Uploaded {repo_path}")
        except subprocess.CalledProcessError:
            if not create_repo:
                raise
            # Repository might already exist, retry without the trailing --create
            subprocess.run(cmd[:-1], check=True, capture_output=True, text=True)
            logger.info(f"Updated {repo_path}")

    @staticmethod
    def _try_git_upload(
        repo_id: str,
        local_path: Path,
        repo_path: str,
        *,
        create_repo: bool = False,
    ) -> bool:
        """Try to upload a file using git directly to avoid PR creation.

        Clones the repository into a temporary directory, copies the file to
        ``repo_path`` inside the clone, and commits and pushes it when
        ``git status`` reports a change. This method never raises: every
        failure is logged as a warning and reported through the return value.

        Args:
            repo_id: Repository identifier in the form "username/repo-name".
            local_path: File on disk to upload.
            repo_path: Destination path relative to the repository root.
            create_repo: When true, a failed clone is not logged, since the
                repository may simply not exist yet.

        Returns:
            bool: True if the file was pushed or was already up-to-date,
            False if the caller should fall back to the CLI.
        """
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                repo_dir = Path(temp_dir) / "repo"
                repo_url = f"https://huggingface.co/{repo_id}"

                # Clone repository
                logger.info(f"Cloning {repo_url}...")
                clone = subprocess.run(
                    ["git", "clone", repo_url, str(repo_dir)],
                    check=False,
                    capture_output=True,
                    text=True,
                )
                if clone.returncode != 0:
                    # With create_repo the repository may not exist yet; stay
                    # quiet and let huggingface-cli handle creation.
                    if not create_repo:
                        logger.warning(f"Clone failed: {clone.stderr}")
                    return False

                # Copy the file into place, creating parent directories as needed
                target_file = repo_dir / repo_path
                target_file.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(local_path, target_file)

                # Check if there are any changes
                status = subprocess.run(
                    ["git", "status", "--porcelain"],
                    cwd=repo_dir,
                    capture_output=True,
                    text=True,
                    check=True,
                )
                if not status.stdout.strip():
                    logger.info(f"No changes detected for {repo_path}, file already up-to-date")
                    return True  # File is already up-to-date, no need to push

                # Git add, commit, push
                git_steps = (
                    ["add", repo_path],
                    ["commit", "-m", f"Update {repo_path}"],
                    ["push"],
                )
                for step in git_steps:
                    subprocess.run(
                        ["git", *step],
                        cwd=repo_dir,
                        check=True,
                        capture_output=True,
                        text=True,
                    )

                return True

        except subprocess.CalledProcessError as e:
            # Include git's stderr; the exception text alone only carries the
            # command and exit status.
            details = (e.stderr or "").strip()
            logger.warning(f"Git upload failed: {e}: {details}" if details else f"Git upload failed: {e}")
            return False
        except Exception as e:
            # Deliberate best-effort: any other problem falls back to the CLI.
            logger.warning(f"Git upload error: {e}")
            return False

    @staticmethod
    def upload_readme(
        repo_id: str,
        readme_path: Path,
        ensure_repo: bool = True,
    ) -> None:
        """Upload or update the README file of a repository.

        The file is uploaded as ``README.md`` in the repository root via
        ``huggingface-cli upload``. If the first attempt fails and its stderr
        contains "429", the upload is retried once after a 30 second wait.

        Args:
            repo_id: Repository identifier in the form "username/repo-name".
            readme_path: Local README file to upload.
            ensure_repo: When true, ``RepositoryManager.ensure_repository_exists``
                is called before uploading.

        Raises:
            RuntimeError: If the README upload fails, including when the
                rate-limit retry fails.
        """
        logger.info("Uploading README...")

        # Add delay to prevent rate limiting
        time.sleep(2)

        # First ensure the repository exists if requested
        if ensure_repo:
            RepositoryManager.ensure_repository_exists(repo_id)

        # Upload without --create flag to avoid PR creation
        cmd = [
            "huggingface-cli",
            "upload",
            repo_id,
            str(readme_path),
            "README.md",
            "--commit-message",
            "Update README.md",
        ]

        try:
            logger.debug(f"DEBUG: Uploading README to {repo_id}")
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            # Only a rate-limit response is worth retrying
            if "429" not in str(e.stderr):
                msg = f"Failed to upload README: {e.stderr}"
                raise RuntimeError(msg) from e
        else:
            logger.info("README uploaded successfully")
            return

        # Retry with delay in case of rate limiting
        logger.warning("Rate limited, waiting 30 seconds...")
        time.sleep(30)
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            # Keep the documented contract: a failed retry is a RuntimeError too
            msg = f"Failed to upload README: {e.stderr}"
            raise RuntimeError(msg) from e
        logger.info("README uploaded successfully (after retry)")

    @staticmethod
    def upload_model_file(
        repo_id: str,
        model_path: Path,
        repo_filename: str | None = None,
    ) -> None:
        """Upload a model file to a repository.

        Runs ``huggingface-cli upload`` and streams its combined
        stdout/stderr, logging lines that mention "upload" at debug level.

        Args:
            repo_id: Repository identifier in the form "username/repo-name".
            model_path: Local model file; it must exist, because its size is
                read for the log message.
            repo_filename: Destination path in the repository. Defaults to
                the file name of ``model_path``.

        Raises:
            subprocess.CalledProcessError: If the CLI exits with a non-zero
                status. The last lines of its output are attached as
                ``output``.
        """
        repo_filename = repo_filename or model_path.name
        logger.info(
            f"Uploading model file {model_path.name} "
            f"({model_path.stat().st_size / (1024**3):.1f}GB)..."
        )

        cmd = [
            "huggingface-cli",
            "upload",
            repo_id,
            str(model_path),
            repo_filename,
            "--commit-message",
            f"Add {repo_filename}",
        ]

        max_tail_lines = 20  # how much trailing CLI output to keep for error reports
        output_tail: list[str] = []

        try:
            # Run with output streaming for large files. The context manager
            # closes the stdout pipe and reaps the process even if streaming fails.
            with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
            ) as process:
                if process.stdout:
                    for line in process.stdout:
                        output_tail.append(line)
                        del output_tail[:-max_tail_lines]
                        if "upload" in line.lower():
                            logger.debug(line.strip())
                returncode = process.wait()

            if returncode != 0:
                raise subprocess.CalledProcessError(returncode, cmd, output="".join(output_tail))

            logger.info(f"Successfully uploaded {repo_filename}")

        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to upload model file: {e}")
            if e.output:
                logger.error(f"Last output from huggingface-cli:\n{e.output.strip()}")
            raise

    @staticmethod
    def upload_folder(
        repo_id: str,
        folder_path: Path,
        path_in_repo: str = ".",
        ignore_patterns: list[str] | None = None,
    ) -> None:
        """Upload an entire folder to a repository.

        Passes the folder to ``huggingface-cli upload``; any
        ``ignore_patterns`` are forwarded through ``--exclude``.

        Args:
            repo_id: Repository identifier in the form "username/repo-name".
            folder_path: Local folder to upload.
            path_in_repo: Destination path inside the repository.
            ignore_patterns: Optional glob patterns of files to skip.

        Raises:
            subprocess.CalledProcessError: If the upload fails.
        """
        logger.info(f"Uploading folder {folder_path} to {repo_id}/{path_in_repo}")

        cmd = [
            "huggingface-cli",
            "upload",
            repo_id,
            str(folder_path),
            path_in_repo,
            "--commit-message",
            f"Upload {folder_path.name}",
        ]

        if ignore_patterns:
            # Pass every pattern after a single --exclude: the CLI is understood
            # to take a list of values for this option, so repeating the flag
            # would keep only the last pattern.
            cmd.extend(["--exclude", *ignore_patterns])

        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
            logger.info(f"Successfully uploaded folder {folder_path.name}")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to upload folder: {e}")
            raise
|