Initial commit
This commit is contained in:
commit
ef7df1a8c3
28 changed files with 6829 additions and 0 deletions
174
helpers/services/filesystem.py
Normal file
174
helpers/services/filesystem.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
"""Filesystem operations service.
|
||||
|
||||
Provides unified filesystem operations including file discovery, size
calculation, and path management. Consolidates common filesystem patterns
used across quantisation and conversion workflows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from helpers.logger import logger
|
||||
|
||||
# Ratio between successive size units (B, K, M, G, T, P) used when formatting
# a byte count as a human-readable string. Kept as a float so repeated
# division yields fractional values.
BYTES_PER_UNIT = 1024.0
|
||||
|
||||
|
||||
class FilesystemService:
|
||||
"""Handles filesystem operations with consistent error handling.
|
||||
|
||||
Provides methods for file discovery, size formatting, and JSON loading
|
||||
with proper error handling and logging. Ensures consistent behaviour
|
||||
across different tools and workflows.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get_file_size(file_path: Path) -> str:
|
||||
"""Get human-readable file size using system utilities.
|
||||
|
||||
Attempts to use `du -h` for human-readable output, falling back to
|
||||
Python calculation if the system command fails. Provides consistent
|
||||
size formatting across the toolset.
|
||||
|
||||
Returns:
|
||||
Human-readable file size string (e.g., "1.5G", "750M").
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["du", "-h", str(file_path)], capture_output=True, text=True, check=True
|
||||
)
|
||||
return result.stdout.split()[0]
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
# Fallback to Python calculation
|
||||
|
||||
try:
|
||||
size_bytes: float = float(file_path.stat().st_size)
|
||||
for unit in ["B", "K", "M", "G", "T"]:
|
||||
if size_bytes < BYTES_PER_UNIT:
|
||||
return f"{size_bytes:.1f}{unit}"
|
||||
size_bytes /= BYTES_PER_UNIT
|
||||
except Exception:
|
||||
return "Unknown"
|
||||
else:
|
||||
return f"{size_bytes:.1f}P"
|
||||
|
||||
@staticmethod
|
||||
def load_json_config(config_path: Path) -> dict[str, Any]:
|
||||
"""Load and parse JSON configuration file.
|
||||
|
||||
Provides consistent JSON loading with proper error handling and
|
||||
encoding specification. Used for loading model configurations,
|
||||
tokeniser settings, and other JSON-based metadata.
|
||||
|
||||
Returns:
|
||||
Parsed JSON content as dictionary.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If config file doesn't exist.
|
||||
"""
|
||||
if not config_path.exists():
|
||||
msg = f"Configuration file not found: {config_path}"
|
||||
raise FileNotFoundError(msg)
|
||||
|
||||
with Path(config_path).open(encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
@staticmethod
|
||||
def find_safetensor_files(model_path: Path) -> list[Path]:
|
||||
"""Find all SafeTensor files in model directory using priority search.
|
||||
|
||||
Searches for tensor files in order of preference: single model.safetensors,
|
||||
sharded model-*-of-*.safetensors files, then any *.safetensors files. This
|
||||
approach handles both single-file and multi-shard model distributions whilst
|
||||
ensuring predictable file ordering for conversion consistency.
|
||||
|
||||
Returns:
|
||||
List of SafeTensor file paths in priority order.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If no SafeTensor files are found.
|
||||
"""
|
||||
# Check for single file
|
||||
single_file = model_path / "model.safetensors"
|
||||
if single_file.exists():
|
||||
return [single_file]
|
||||
|
||||
# Check for sharded files
|
||||
pattern = "model-*-of-*.safetensors"
|
||||
sharded_files = sorted(model_path.glob(pattern))
|
||||
if sharded_files:
|
||||
return sharded_files
|
||||
|
||||
# Check for any safetensor files
|
||||
any_files = sorted(model_path.glob("*.safetensors"))
|
||||
if any_files:
|
||||
return any_files
|
||||
|
||||
msg = f"No SafeTensor files found in {model_path}"
|
||||
raise FileNotFoundError(msg)
|
||||
|
||||
@staticmethod
|
||||
def find_gguf_files(model_path: Path, pattern: str | None = None) -> list[Path]:
|
||||
"""Find GGUF files in directory, optionally filtered by pattern.
|
||||
|
||||
Searches for GGUF files with optional pattern matching. Prioritises
|
||||
multi-part files (00001-of-*) over single files for proper handling
|
||||
of large models split across multiple files.
|
||||
|
||||
Returns:
|
||||
List of GGUF file paths, sorted with multi-part files first.
|
||||
"""
|
||||
if pattern:
|
||||
gguf_files = list(model_path.glob(f"*{pattern}*.gguf"))
|
||||
else:
|
||||
gguf_files = list(model_path.glob("*.gguf"))
|
||||
|
||||
# Sort to prioritise 00001-of-* files
|
||||
gguf_files.sort(
|
||||
key=lambda x: (
|
||||
"00001-of-" not in x.name, # False sorts before True
|
||||
x.name,
|
||||
)
|
||||
)
|
||||
|
||||
return gguf_files
|
||||
|
||||
@staticmethod
|
||||
def ensure_directory(path: Path) -> Path:
|
||||
"""Ensure directory exists, creating if necessary.
|
||||
|
||||
Creates directory and all parent directories if they don't exist.
|
||||
Returns the path for method chaining convenience.
|
||||
|
||||
Returns:
|
||||
The directory path.
|
||||
"""
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
@staticmethod
|
||||
def cleanup_directory(path: Path, pattern: str = "*") -> int:
|
||||
"""Remove files matching pattern from directory.
|
||||
|
||||
Safely removes files matching the specified glob pattern. Returns
|
||||
count of files removed for logging purposes.
|
||||
|
||||
Returns:
|
||||
Number of files removed.
|
||||
"""
|
||||
if not path.exists():
|
||||
return 0
|
||||
|
||||
files_removed = 0
|
||||
for file_path in path.glob(pattern):
|
||||
if file_path.is_file():
|
||||
try:
|
||||
file_path.unlink()
|
||||
files_removed += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to remove {file_path}: {e}")
|
||||
|
||||
return files_removed
|
Loading…
Add table
Add a link
Reference in a new issue