# llm-gguf-tools/helpers/filesystem/workspace.py
# 2025-08-09 17:16:02 +01:00
#
# 146 lines
# 4.9 KiB
# Python

"""Workspace management for quantisation operations.
Manages working directories, model storage paths, and temporary
file locations throughout the quantisation workflow.
"""
from __future__ import annotations
import tempfile
from pathlib import Path
from shutil import disk_usage as shutil_disk_usage, rmtree as shutil_rmtree
from helpers.logger import logger
class WorkspaceManager:
"""Manages workspace directories and paths.
Provides centralised management of working directories, model
storage, and temporary file locations with automatic directory
creation and validation.
"""
def __init__(self, work_dir: Path | None = None) -> None:
"""Initialise workspace manager.
Sets up base working directory structure with models and temporary
file directories. Defaults to quantisation_work in current directory
if no path is specified.
"""
self.work_dir = work_dir or Path.cwd() / "quantisation_work"
self.models_dir = self.work_dir / "models"
self._setup_directories()
def _setup_directories(self) -> None:
"""Create necessary workspace directories."""
self.work_dir.mkdir(parents=True, exist_ok=True)
self.models_dir.mkdir(parents=True, exist_ok=True)
logger.debug(f"Workspace initialised at: {self.work_dir}")
def get_model_dir(self, model_name: str) -> Path:
"""Get directory path for a specific model.
Creates the model directory if it doesn't exist and returns the path
for storing model files and quantisation outputs.
Returns:
Path to model directory.
"""
model_dir = self.models_dir / model_name
model_dir.mkdir(parents=True, exist_ok=True)
return model_dir
def get_temp_dir(self, prefix: str = "temp") -> Path:
"""Get a temporary directory path within workspace.
Creates a unique temporary directory with specified prefix within
the workspace for intermediate processing files.
Returns:
Path to temporary directory.
"""
return Path(tempfile.mkdtemp(prefix=f"{prefix}_", dir=self.work_dir))
def get_imatrix_dir(self, model_name: str) -> Path:
"""Get directory for importance matrix files.
Creates and returns the path to the imatrix directory for storing
importance matrices used in advanced quantisation methods.
Returns:
Path to imatrix directory.
"""
imatrix_dir = self.models_dir / model_name / "imatrix"
imatrix_dir.mkdir(parents=True, exist_ok=True)
return imatrix_dir
def get_quantisation_output_path(
self,
model_name: str,
author: str,
quant_type: str,
) -> Path:
"""Get output path for a quantised model.
Constructs standardised filename and path for quantised model output
using author-model-quantisation format for consistent naming.
Returns:
Path for quantised model output.
"""
model_dir = self.get_model_dir(model_name)
filename = f"{author}-{model_name}-{quant_type}.gguf"
return model_dir / filename
def cleanup_workspace(self) -> None:
"""Clean up entire workspace directory."""
if self.work_dir.exists():
logger.info(f"Cleaning up workspace: {self.work_dir}")
shutil_rmtree(self.work_dir, ignore_errors=True)
@property
def disk_usage(self) -> dict[str, float]:
"""Get disk usage statistics for workspace.
Returns:
Dictionary with size in GB for work_dir and models_dir.
"""
def get_dir_size(path: Path) -> float:
"""Calculate total size of directory in GB.
Recursively traverses directory tree to calculate total file
sizes with GB conversion for human-readable output.
Returns:
Total size of directory in GB.
"""
total = 0
if path.exists():
for item in path.rglob("*"):
if item.is_file():
total += item.stat().st_size
return total / (1024**3) # Convert to GB
return {
"work_dir": get_dir_size(self.work_dir),
"models_dir": get_dir_size(self.models_dir),
}
def validate_space(self, required_gb: float = 50.0) -> bool:
"""Check if sufficient disk space is available.
Validates available disk space against required threshold, logging
warnings when space is insufficient for quantisation operations.
Returns:
True if sufficient space available.
"""
stat = shutil_disk_usage(self.work_dir)
free_gb = stat.free / (1024**3)
if free_gb < required_gb:
logger.warning(f"Low disk space: {free_gb:.1f}GB free, {required_gb:.1f}GB recommended")
return False
return True