"""Quantisation operations service. Provides modular quantisation engine, model management, and upload capabilities for GGUF model processing. Consolidates quantisation logic from various tools into reusable components following SOLID principles. """ from __future__ import annotations import shutil import subprocess import tempfile import traceback from pathlib import Path from helpers.logger import logger from helpers.models.quantisation import ( ModelSource, QuantisationContext, QuantisationResult, QuantisationType, ) from helpers.services.filesystem import FilesystemService from helpers.services.gguf import GGUFConverter from helpers.services.llama_python import LlamaCppPythonAPI from helpers.utils.config_parser import ConfigParser from helpers.utils.tensor_mapping import TensorMapper class QuantisationEngine: """Handles the actual quantisation process with configurable methods. Provides flexible quantisation execution supporting multiple tensor precision configurations, importance matrices, and fallback strategies. Uses llama-cpp-python API for direct quantisation without subprocess overhead. """ def __init__(self) -> None: """Initialise quantisation engine.""" self.fs = FilesystemService() self.python_api = LlamaCppPythonAPI() def quantise(self, context: QuantisationContext) -> QuantisationResult: """Perform quantisation using the specified configuration. Executes quantisation using Python API. Since llama-cpp-python is a required dependency, we can rely on it being available. Returns: QuantisationResult with success status and file information. """ logger.debug(f"DEBUG: Starting quantisation for {context.config.name}") logger.info( f"⚙️ Creating {context.config.name} quantisation ({context.config.description})..." ) output_path = context.get_output_path() logger.debug(f"DEBUG: Output path: {output_path}") # Check input file exists and is readable if not context.f16_model_path.exists(): error_msg = f"Input model file does not exist: {context.f16_model_path}" logger.error(f"❌ {error_msg}") return QuantisationResult( quantisation_type=QuantisationType(context.config.name), success=False, error_message=error_msg, ) # Check if we have enough disk space (rough estimate) try: input_size = context.f16_model_path.stat().st_size logger.debug(f"DEBUG: Input file size: {input_size / (1024**3):.2f} GB") # This is a rough check - actual available space calculation is more complex logger.debug(f"DEBUG: Output directory: {output_path.parent}") except Exception as e: logger.warning(f"⚠️ Could not check disk space: {e}") logger.info(f"🎯 Attempting {context.config.name} quantisation...") logger.debug(f"DEBUG: Source: {context.f16_model_path}") logger.debug(f"DEBUG: Target: {output_path}") logger.debug(f"DEBUG: imatrix: {context.imatrix_path}") try: # Use Python API for quantisation logger.info("🐍 Using Python API for quantisation...") logger.debug("DEBUG: Calling python_api.quantise_model...") success = self.python_api.quantise_model( context.f16_model_path, output_path, context.config, context.imatrix_path ) logger.debug(f"DEBUG: Python API returned: {success}") if success: logger.debug("DEBUG: Quantisation successful, creating success result") return self._create_success_result(context.config.name, output_path, "Python API") logger.error(f"❌ {context.config.name} quantisation failed") return QuantisationResult( quantisation_type=QuantisationType(context.config.name), success=False, error_message="Quantisation failed via Python API", ) except Exception as e: logger.error(f"❌ Exception during {context.config.name} quantisation: {e}") logger.error("Exception traceback:") for line in traceback.format_exc().splitlines(): logger.error(f" {line}") return QuantisationResult( quantisation_type=QuantisationType(context.config.name), success=False, error_message=f"Exception during quantisation: {e!s}", ) def _create_success_result( self, quant_type: str, output_path: Path, method_used: str ) -> QuantisationResult: """Create successful quantisation result with file metadata. Returns: QuantisationResult with file path and size information. """ file_size = self.fs.get_file_size(output_path) return QuantisationResult( quantisation_type=QuantisationType(quant_type), success=True, file_path=output_path, file_size=file_size, method_used=method_used, ) class ModelManager: """Handles model downloading and preparation for quantisation. Manages both GGUF repository downloads and HuggingFace model conversions, providing unified interface for model acquisition and preparation. """ def __init__(self, models_dir: Path) -> None: """Initialise model manager with storage configuration. Sets up model storage directory for model downloads and conversions. """ self.models_dir = models_dir self.fs = FilesystemService() def prepare_model(self, model_source: ModelSource) -> Path: """Prepare model for quantisation and return F16 model path. Handles both GGUF repository downloads and regular HuggingFace model conversion workflows with automatic format detection. Returns: Path to F16 GGUF model ready for quantisation. """ model_dir = self.models_dir / model_source.model_name if model_source.is_gguf_repo: return self._handle_gguf_repo(model_source, model_dir) return self._handle_regular_repo(model_source, model_dir) def _handle_gguf_repo(self, model_source: ModelSource, model_dir: Path) -> Path: """Handle GGUF repository download with pattern matching. Downloads GGUF files matching specified patterns, prioritising multi-part files and F16 variants. Returns: Path to downloaded or existing GGUF file. """ logger.info(f"⬇️ Downloading GGUF file from repository: {model_source.source_model}") logger.info(f"🔍 Looking for file pattern: *{model_source.gguf_file_pattern}*") f16_model = model_dir / f"{model_source.model_name}-f16.gguf" if f16_model.exists(): logger.info(f"✅ Found existing F16 file: {f16_model.name}") return f16_model # Check for existing GGUF files model_dir.mkdir(parents=True, exist_ok=True) existing_gguf = self.fs.find_gguf_files(model_dir) if existing_gguf: logger.info(f"✅ Found existing GGUF file: {existing_gguf[0].name}") return existing_gguf[0] # Download with patterns downloaded_file = self._download_gguf_with_patterns( model_source.source_model, model_source.gguf_file_pattern, model_dir ) if downloaded_file: # Handle multi-part files if "00001-of-" in downloaded_file.name: return downloaded_file if "-00002-of-" in downloaded_file.name or "-00003-of-" in downloaded_file.name: base_name = downloaded_file.name.replace("-00002-of-", "-00001-of-").replace( "-00003-of-", "-00001-of-" ) first_part = downloaded_file.parent / base_name if first_part.exists(): logger.info(f"🔄 Using first part: {first_part.name}") return first_part # Rename single file to standard name downloaded_file.rename(f16_model) return f16_model # Fallback to regular conversion logger.info("💡 Falling back to downloading full repository and converting...") return self._handle_regular_repo( ModelSource(**{**model_source.dict(), "is_gguf_repo": False}), model_dir, ) def _download_gguf_with_patterns( self, source_model: str, pattern: str | None, model_dir: Path ) -> Path | None: """Download GGUF file using various pattern strategies. Tries multiple pattern variations to find and download appropriate GGUF files, handling timeouts and temporary directories. Returns: Path to downloaded file, or None if all patterns fail. """ if pattern: patterns = [ f"*{pattern}*", f"*{pattern.lower()}*", f"*{pattern.upper()}*", "*f16*", "*F16*", "*fp16*", ] else: patterns = ["*f16*", "*F16*", "*fp16*"] temp_dir = model_dir / "gguf_temp" for search_pattern in patterns: logger.info(f"🔍 Trying pattern: {search_pattern}") temp_dir.mkdir(exist_ok=True) try: logger.debug( f"DEBUG: Running huggingface-cli download for pattern {search_pattern}" ) result = subprocess.run( [ "timeout", "300", "huggingface-cli", "download", source_model, "--include", search_pattern, "--local-dir", str(temp_dir), ], check=True, capture_output=True, text=True, ) logger.debug( f"DEBUG: Download command completed with return code {result.returncode}" ) # Find downloaded GGUF files gguf_files = self.fs.find_gguf_files(temp_dir, pattern) if gguf_files: found_file = gguf_files[0] logger.info(f"✅ Found GGUF file: {found_file.name}") # Move to parent directory final_path = model_dir / found_file.name shutil.move(str(found_file), str(final_path)) shutil.rmtree(temp_dir) return final_path except subprocess.CalledProcessError as e: logger.debug( f"DEBUG: Pattern {search_pattern} failed with return code {e.returncode}" ) if e.stderr: logger.debug(f"DEBUG: stderr: {e.stderr}") if e.stdout: logger.debug(f"DEBUG: stdout: {e.stdout}") logger.info(f"⚠️ Pattern {search_pattern} failed or timed out") continue except Exception as e: logger.error(f"❌ Unexpected error during download: {e}") logger.error("Exception traceback:") for line in traceback.format_exc().splitlines(): logger.error(f" {line}") continue finally: if temp_dir.exists(): shutil.rmtree(temp_dir, ignore_errors=True) return None def _handle_regular_repo( self, model_source: ModelSource, model_dir: Path, ) -> Path: """Handle regular HuggingFace repository conversion. Downloads full model repository and converts to F16 GGUF format using our native Python-based GGUFConverter for SafeTensors models. Returns: Path to converted F16 GGUF model. """ logger.info(f"⬇️ Downloading source model: {model_source.source_model}") # Download model if needed if not model_dir.exists(): self._download_repository(model_source.source_model, model_dir) else: logger.info("✅ Model already downloaded") # Convert to GGUF return self._convert_to_gguf(model_source, model_dir) def _download_repository(self, source_model: str, model_dir: Path) -> None: """Download HuggingFace repository. Args: source_model: HuggingFace model identifier. model_dir: Local directory for download. Raises: RuntimeError: If download fails. """ try: logger.debug(f"DEBUG: Downloading full repository: {source_model}") result = subprocess.run( [ "huggingface-cli", "download", source_model, "--local-dir", str(model_dir), ], check=True, capture_output=True, text=True, ) logger.debug( f"DEBUG: Repository download completed with return code {result.returncode}" ) except subprocess.CalledProcessError as e: logger.error(f"❌ Failed to download repository {source_model}") logger.error(f"Return code: {e.returncode}") if e.stderr: logger.error(f"stderr: {e.stderr}") if e.stdout: logger.error(f"stdout: {e.stdout}") msg = f"Repository download failed: {e}" raise RuntimeError(msg) from e except Exception as e: logger.error(f"❌ Unexpected error during repository download: {e}") logger.error("Exception traceback:") for line in traceback.format_exc().splitlines(): logger.error(f" {line}") raise def _convert_to_gguf(self, model_source: ModelSource, model_dir: Path) -> Path: """Convert model to GGUF F16 format. Args: model_source: Model source information. model_dir: Directory containing model files. Returns: Path to F16 GGUF model. Raises: RuntimeError: If conversion fails. """ logger.info("🔄 Converting to GGUF F16 format...") f16_model = model_dir / f"{model_source.model_name}-f16.gguf" if f16_model.exists(): logger.info("✅ F16 model already exists") return f16_model # Check for SafeTensors files safetensor_files = list(model_dir.glob("*.safetensors")) if not safetensor_files: logger.error("❌ Model format not supported") logger.info("💡 This tool supports GGUF and SafeTensors formats") msg = "Model must be in GGUF or SafeTensors format" raise RuntimeError(msg) logger.info("🐍 Using native Python GGUFConverter...") logger.info(f"✅ Found {len(safetensor_files)} SafeTensors files") # Load model configuration config_parser = ConfigParser() model_config = config_parser.load_model_config(model_dir) # Get architecture mapping arch_name = model_config.architectures[0] if model_config.architectures else "llama" arch = config_parser.get_architecture_mapping(arch_name) if arch != arch_name: logger.info(f"📝 Architecture mapping: {arch_name} → {arch}") # Convert using GGUFConverter tensor_mapper = TensorMapper() success = GGUFConverter.convert_safetensors( model_dir, f16_model, model_config, arch, tensor_mapper ) if not success: logger.error("❌ Native Python conversion failed") msg = "Failed to convert SafeTensors model to GGUF" raise RuntimeError(msg) logger.info("✅ Native Python conversion successful") return f16_model class HuggingFaceUploader: """Handles uploading models and documentation to HuggingFace. Provides methods for repository creation, file uploads, and README updates with proper error handling and retry logic. """ @staticmethod def get_username() -> str: """Get authenticated HuggingFace username. Returns: HuggingFace username from CLI authentication. Raises: RuntimeError: If not authenticated. """ try: result = subprocess.run( ["huggingface-cli", "whoami"], capture_output=True, text=True, check=True, ) return result.stdout.strip() except (subprocess.CalledProcessError, FileNotFoundError) as err: msg = "Please log in to HuggingFace first: huggingface-cli login" raise RuntimeError(msg) from err def upload_readme(self, output_repo: str, readme_path: Path) -> None: """Upload or update README file to repository. Creates repository if needed, handles existing repository updates. Raises: RuntimeError: If the README upload fails. """ logger.info("Uploading README...") # First ensure the repository exists self._ensure_repo_exists(output_repo) # Upload without --create flag to avoid PR creation try: logger.debug(f"DEBUG: Uploading README to {output_repo}") result = subprocess.run( [ "huggingface-cli", "upload", output_repo, str(readme_path), "README.md", "--commit-message", "Update README.md", ], check=True, capture_output=True, text=True, ) logger.debug(f"DEBUG: README upload completed with return code {result.returncode}") except subprocess.CalledProcessError as e: logger.error(f"❌ Failed to upload README to {output_repo}") logger.error(f"Return code: {e.returncode}") if e.stderr: logger.error(f"stderr: {e.stderr}") if e.stdout: logger.error(f"stdout: {e.stdout}") msg = f"README upload failed: {e}" raise RuntimeError(msg) from e except Exception as e: logger.error(f"❌ Unexpected error during README upload: {e}") logger.error("Exception traceback:") for line in traceback.format_exc().splitlines(): logger.error(f" {line}") raise logger.info("README uploaded") def _ensure_repo_exists(self, repo_id: str) -> None: """Ensure the repository exists, creating it if necessary.""" try: # Try to create the repo - will fail if it already exists subprocess.run( [ "huggingface-cli", "repo", "create", repo_id, "--type", "model", "-y", ], check=True, capture_output=True, text=True, ) logger.info(f"Created repository: {repo_id}") except subprocess.CalledProcessError: # Repository already exists, that's fine pass def upload_model_file(self, output_repo: str, model_path: Path) -> None: """Upload model file to repository. Uploads GGUF model file to specified repository path. Always uses huggingface-cli to ensure proper handling of large files via HuggingFace's xet backend. Raises: RuntimeError: If the model file upload fails. """ logger.info(f"Uploading {model_path.name}...") # Always use huggingface-cli for model files to ensure xet backend is used try: logger.debug(f"DEBUG: Uploading model file {model_path.name} to {output_repo}") result = subprocess.run( [ "huggingface-cli", "upload", output_repo, str(model_path), model_path.name, "--revision", "main", # Explicitly push to main branch "--commit-message", f"Add {model_path.name}", ], check=True, capture_output=True, text=True, ) logger.debug(f"DEBUG: Model upload completed with return code {result.returncode}") except subprocess.CalledProcessError as e: logger.error(f"❌ Failed to upload model file {model_path.name} to {output_repo}") logger.error(f"Return code: {e.returncode}") if e.stderr: logger.error(f"stderr: {e.stderr}") if e.stdout: logger.error(f"stdout: {e.stdout}") msg = f"Model file upload failed: {e}" raise RuntimeError(msg) from e except Exception as e: logger.error(f"❌ Unexpected error during model file upload: {e}") logger.error("Exception traceback:") for line in traceback.format_exc().splitlines(): logger.error(f" {line}") raise # Extract and log the URL if present in output if result.stdout: for line in result.stdout.splitlines(): if "https://huggingface.co/" in line: logger.info(f"Upload URL: {line.strip()}") break logger.info(f"{model_path.name} uploaded") def _try_git_upload_file( self, repo_id: str, local_path: Path, repo_path: str, *, create_repo: bool = False, ) -> bool: """Try to upload file using git directly to avoid PR creation. Returns: bool: True if upload successful, False if should fallback to CLI. """ try: with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) repo_url = f"https://huggingface.co/{repo_id}" # Clone repository logger.info(f"Cloning {repo_url}...") result = subprocess.run( ["git", "clone", repo_url, str(temp_path / "repo")], check=False, capture_output=True, text=True, ) if result.returncode != 0: if create_repo: # Repository doesn't exist, let huggingface-cli handle creation return False logger.warning(f"Clone failed: {result.stderr}") return False repo_dir = temp_path / "repo" target_file = repo_dir / repo_path # Ensure target directory exists target_file.parent.mkdir(parents=True, exist_ok=True) # Copy file shutil.copy2(local_path, target_file) # Check if there are any changes status_result = subprocess.run( ["git", "status", "--porcelain"], cwd=repo_dir, capture_output=True, text=True, check=True, ) if not status_result.stdout.strip(): logger.info(f"No changes detected for {repo_path}, file already up-to-date") return True # File is already up-to-date, no need to push # Git add, commit, push subprocess.run( ["git", "add", repo_path], cwd=repo_dir, check=True, capture_output=True, text=True, ) subprocess.run( ["git", "commit", "-m", f"Update {repo_path}"], cwd=repo_dir, check=True, capture_output=True, text=True, ) subprocess.run( ["git", "push"], cwd=repo_dir, check=True, capture_output=True, text=True, ) return True except subprocess.CalledProcessError as e: logger.warning(f"Git upload failed: {e}") return False except Exception as e: logger.warning(f"Git upload error: {e}") return False