618 lines
26 KiB
Python
618 lines
26 KiB
Python
"""Quantisation orchestration service.
|
|
|
|
High-level orchestration of the complete quantisation workflow from model
|
|
acquisition through processing to upload. Manages parallel processing,
|
|
status tracking, and cleanup operations for efficient resource utilisation.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import gc
|
|
import signal
|
|
import sys
|
|
import traceback
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
import psutil
|
|
|
|
from helpers.config.quantisation_configs import (
|
|
DEFAULT_QUANTISATION_TYPES,
|
|
QUANTISATION_CONFIGS,
|
|
SUPPORTED_QUANTISATION_TYPES,
|
|
)
|
|
from helpers.logger import logger
|
|
from helpers.models.quantisation import (
|
|
ModelSource,
|
|
QuantisationContext,
|
|
QuantisationResult,
|
|
QuantisationType,
|
|
)
|
|
from helpers.services.huggingface import ReadmeGenerator
|
|
from helpers.services.llama_cpp import IMatrixManager
|
|
from helpers.services.quantisation import HuggingFaceUploader, ModelManager, QuantisationEngine
|
|
from helpers.utils.tensor_mapping import URLParser
|
|
|
|
if TYPE_CHECKING:
|
|
from types import FrameType
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class QuantisationOrchestrator:
|
|
"""Orchestrates the complete quantisation workflow.
|
|
|
|
Uses dataclass with slots for efficient memory usage and dependency injection
|
|
for modular service interaction following SOLID principles.
|
|
"""
|
|
|
|
work_dir: Path = field(default_factory=lambda: Path.cwd() / "quantisation_work")
|
|
use_imatrix: bool = True
|
|
no_upload: bool = False
|
|
custom_profiles: list[str] | None = None
|
|
|
|
# Service dependencies with factory defaults
|
|
url_parser: URLParser = field(default_factory=URLParser)
|
|
quantisation_engine: QuantisationEngine = field(default_factory=QuantisationEngine)
|
|
imatrix_manager: IMatrixManager = field(default_factory=IMatrixManager)
|
|
readme_generator: ReadmeGenerator = field(default_factory=ReadmeGenerator)
|
|
uploader: HuggingFaceUploader = field(default_factory=HuggingFaceUploader)
|
|
|
|
# Computed properties
|
|
models_dir: Path = field(init=False)
|
|
model_manager: ModelManager = field(init=False)
|
|
|
|
def __post_init__(self) -> None:
    """Derive the models directory and model manager, then hook signals.

    Runs once the generated ``__init__`` has populated the declared fields:
    ``models_dir`` becomes ``work_dir / "models"`` and ``model_manager`` is
    built on top of that directory.
    """
    models_root = self.work_dir / "models"
    self.models_dir = models_root
    self.model_manager = ModelManager(models_root)

    # Install the SIGINT/SIGTERM logging hooks so unexpected exits are traceable.
    self._setup_signal_handlers()
|
|
|
|
def _setup_signal_handlers(self) -> None:
    """Install SIGINT/SIGTERM handlers that log the stack before exiting.

    The installed handler logs the signal number and name, dumps the stack
    of the interrupted frame (when one is supplied) and terminates the
    process via ``sys.exit(1)``.

    ``signal.signal`` may only be called from the main thread of the main
    interpreter and raises ``ValueError`` anywhere else. This method runs
    from ``__post_init__``, so an unguarded call would make the orchestrator
    impossible to construct from a worker thread. Registration is therefore
    best-effort: on ``ValueError`` a warning is logged and the existing
    handler is left in place.
    """

    def signal_handler(signum: int, frame: FrameType | None) -> None:
        logger.error(f"❌ Received signal {signum} ({signal.Signals(signum).name})")
        logger.error("Stack trace at signal:")
        if frame:
            for line in traceback.format_stack(frame):
                logger.error(f" {line.strip()}")
        logger.error("Exiting due to signal")
        # NOTE(review): this replaces Python's default SIGINT behaviour, so
        # Ctrl+C surfaces as SystemExit here rather than KeyboardInterrupt.
        sys.exit(1)

    # Handle common termination signals
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            signal.signal(sig, signal_handler)
        except ValueError as e:
            # Raised when not running in the main thread; keep going without hooks.
            logger.warning(f"Could not install handler for {sig.name}: {e}")
|
|
|
|
def get_quantisation_types(self) -> list[QuantisationType]:
    """Get the quantisation types to use for this run.

    When ``custom_profiles`` is set, each entry is stripped of surrounding
    whitespace, upper-cased and converted to a ``QuantisationType``. Entries
    that are not valid enum values, or that are not listed in
    ``SUPPORTED_QUANTISATION_TYPES``, are skipped with a warning. Repeated
    profiles are kept only once (first occurrence wins) so the same
    quantisation is never processed twice. If nothing usable remains, a
    warning is logged and the defaults are used instead.

    Returns:
        List of QuantisationType enums to process. A fresh list is returned
        on every call so callers cannot mutate the shared defaults.
    """
    if not self.custom_profiles:
        return list(DEFAULT_QUANTISATION_TYPES)

    # Parse custom profiles from strings to QuantisationType
    result: list[QuantisationType] = []
    for profile_str in self.custom_profiles:
        try:
            profile = QuantisationType(profile_str.strip().upper())
        except ValueError:
            logger.warning(f"Invalid profile {profile_str}, skipping")
            continue

        if profile not in SUPPORTED_QUANTISATION_TYPES:
            logger.warning(f"Profile {profile_str} is not supported, skipping")
        elif profile not in result:
            result.append(profile)

    if not result:
        logger.warning(
            "No valid custom profiles supplied, falling back to default quantisation types"
        )
        return list(DEFAULT_QUANTISATION_TYPES)
    return result
|
|
|
|
def quantise(self, url: str) -> dict[QuantisationType, QuantisationResult]:
    """Run the end-to-end quantisation workflow for one model URL.

    Stages, in order: environment setup, initial repository/README creation,
    execution of every selected quantisation, removal of intermediate files
    and a final summary.

    Returns:
        dict[QuantisationType, QuantisationResult]: Quantisation results for each type.

    Raises:
        KeyboardInterrupt: If the user interrupts the quantisation process.
        Exception: Any other failure is logged with its traceback and re-raised.
    """
    logger.info("Starting Bartowski quantisation process...")
    for debug_line in (
        f"DEBUG: Input URL: {url}",
        f"DEBUG: Working directory: {self.work_dir}",
        f"DEBUG: Use imatrix: {self.use_imatrix}",
        f"DEBUG: No upload: {self.no_upload}",
        f"DEBUG: Custom profiles: {self.custom_profiles}",
    ):
        logger.debug(debug_line)

    try:
        # Stage 1: resolve the source and prepare the F16 model
        logger.debug("DEBUG: Starting environment setup...")
        source, f16_path, imatrix, repo = self._setup_environment(url)
        logger.debug(f"DEBUG: Environment setup complete. F16 model: {f16_path}")

        # Stage 2: publish the planned README
        logger.debug("DEBUG: Creating initial repository...")
        self._create_initial_repository(source, repo)
        logger.debug("DEBUG: Initial repository created")

        # Stage 3: run every quantisation
        logger.debug("DEBUG: Starting quantisation execution...")
        outcome = self._execute_quantisations(source, f16_path, imatrix, repo)
        logger.debug(f"DEBUG: Quantisation execution complete. Results: {len(outcome)} items")

        # Stage 4: remove intermediate files
        logger.debug("DEBUG: Starting cleanup...")
        self._cleanup_files(f16_path, source)
        logger.debug("DEBUG: Cleanup complete")

        self._print_completion_summary(source, outcome, repo)
        return outcome
    except KeyboardInterrupt:
        logger.error("❌ Process interrupted by user (Ctrl+C)")
        raise
    except Exception as e:
        logger.error(f"❌ Critical error in quantisation workflow: {e}")
        logger.error("Full traceback:")
        for tb_line in traceback.format_exc().splitlines():
            logger.error(f" {tb_line}")
        raise
|
|
|
|
def _setup_environment(self, url: str) -> tuple[ModelSource, Path, Path | None, str]:
    """Resolve the model source and prepare everything quantisation needs.

    Parses ``url``, logs the model details, ensures the models directory
    exists, asks the model manager for the F16 model, optionally looks up an
    importance matrix and derives the name of the output repository.

    Returns:
        Tuple of (model_source, f16_model_path, imatrix_path, output_repo).
    """
    source = self.url_parser.parse(url)
    self._print_model_info(source)

    self.models_dir.mkdir(parents=True, exist_ok=True)
    f16_path = self.model_manager.prepare_model(source)

    if self.use_imatrix:
        logger.info("Checking for importance matrix (imatrix)...")
        model_dir = self.models_dir / source.model_name
        imatrix = self.imatrix_manager.find_imatrix(model_dir)
    else:
        imatrix = None

    username = self.uploader.get_username()
    repo = f"{username}/{source.original_author}-{source.model_name}-GGUF"

    return source, f16_path, imatrix, repo
|
|
|
|
def _create_initial_repository(self, model_source: ModelSource, output_repo: str) -> None:
    """Generate a README listing every planned quantisation and publish it.

    The README is always generated; it is only uploaded when uploads are
    enabled (``no_upload`` is false).
    """
    logger.info("Creating initial README with planned quantisations...")

    # One placeholder entry per selected type, all marked as "planned".
    placeholders: dict[QuantisationType, QuantisationResult] = {}
    for planned_type in self.get_quantisation_types():
        placeholders[planned_type] = QuantisationResult(
            quantisation_type=planned_type, success=False, status="planned"
        )

    readme = self.readme_generator.generate(
        model_source, placeholders, self.models_dir, output_repo
    )

    if self.no_upload:
        logger.info("Skipping repository creation (--no-upload specified)")
        return

    logger.info("Creating repository with planned quantisations...")
    self.uploader.upload_readme(output_repo, readme)
|
|
|
|
def _execute_quantisations(
    self,
    model_source: ModelSource,
    f16_model_path: Path,
    imatrix_path: Path | None,
    output_repo: str,
) -> dict[QuantisationType, QuantisationResult]:
    """Run every selected quantisation in turn while uploads run in the background.

    Quantisations are processed one after another on the calling thread;
    uploads are submitted to a two-worker thread pool. A failure of one type
    is logged and recorded as a failed result without stopping the loop.

    Returns:
        dict[QuantisationType, QuantisationResult]: Quantisation results for each type.
    """
    outcomes: dict[QuantisationType, QuantisationResult] = {}

    selected = self.get_quantisation_types()
    total = len(selected)
    names = [qt.value for qt in selected]
    logger.info(f"Processing {total} quantisation types: {names}")

    # Quantise sequentially, upload in the background
    pending_uploads: list = []
    with ThreadPoolExecutor(max_workers=2, thread_name_prefix="upload") as pool:
        for position, quant_type in enumerate(selected, start=1):
            logger.info(f"Processing quantisation {position}/{total}: {quant_type.value}")
            logger.debug(f"DEBUG: Starting quantisation {position}/{total}")
            logger.debug(f"DEBUG: Current type: {quant_type.value}")
            logger.debug(f"DEBUG: Results so far: {len(outcomes)} completed")

            try:
                outcomes[quant_type] = self._process_single_quantisation(
                    quant_type,
                    model_source,
                    f16_model_path,
                    imatrix_path,
                    output_repo,
                    outcomes,
                    pool,
                    pending_uploads,
                )
                logger.debug(f"DEBUG: Quantisation {quant_type.value} completed")

                # Release memory before the next quantisation starts
                gc.collect()
                logger.debug("DEBUG: Garbage collection completed")

            except Exception as e:
                logger.error(f"❌ Critical error processing {quant_type.value}: {e}")
                logger.error("Exception traceback:")
                for tb_line in traceback.format_exc().splitlines():
                    logger.error(f" {tb_line}")
                outcomes[quant_type] = QuantisationResult(
                    quantisation_type=quant_type,
                    success=False,
                    status="failed",
                    error_message=str(e),
                )

                # Release memory after the failure as well
                gc.collect()

        # Block until every background upload has been waited on
        self._wait_for_uploads(pending_uploads)

    return outcomes
|
|
|
|
def _process_single_quantisation(
    self,
    quant_type: QuantisationType,
    model_source: ModelSource,
    f16_model_path: Path,
    imatrix_path: Path | None,
    output_repo: str,
    results: dict[QuantisationType, QuantisationResult],
    upload_executor: ThreadPoolExecutor,
    upload_futures: list,
) -> QuantisationResult:
    """Process a single quantisation type.

    Records a "processing" placeholder in ``results``, refreshes the README,
    runs the quantisation engine and passes the outcome to
    ``_handle_quantisation_result``. Any exception is converted into a
    failed result by ``_handle_quantisation_error``.

    Args:
        quant_type: Quantisation type to produce.
        model_source: Source model description.
        f16_model_path: Path of the F16 model used as input.
        imatrix_path: Importance matrix path, or None when not used.
        output_repo: Name of the repository receiving README/model uploads.
        results: Shared mapping of results; updated in place.
        upload_executor: Executor used for background uploads.
        upload_futures: List collecting the submitted upload futures.

    Returns:
        QuantisationResult: Result of the quantisation attempt.
    """
    try:
        logger.info(f"Starting {quant_type.value} quantisation...")
        logger.debug(f"DEBUG: Getting config for {quant_type.value}")
        config = QUANTISATION_CONFIGS[quant_type]
        logger.debug(f"DEBUG: Config loaded: {config.name}")

        # Update status to processing
        logger.debug("DEBUG: Creating initial quantisation result")
        result = QuantisationResult(quantisation_type=quant_type, success=False)
        result.status = "processing"
        results[quant_type] = result

        logger.debug("DEBUG: Updating README status")
        self._update_readme_status(model_source, results, output_repo)

        # Perform quantisation
        logger.debug("DEBUG: Creating quantisation context")
        context = QuantisationContext(
            f16_model_path=f16_model_path,
            model_source=model_source,
            config=config,
            models_dir=self.models_dir,
            imatrix_path=imatrix_path,
        )
        logger.debug(f"DEBUG: Context created. F16 path: {f16_model_path}")
        logger.debug(f"DEBUG: imatrix path: {imatrix_path}")
        logger.debug("DEBUG: Calling quantisation engine...")
        result = self.quantisation_engine.quantise(context)
        logger.debug(f"DEBUG: Quantisation engine returned: success={result.success}")

        # Publish the engine's result before handing it off. Otherwise the
        # README refresh and the background upload thread (both of which look
        # the entry up via ``results[quant_type]``) would act on the stale
        # "processing" placeholder, and their status updates would be lost
        # once the caller stores the returned result.
        results[quant_type] = result

        self._handle_quantisation_result(
            result,
            quant_type,
            model_source,
            results,
            output_repo,
            upload_executor,
            upload_futures,
        )
    except Exception as e:
        return self._handle_quantisation_error(
            e, quant_type, model_source, results, output_repo
        )
    return result
|
|
|
|
def _process_single_quantisation_sequential(
    self,
    quant_type: QuantisationType,
    model_source: ModelSource,
    f16_model_path: Path,
    imatrix_path: Path | None,
    output_repo: str,
    results: dict[QuantisationType, QuantisationResult],
) -> QuantisationResult:
    """Process a single quantisation type sequentially with immediate upload.

    Logs process diagnostics, records a "processing" placeholder, runs the
    quantisation engine and then, unless ``no_upload`` is set, uploads the
    produced file on the calling thread and deletes it locally. The result
    stored in ``results`` is always the object whose status is updated, so
    README refreshes reflect the real state.

    Args:
        quant_type: Quantisation type to produce.
        model_source: Source model description.
        f16_model_path: Path of the F16 model used as input.
        imatrix_path: Importance matrix path, or None when not used.
        output_repo: Name of the repository receiving README/model uploads.
        results: Shared mapping of results; updated in place.

    Returns:
        QuantisationResult: Result of the quantisation attempt.
    """
    # Force cleanup before starting new quantisation
    gc.collect()

    # Log system state before quantisation. This is diagnostics only, so a
    # psutil failure (e.g. access denied when listing open files) must not
    # abort the quantisation itself.
    try:
        process = psutil.Process()
        logger.debug(f"DEBUG: === System state before {quant_type.value} ===")
        logger.debug(f"DEBUG: Process alive: {process.is_running()}")
        logger.debug(f"DEBUG: PID: {process.pid}")
        logger.debug(f"DEBUG: Memory: {process.memory_info().rss / (1024**3):.2f} GB")
        logger.debug(f"DEBUG: CPU percent: {process.cpu_percent()}%")
        logger.debug(f"DEBUG: Threads: {process.num_threads()}")
        logger.debug(f"DEBUG: Open files: {len(process.open_files())}")
    except psutil.Error as diag_error:
        logger.debug(f"DEBUG: Could not collect system state: {diag_error}")

    try:
        logger.info(f"Starting {quant_type.value} quantisation...")
        logger.debug(f"DEBUG: Getting config for {quant_type.value}")
        config = QUANTISATION_CONFIGS[quant_type]
        logger.debug(f"DEBUG: Config loaded: {config.name}")

        # Update status to processing
        logger.debug("DEBUG: Creating initial quantisation result")
        result = QuantisationResult(quantisation_type=quant_type, success=False)
        result.status = "processing"
        results[quant_type] = result

        logger.debug("DEBUG: Updating README status")
        self._update_readme_status(model_source, results, output_repo)

        # Perform quantisation
        logger.debug("DEBUG: Creating quantisation context")
        context = QuantisationContext(
            f16_model_path=f16_model_path,
            model_source=model_source,
            config=config,
            models_dir=self.models_dir,
            imatrix_path=imatrix_path,
        )
        logger.debug(f"DEBUG: Context created. F16 path: {f16_model_path}")
        logger.debug(f"DEBUG: imatrix path: {imatrix_path}")
        logger.debug("DEBUG: Calling quantisation engine...")
        result = self.quantisation_engine.quantise(context)
        logger.debug(f"DEBUG: Quantisation engine returned: success={result.success}")

        # Replace the placeholder so the README refreshes below render the
        # engine's result rather than the stale "processing" entry.
        results[quant_type] = result

        if not (result.success and result.file_path):
            result.status = "failed"
            self._update_readme_status(model_source, results, output_repo)
        elif self.no_upload:
            # No upload mode - just mark as completed
            result.status = "completed"
            logger.info(f"Skipping upload of {quant_type.value} (--no-upload specified)")
        else:
            # Upload immediately
            logger.info(f"Uploading {quant_type.value}...")
            try:
                self.uploader.upload_model_file(output_repo, result.file_path)
                logger.info(f"Upload of {quant_type.value} completed successfully")

                # Clean up file after successful upload
                logger.info(f"Removing {result.file_path.name} to save disk space...")
                result.file_path.unlink()

                result.status = "completed"
                self._update_readme_status(model_source, results, output_repo)
            except Exception as upload_error:
                logger.error(f"Failed to upload {quant_type.value}: {upload_error}")
                result.status = "failed"
                result.error_message = str(upload_error)
                self._update_readme_status(model_source, results, output_repo)
                # Keep file if upload failed
    except Exception as e:
        logger.error(f"Error processing {quant_type.value}: {e}")
        result = QuantisationResult(quantisation_type=quant_type, success=False)
        result.status = "failed"
        result.error_message = str(e)
        # Record the failure so the README no longer shows "processing".
        results[quant_type] = result

        try:
            self._update_readme_status(model_source, results, output_repo)
        except Exception as readme_error:
            logger.error(f"Failed to update README after error: {readme_error}")

    # Force cleanup after quantisation, whether it succeeded or failed
    gc.collect()
    return result
|
|
|
|
def _handle_quantisation_result(
    self,
    result: QuantisationResult,
    quant_type: QuantisationType,
    model_source: ModelSource,
    results: dict[QuantisationType, QuantisationResult],
    output_repo: str,
    upload_executor: ThreadPoolExecutor,
    upload_futures: list,
) -> None:
    """Handle successful or failed quantisation result.

    Stores ``result`` in ``results`` and then:

    * marks it "failed" when quantisation did not produce a file;
    * marks it "completed" and keeps the local file when ``no_upload`` is
      set, so no model file is uploaded or deleted in that mode;
    * otherwise marks it "uploading" and submits ``_upload_and_cleanup`` to
      ``upload_executor``, appending the future to ``upload_futures``.

    Finally the README is refreshed via ``_update_readme_status``.

    Args:
        result: Result returned by the quantisation engine.
        quant_type: Quantisation type the result belongs to.
        model_source: Source model description.
        results: Shared mapping of results; updated in place.
        output_repo: Name of the repository receiving README/model uploads.
        upload_executor: Executor used for background uploads.
        upload_futures: List collecting the submitted upload futures.
    """
    quant_str = getattr(result.quantisation_type, "value", result.quantisation_type)

    # The upload worker and the README generator both read the entry through
    # ``results``; make sure they see this object, not an earlier placeholder.
    results[quant_type] = result

    if not (result.success and result.file_path):
        result.status = "failed"
    elif self.no_upload:
        # Honour --no-upload: keep the file on disk and skip the upload.
        result.status = "completed"
        logger.info(f"Skipping upload of {quant_str} (--no-upload specified)")
    else:
        logger.info(f"Starting parallel upload of {quant_str}...")
        file_path = result.file_path
        # Update the bookkeeping before submitting: once the task is queued
        # the worker may finish and set its own status at any moment, and
        # that must not be overwritten with "uploading" afterwards.
        result.file_path = None  # Mark as being uploaded
        result.status = "uploading"
        upload_futures.append(
            upload_executor.submit(
                self._upload_and_cleanup,
                output_repo,
                file_path,
                quant_type,
                model_source,
                results,
            )
        )

    self._update_readme_status(model_source, results, output_repo)
|
|
|
|
def _handle_quantisation_error(
    self,
    error: Exception,
    quant_type: QuantisationType,
    model_source: ModelSource,
    results: dict[QuantisationType, QuantisationResult],
    output_repo: str,
) -> QuantisationResult:
    """Handle quantisation processing error.

    Logs the error, builds a failed result carrying the error message,
    records it in ``results`` and refreshes the README. A failure of the
    README refresh is logged and otherwise ignored so the original error
    remains the reported outcome.

    Args:
        error: Exception raised while processing the quantisation.
        quant_type: Quantisation type that failed.
        model_source: Source model description.
        results: Shared mapping of results; updated in place.
        output_repo: Name of the repository receiving the README update.

    Returns:
        QuantisationResult: Failed quantisation result with error information.
    """
    logger.error(f"Error processing {quant_type.value}: {error}")
    result = QuantisationResult(quantisation_type=quant_type, success=False)
    result.status = "failed"
    result.error_message = str(error)

    # Record the failure before regenerating the README; otherwise the README
    # is rendered from whatever entry was stored earlier for this type.
    results[quant_type] = result

    try:
        self._update_readme_status(model_source, results, output_repo)
    except Exception as readme_error:
        logger.error(f"Failed to update README after error: {readme_error}")

    return result
|
|
|
|
def _update_readme_status(
    self,
    model_source: ModelSource,
    results: dict[QuantisationType, QuantisationResult],
    output_repo: str,
) -> None:
    """Regenerate the README from ``results`` and upload it.

    Does nothing when ``no_upload`` is set.
    """
    if self.no_upload:
        return

    readme = self.readme_generator.generate(
        model_source, results, self.models_dir, output_repo
    )
    self.uploader.upload_readme(output_repo, readme)
|
|
|
|
def _wait_for_uploads(self, upload_futures: list, timeout: float | None = 300) -> None:
    """Wait for all parallel uploads to complete.

    Each future is waited on in turn. Problems are logged as warnings and
    never raised, so one slow or failed upload does not prevent the
    remaining futures from being checked.

    Args:
        upload_futures: Futures returned by the upload executor.
        timeout: Maximum number of seconds to wait for each upload
            (default 300, i.e. five minutes); None waits indefinitely.
    """
    # Imported locally: before Python 3.11 this is a distinct class from the
    # built-in TimeoutError.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    logger.info("Waiting for any remaining uploads to complete...")
    for future in upload_futures:
        try:
            future.result(timeout=timeout)
        except FuturesTimeoutError:
            # A timeout carries no message and does not mean the upload
            # failed, so report it explicitly instead of as an empty error.
            logger.warning(
                f"Upload did not finish within {timeout} seconds and is still running"
            )
        except Exception as e:
            logger.warning(f"Upload error: {e}")
|
|
|
|
def _cleanup_files(self, f16_model_path: Path, model_source: ModelSource) -> None:
    """Remove intermediate files once processing has finished.

    Deletes the F16 model if it is present and, for sources that are not
    GGUF repositories, delegates to ``_cleanup_original_model``.
    """
    f16_present = f16_model_path.exists()
    if f16_present:
        logger.info(f"Removing F16 model {f16_model_path.name} to save disk space...")
        f16_model_path.unlink()

    if model_source.is_gguf_repo:
        return
    self._cleanup_original_model(model_source)
|
|
|
|
def _cleanup_original_model(self, model_source: ModelSource) -> None:
    """Delete ``pytorch_model*.bin`` weight files from the model's directory.

    Only files matching that glob inside ``models_dir / model_name`` are
    removed; every other file in the directory is left in place.
    """
    weight_files = list(
        (self.models_dir / model_source.model_name).glob("pytorch_model*.bin")
    )
    if weight_files:
        logger.info(f"Removing {len(weight_files)} PyTorch model files to save disk space...")
        for weight_file in weight_files:
            weight_file.unlink()

    logger.info("Keeping config files, tokeniser, and metadata for reference")
|
|
|
|
def _upload_and_cleanup(
    self,
    output_repo: str,
    file_path: Path,
    quant_type: QuantisationType,
    model_source: ModelSource,
    results: dict[QuantisationType, QuantisationResult],
) -> None:
    """Upload file and clean up (runs in background thread).

    The work is split into three independently guarded phases so that a
    problem in a later phase cannot misreport an earlier success:

    1. upload the model file - a failure marks the result "failed";
    2. delete the local file - a failure is only logged, the upload stands;
    3. regenerate and upload the README - a failure is only logged.

    Upload, deletion and README errors are logged rather than raised so
    that other uploads can continue.

    Args:
        output_repo: Name of the repository receiving the upload.
        file_path: Local path of the quantised model file.
        quant_type: Quantisation type the file belongs to.
        model_source: Source model description.
        results: Shared mapping of results; updated in place.
    """
    label = quant_type.value

    # ``results[quant_type]`` is looked up at each use rather than cached:
    # the main thread may replace the entry while this worker is running.
    try:
        logger.info(f"[PARALLEL] Starting upload of {label} ({file_path.name})")
        self.uploader.upload_model_file(output_repo, file_path)
        logger.info(f"[PARALLEL] Upload of {label} completed successfully")
    except Exception as e:
        logger.error(f"[PARALLEL] Failed to upload {label}: {e}")
        results[quant_type].status = "failed"
        results[quant_type].error_message = str(e)
        uploaded = False
    else:
        results[quant_type].status = "completed"
        uploaded = True

        logger.info(f"[PARALLEL] Removing {file_path.name} to save disk space...")
        try:
            file_path.unlink()
        except OSError as cleanup_error:
            # The upload succeeded; a leftover local file is not a failure.
            logger.warning(f"[PARALLEL] Could not remove {file_path.name}: {cleanup_error}")

    try:
        updated_readme_path = self.readme_generator.generate(
            model_source, results, self.models_dir, output_repo
        )
        self.uploader.upload_readme(output_repo, updated_readme_path)
    except Exception as readme_error:
        logger.error(
            f"[PARALLEL] Failed to update README after upload of {label}: {readme_error}"
        )

    if uploaded:
        logger.info(f"[PARALLEL] {label} upload and cleanup complete")
    # Don't re-raise - let other uploads continue
|
|
|
|
def _print_model_info(self, model_source: ModelSource) -> None:
    """Log the source model details, the uploader's username and the work directory."""
    source_fields = (
        ("Source URL", "url"),
        ("Source model", "source_model"),
        ("Original author", "original_author"),
        ("Model name", "model_name"),
    )
    for label, attribute in source_fields:
        logger.info(f"{label}: {getattr(model_source, attribute)}")

    logger.info(f"Your HF username: {self.uploader.get_username()}")
    logger.info(f"Working directory: {self.work_dir}")
|
|
|
|
def _print_completion_summary(
    self,
    model_source: ModelSource,
    results: dict[QuantisationType, QuantisationResult],
    output_repo: str,
) -> None:
    """Print completion summary.

    A result counts as successful only when quantisation succeeded and its
    status was not later set to "failed" (which is what happens when the
    upload fails), so files that never reached the repository are not
    listed as available. The quantisation type is rendered through its
    ``value`` when it has one, matching how the rest of the class logs it.

    Args:
        model_source: Source model description.
        results: Final results for every processed quantisation type.
        output_repo: Name of the repository the models were published to.
    """
    successful_results = [
        r for r in results.values() if r.success and r.status != "failed"
    ]

    if not successful_results:
        logger.error(
            "All quantisations failed - repository created with documentation "
            "but no model files"
        )
        logger.error(f" Repository: https://huggingface.co/{output_repo}")
        return

    logger.info("Complete! Your quantised models are available at:")
    logger.info(f" https://huggingface.co/{output_repo}")
    logger.info("Model info:")
    logger.info(f" - Source URL: {model_source.url}")
    logger.info(f" - Original: {model_source.source_model}")
    method = "Direct GGUF download" if model_source.is_gguf_repo else "HF model conversion"
    logger.info(f" - Method: {method}")
    logger.info(f" - Quantised: {output_repo}")

    for result in successful_results:
        if result.file_size:
            quant_str = getattr(result.quantisation_type, "value", result.quantisation_type)
            filename = (
                f"{model_source.original_author}-{model_source.model_name}-"
                f"{quant_str}.gguf"
            )
            logger.info(f" - {quant_str}: {filename} ({result.file_size})")
|