#!/usr/bin/env python3
"""Advanced Quantisation Script for GGUF model processing.

Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L,
Q4_K_XL and custom profiles with tensor-level precision control. Features
parallel processing, status tracking, automatic README generation, and
HuggingFace integration for streamlined model distribution workflows.

Usage:
    python quantise.py URL [--work-dir DIR] [--no-imatrix] [--no-upload]
                           [--profiles [PROFILE ...]]
"""

from __future__ import annotations

import argparse
import shutil
import sys
from pathlib import Path

from helpers.logger import logger
from helpers.services.orchestrator import QuantisationOrchestrator

# Shown verbatim under --help (the parser uses RawDescriptionHelpFormatter),
# so the line breaks and indentation here are significant.
_EPILOG = (
    "Examples:\n"
    "  uv run quantise.py https://huggingface.co/MyUser/SafeTensorModelRepo\n"
    "  uv run quantise.py hf.co/MyUser/Model-Repo-GGUF:F16\n"
)


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the quantisation tool.

    Returns:
        A parser with one required positional ``url`` and the optional
        ``--work-dir``, ``--no-imatrix``, ``--no-upload`` and ``--profiles``
        arguments.
    """
    parser = argparse.ArgumentParser(
        description=(
            "GGUF model quantisation tool supporting Q2-Q8 formats including K-quants, "
            "legacy formats, and Bartowski method variants with tensor-specific precision "
            "for embeddings and output layers."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )
    parser.add_argument("url", help="HuggingFace model URL")
    parser.add_argument("--work-dir", type=Path, help="Working directory (default: ./work)")
    parser.add_argument(
        "--no-imatrix",
        action="store_true",
        help="Skip checking for imatrix files (faster but lower quality)",
    )
    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Skip uploading to HuggingFace (local testing only)",
    )
    # nargs="*" means a bare `--profiles` yields an empty list (not None);
    # that value is handed to the orchestrator unchanged.
    parser.add_argument(
        "--profiles",
        nargs="*",
        help=(
            "Quantisation profiles to use "
            "(default: Q3_K_M Q3_K_L Q3_K_XL Q4_K_M Q4_K_L Q5_K_M Q6_K Q6_K_L Q8_0)"
        ),
    )
    return parser


def _prompt_cleanup(work_dir: Path | str) -> None:
    """Ask whether to delete the working directory and act on the answer.

    Only an answer of ``y`` (case-insensitive, surrounding whitespace
    ignored) deletes the directory; anything else keeps it. If standard
    input is closed or exhausted, the prompt is treated as answered with the
    default "N" instead of failing the run.

    Args:
        work_dir: Directory to remove recursively on confirmation. It is the
            orchestrator's ``work_dir`` attribute - presumably a ``Path``;
            confirm against ``QuantisationOrchestrator``.

    Raises:
        OSError: Propagated from ``shutil.rmtree`` if deletion fails.
    """
    logger.info("Cleaning up...")
    try:
        response = input("Delete working files? (y/N): ").strip().lower()
    except EOFError:
        # Non-interactive run (CI, cron, `< /dev/null`): input() raises
        # EOFError. The quantisation itself already succeeded, so fall back
        # to the safe default of keeping the files rather than reporting an
        # error and exiting with status 1.
        response = ""

    if response == "y":
        shutil.rmtree(work_dir)
        logger.info("Cleanup complete")
    else:
        logger.info(f"Working files kept in: {work_dir}")


def main() -> None:
    """Run the quantisation command-line workflow.

    Parses the command line, constructs a ``QuantisationOrchestrator`` from
    the options, calls its ``quantise`` method with the given URL and then
    offers to delete the working directory.

    Exits with status 1 if the URL is empty or if any ``Exception`` is raised
    while constructing the orchestrator, quantising, or cleaning up; the
    error is logged before exiting.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # argparse already rejects a missing positional; this catches an
    # explicitly empty one (e.g. `quantise.py ""`).
    if not args.url:
        parser.print_help()
        sys.exit(1)

    try:
        orchestrator = QuantisationOrchestrator(
            # `/` binds tighter than `or`: the fallback is <cwd>/work.
            work_dir=args.work_dir or Path.cwd() / "work",
            use_imatrix=not args.no_imatrix,
            no_upload=args.no_upload,
            custom_profiles=args.profiles,
        )
        orchestrator.quantise(args.url)

        # Cleanup prompt
        _prompt_cleanup(orchestrator.work_dir)
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        logger.error(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()