#!/usr/bin/env python3
"""Bartowski Quantisation Script for advanced GGUF model processing.

Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L,
Q4_K_XL, and Q4_K_XXL methods with tensor-level precision control. Features
parallel processing, status tracking, automatic README generation, and
HuggingFace integration for streamlined model distribution workflows.

This module is only the command-line front end: it parses arguments, hands
them to ``QuantisationOrchestrator`` and afterwards offers to delete the
working directory. The pipeline itself lives in the orchestrator.

Usage:
    python quantise.py HUGGINGFACE_URL [--work-dir DIR] [--no-imatrix]
                       [--imatrix-base TYPE] [--no-upload]
"""

from __future__ import annotations

import argparse
import shutil
import sys
from pathlib import Path

from helpers.logger import logger
from helpers.services.orchestrator import QuantisationOrchestrator

# Quantisation types accepted by --imatrix-base.
_IMATRIX_BASE_CHOICES = (
    "Q2_K",
    "Q3_K_L",
    "Q3_K_M",
    "Q3_K_S",
    "Q4_K_S",
    "Q4_K_M",
    "Q5_K_S",
    "Q5_K_M",
    "Q6_K",
    "Q8_0",
)


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the quantisation script."""
    parser = argparse.ArgumentParser(
        description="Bartowski Quantisation Script - Supports Q4_K_M, Q4_K_L, Q4_K_XL, Q4_K_XXL",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python quantise.py https://huggingface.co/DavidAU/Gemma-3-4b-it-Uncensored-DBL-X
  python quantise.py hf.co/DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF:F16
        """,
    )
    parser.add_argument("url", help="HuggingFace model URL")
    parser.add_argument(
        "--work-dir", type=Path, help="Working directory (default: ./quantisation_work)"
    )
    parser.add_argument(
        "--no-imatrix",
        action="store_true",
        help="Skip imatrix generation (faster but lower quality)",
    )
    parser.add_argument(
        "--imatrix-base",
        choices=_IMATRIX_BASE_CHOICES,
        default="Q4_K_M",
        help="Base quantisation for imatrix generation",
    )
    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Skip uploading to HuggingFace (local testing only)",
    )
    return parser


def _confirm_cleanup() -> bool:
    """Ask whether the working files should be deleted.

    Returns:
        True only when the user answers ``y`` (case-insensitive, surrounding
        whitespace ignored). Any other answer, and a closed or
        non-interactive stdin, count as the default "N".
    """
    try:
        response = input("Delete working files? (y/N): ")
    except EOFError:
        # No stdin (CI, cron, piped run): fall back to the safe default rather
        # than reporting an otherwise successful run as a failure.
        return False
    return response.strip().lower() == "y"


def main() -> None:
    """Main entry point for the Bartowski quantisation workflow.

    Parses command-line arguments, initialises the quantisation orchestrator,
    runs it on the given HuggingFace URL, and finally asks whether the working
    directory should be removed.

    Exits with status 1 when the URL is empty or when any step raises an
    exception; the error message is logged before exiting.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # argparse guarantees the positional is present, but it may still be "".
    if not args.url:
        parser.print_help()
        sys.exit(1)

    try:
        orchestrator = QuantisationOrchestrator(
            work_dir=args.work_dir or (Path.cwd() / "quantisation_work"),
            use_imatrix=not args.no_imatrix,
            imatrix_base=args.imatrix_base,
            no_upload=args.no_upload,
        )
        orchestrator.quantise(args.url)

        # Cleanup prompt
        logger.info("Cleaning up...")
        if _confirm_cleanup():
            shutil.rmtree(orchestrator.work_dir)
            logger.info("Cleanup complete")
        else:
            logger.info(f"Working files kept in: {orchestrator.work_dir}")
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        logger.error(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()