Initial commit
This commit is contained in:
commit
ef7df1a8c3
28 changed files with 6829 additions and 0 deletions
101
quantise_gguf.py
Normal file
101
quantise_gguf.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Bartowski Quantisation Script for advanced GGUF model processing.
|
||||
|
||||
Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L,
|
||||
Q4_K_XL, and Q4_K_XXL methods with tensor-level precision control. Features
|
||||
parallel processing, status tracking, automatic README generation, and
|
||||
HuggingFace integration for streamlined model distribution workflows.
|
||||
|
||||
Usage: python quantise_gguf.py <huggingface_url>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from helpers.logger import logger
|
||||
from helpers.services.orchestrator import QuantisationOrchestrator
|
||||
|
||||
|
||||
def main() -> None:
    """Main entry point for the Bartowski quantisation workflow.

    Parses command-line arguments, initialises the quantisation orchestrator,
    and executes the complete model processing pipeline from HuggingFace URL
    to quantised GGUF files with optional HuggingFace upload and cleanup.

    Exits with status 1 if any step of the pipeline raises.
    """
    parser = argparse.ArgumentParser(
        description="Bartowski Quantisation Script - Supports Q4_K_M, Q4_K_L, Q4_K_XL, Q4_K_XXL",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python quantise.py https://huggingface.co/DavidAU/Gemma-3-4b-it-Uncensored-DBL-X
  python quantise.py hf.co/DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF:F16
        """,
    )
    parser.add_argument("url", help="HuggingFace model URL")
    parser.add_argument(
        "--work-dir", type=Path, help="Working directory (default: ./quantisation_work)"
    )
    parser.add_argument(
        "--no-imatrix",
        action="store_true",
        help="Skip imatrix generation (faster but lower quality)",
    )
    parser.add_argument(
        "--imatrix-base",
        choices=[
            "Q2_K",
            "Q3_K_L",
            "Q3_K_M",
            "Q3_K_S",
            "Q4_K_S",
            "Q4_K_M",
            "Q5_K_S",
            "Q5_K_M",
            "Q6_K",
            "Q8_0",
        ],
        default="Q4_K_M",
        help="Base quantisation for imatrix generation",
    )
    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Skip uploading to HuggingFace (local testing only)",
    )

    args = parser.parse_args()

    # NOTE(review): the original also guarded `if not args.url`, but "url" is
    # a required positional — argparse exits with a usage error before this
    # point when it is missing, so that branch was unreachable and is removed.

    try:
        orchestrator = QuantisationOrchestrator(
            work_dir=args.work_dir or Path.cwd() / "quantisation_work",
            use_imatrix=not args.no_imatrix,
            imatrix_base=args.imatrix_base,
            no_upload=args.no_upload,
        )
        orchestrator.quantise(args.url)

        # Interactive cleanup prompt: default (anything but "y") keeps the
        # working files so intermediate artefacts can be inspected or reused.
        logger.info("Cleaning up...")
        response = input("Delete working files? (y/N): ").strip().lower()
        if response == "y":
            shutil.rmtree(orchestrator.work_dir)
            logger.info("Cleanup complete")
        else:
            logger.info(f"Working files kept in: {orchestrator.work_dir}")

    except Exception as e:
        # Top-level boundary: log a concise error and exit non-zero rather
        # than surfacing a raw traceback to the CLI user.
        logger.error(f"Error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the CLI entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue