llm-gguf-tools/quantise_gguf.py
2025-08-07 18:29:12 +01:00

101 lines
3 KiB
Python

#!/usr/bin/env python3
"""Bartowski Quantisation Script for advanced GGUF model processing.
Implements a quantisation pipeline supporting the Q4_K_M, Q4_K_L, Q4_K_XL,
and Q4_K_XXL methods with tensor-level precision control. Features parallel
processing, status tracking, automatic README generation, and HuggingFace
integration for streamlined model distribution workflows.

Usage:
    python quantise_gguf.py <huggingface_url> [--work-dir DIR] [--no-imatrix]
        [--imatrix-base TYPE] [--no-upload]
"""
from __future__ import annotations
import argparse
import shutil
import sys
from pathlib import Path
from helpers.logger import logger
from helpers.services.orchestrator import QuantisationOrchestrator
def main() -> None:
    """Run the Bartowski quantisation workflow from the command line.

    Parses the command-line arguments, builds a ``QuantisationOrchestrator``
    from them and calls its ``quantise`` method with the given URL. After a
    successful run the user is asked whether the orchestrator's working
    directory should be deleted; anything other than "y"/"yes" (including a
    closed or non-interactive stdin) keeps the files.

    Exit status:
        1 -- the URL argument is empty, or any step raised an exception
             (the exception message is logged).
        2 -- argparse rejected the command line (missing URL, unknown
             option, invalid ``--imatrix-base`` choice).
    """
    parser = argparse.ArgumentParser(
        description="Bartowski Quantisation Script - Supports Q4_K_M, Q4_K_L, Q4_K_XL, Q4_K_XXL",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # %(prog)s is expanded by argparse, so the examples always show the
        # name the script was actually invoked with.
        epilog="""
Examples:
  python %(prog)s https://huggingface.co/DavidAU/Gemma-3-4b-it-Uncensored-DBL-X
  python %(prog)s hf.co/DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF:F16
""",
    )
    parser.add_argument("url", help="HuggingFace model URL")
    parser.add_argument(
        "--work-dir", type=Path, help="Working directory (default: ./quantisation_work)"
    )
    parser.add_argument(
        "--no-imatrix",
        action="store_true",
        help="Skip imatrix generation (faster but lower quality)",
    )
    parser.add_argument(
        "--imatrix-base",
        choices=[
            "Q2_K",
            "Q3_K_L",
            "Q3_K_M",
            "Q3_K_S",
            "Q4_K_S",
            "Q4_K_M",
            "Q5_K_S",
            "Q5_K_M",
            "Q6_K",
            "Q8_0",
        ],
        default="Q4_K_M",
        help="Base quantisation for imatrix generation",
    )
    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Skip uploading to HuggingFace (local testing only)",
    )
    args = parser.parse_args()

    # argparse already rejects a missing URL; this guards against an
    # explicitly empty one (e.g. an unset shell variable passed as "").
    if not args.url:
        parser.print_help()
        sys.exit(1)

    try:
        orchestrator = QuantisationOrchestrator(
            work_dir=args.work_dir or Path.cwd() / "quantisation_work",
            use_imatrix=not args.no_imatrix,
            imatrix_base=args.imatrix_base,
            no_upload=args.no_upload,
        )
        orchestrator.quantise(args.url)

        # Cleanup prompt
        logger.info("Cleaning up...")
        try:
            response = input("Delete working files? (y/N): ").strip().lower()
        except EOFError:
            # stdin is closed or not interactive (CI, cron, pipe). Fall back
            # to the prompt's default "N" instead of reporting a successful
            # run as a failure.
            response = ""

        if response in {"y", "yes"}:
            shutil.rmtree(orchestrator.work_dir)
            logger.info("Cleanup complete")
        else:
            logger.info(f"Working files kept in: {orchestrator.work_dir}")
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        logger.error(f"Error: {e}")
        sys.exit(1)
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()