#!/usr/bin/env python3
"""Bartowski Quantisation Script for advanced GGUF model processing.

Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L,
Q4_K_XL, and Q4_K_XXL methods with tensor-level precision control. Features
parallel processing, status tracking, automatic README generation, and
HuggingFace integration for streamlined model distribution workflows.

Usage: python quantise.py <huggingface_url>
"""
from __future__ import annotations

import argparse
import shutil
import sys
from pathlib import Path

from helpers.logger import logger
from helpers.services.orchestrator import QuantisationOrchestrator


def main() -> None:
    """Run the Bartowski quantisation workflow from the command line.

    Parses command-line arguments, initialises the quantisation orchestrator,
    and executes the complete model processing pipeline from HuggingFace URL
    to quantised GGUF files with optional HuggingFace upload and cleanup.

    Exits with status 1 on an empty URL argument or any pipeline failure.
    """
    parser = argparse.ArgumentParser(
        description="Bartowski Quantisation Script - Supports Q4_K_M, Q4_K_L, Q4_K_XL, Q4_K_XXL",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python quantise.py https://huggingface.co/DavidAU/Gemma-3-4b-it-Uncensored-DBL-X
  python quantise.py hf.co/DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF:F16
""",
    )
    parser.add_argument("url", help="HuggingFace model URL")
    parser.add_argument(
        "--work-dir", type=Path, help="Working directory (default: ./quantisation_work)"
    )
    parser.add_argument(
        "--no-imatrix",
        action="store_true",
        help="Skip imatrix generation (faster but lower quality)",
    )
    parser.add_argument(
        "--imatrix-base",
        choices=[
            "Q2_K",
            "Q3_K_L",
            "Q3_K_M",
            "Q3_K_S",
            "Q4_K_S",
            "Q4_K_M",
            "Q5_K_S",
            "Q5_K_M",
            "Q6_K",
            "Q8_0",
        ],
        default="Q4_K_M",
        help="Base quantisation for imatrix generation",
    )
    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Skip uploading to HuggingFace (local testing only)",
    )

    args = parser.parse_args()

    # argparse guarantees the positional is present, but an empty string
    # (e.g. quantise.py "") would still reach here — treat it as missing.
    if not args.url:
        parser.print_help()
        sys.exit(1)

    try:
        orchestrator = QuantisationOrchestrator(
            work_dir=args.work_dir or Path.cwd() / "quantisation_work",
            use_imatrix=not args.no_imatrix,
            imatrix_base=args.imatrix_base,
            no_upload=args.no_upload,
        )
        orchestrator.quantise(args.url)

        # Interactive cleanup prompt: default (anything but "y") keeps the
        # working files so intermediate artefacts can be inspected or reused.
        logger.info("Cleaning up...")
        response = input("Delete working files? (y/N): ").strip().lower()
        if response == "y":
            shutil.rmtree(orchestrator.work_dir)
            logger.info("Cleanup complete")
        else:
            # Lazy %-style args instead of f-strings so formatting only
            # happens when the log level is enabled.
            logger.info("Working files kept in: %s", orchestrator.work_dir)

    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero rather
        # than dumping a traceback at the user.
        logger.error("Error: %s", e)
        sys.exit(1)
|
|
|
|
|
|
# Script entry point: run the workflow only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|