94 lines
3 KiB
Python
94 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
"""Advanced Quantisation Script for GGUF model processing.
|
|
|
|
Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L, Q4_K_XL and custom
|
|
profiles with tensor-level precision control. Features parallel processing, status tracking,
|
|
automatic README generation, and HuggingFace integration for streamlined model distribution
|
|
workflows.
|
|
|
|
Usage: python quantise.py <huggingface_url>
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import shutil
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from helpers.logger import logger
|
|
from helpers.services.orchestrator import QuantisationOrchestrator
|
|
|
|
|
|
def main() -> None:
|
|
"""Main entry point for the Bartowski quantisation workflow.
|
|
|
|
Parses command-line arguments, initialises the quantisation orchestrator,
|
|
and executes the complete model processing pipeline from HuggingFace URL
|
|
to quantised GGUF files with optional HuggingFace upload and cleanup.
|
|
"""
|
|
parser = argparse.ArgumentParser(
|
|
description=(
|
|
"GGUF model quantisation tool supporting Q2-Q8 formats including K-quants, "
|
|
"legacy formats, and Bartowski method variants with tensor-specific precision "
|
|
"for embeddings and output layers."
|
|
),
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
uv run quantise.py https://huggingface.co/MyUser/SafeTensorModelRepo
|
|
uv run quantise.py hf.co/MyUser/Model-Repo-GGUF:F16
|
|
""",
|
|
)
|
|
parser.add_argument("url", help="HuggingFace model URL")
|
|
parser.add_argument("--work-dir", type=Path, help="Working directory (default: ./work)")
|
|
parser.add_argument(
|
|
"--no-imatrix",
|
|
action="store_true",
|
|
help="Skip checking for imatrix files (faster but lower quality)",
|
|
)
|
|
parser.add_argument(
|
|
"--no-upload",
|
|
action="store_true",
|
|
help="Skip uploading to HuggingFace (local testing only)",
|
|
)
|
|
parser.add_argument(
|
|
"--profiles",
|
|
nargs="*",
|
|
help=(
|
|
"Quantisation profiles to use "
|
|
"(default: Q3_K_M Q3_K_L Q3_K_XL Q4_K_M Q4_K_L Q5_K_M Q6_K Q6_K_L Q8_0)"
|
|
),
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
if not args.url:
|
|
parser.print_help()
|
|
sys.exit(1)
|
|
|
|
try:
|
|
orchestrator = QuantisationOrchestrator(
|
|
work_dir=args.work_dir or Path.cwd() / "work",
|
|
use_imatrix=not args.no_imatrix,
|
|
no_upload=args.no_upload,
|
|
custom_profiles=args.profiles,
|
|
)
|
|
orchestrator.quantise(args.url)
|
|
|
|
# Cleanup prompt
|
|
logger.info("Cleaning up...")
|
|
response = input("Delete working files? (y/N): ").strip().lower()
|
|
if response == "y":
|
|
shutil.rmtree(orchestrator.work_dir)
|
|
logger.info("Cleanup complete")
|
|
else:
|
|
logger.info(f"Working files kept in: {orchestrator.work_dir}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error: {e}")
|
|
sys.exit(1)
|
|
|
|
|
|
# Script entry point: run the quantisation CLI only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|