Initial commit
This commit is contained in:
commit
ef7df1a8c3
28 changed files with 6829 additions and 0 deletions
101
quantise_gguf.py
Normal file
101
quantise_gguf.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Bartowski Quantisation Script for advanced GGUF model processing.
|
||||
|
||||
Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L,
|
||||
Q4_K_XL, and Q4_K_XXL methods with tensor-level precision control. Features
|
||||
parallel processing, status tracking, automatic README generation, and
|
||||
HuggingFace integration for streamlined model distribution workflows.
|
||||
|
||||
Usage: python quantise_gguf.py <huggingface_url>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from helpers.logger import logger
|
||||
from helpers.services.orchestrator import QuantisationOrchestrator
|
||||
|
||||
|
||||
def main() -> None:
    """Main entry point for the Bartowski quantisation workflow.

    Parses command-line arguments, initialises the quantisation orchestrator,
    and executes the complete model processing pipeline from HuggingFace URL
    to quantised GGUF files with optional HuggingFace upload and cleanup.

    Exits with status 1 if any step of the pipeline raises.
    """
    parser = argparse.ArgumentParser(
        description="Bartowski Quantisation Script - Supports Q4_K_M, Q4_K_L, Q4_K_XL, Q4_K_XXL",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python quantise.py https://huggingface.co/DavidAU/Gemma-3-4b-it-Uncensored-DBL-X
  python quantise.py hf.co/DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF:F16
        """,
    )
    parser.add_argument("url", help="HuggingFace model URL")
    parser.add_argument(
        "--work-dir", type=Path, help="Working directory (default: ./quantisation_work)"
    )
    parser.add_argument(
        "--no-imatrix",
        action="store_true",
        help="Skip imatrix generation (faster but lower quality)",
    )
    parser.add_argument(
        "--imatrix-base",
        choices=[
            "Q2_K",
            "Q3_K_L",
            "Q3_K_M",
            "Q3_K_S",
            "Q4_K_S",
            "Q4_K_M",
            "Q5_K_S",
            "Q5_K_M",
            "Q6_K",
            "Q8_0",
        ],
        default="Q4_K_M",
        help="Base quantisation for imatrix generation",
    )
    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Skip uploading to HuggingFace (local testing only)",
    )

    args = parser.parse_args()

    # NOTE(review): the original also guarded `if not args.url`, but "url" is
    # a required positional — argparse exits with a usage error before this
    # point when it is missing, so that branch was unreachable and is removed.

    try:
        orchestrator = QuantisationOrchestrator(
            work_dir=args.work_dir or Path.cwd() / "quantisation_work",
            use_imatrix=not args.no_imatrix,
            imatrix_base=args.imatrix_base,
            no_upload=args.no_upload,
        )
        orchestrator.quantise(args.url)

        # Interactive cleanup prompt: default (anything but "y") keeps the
        # working files so intermediate artefacts can be inspected or reused.
        logger.info("Cleaning up...")
        response = input("Delete working files? (y/N): ").strip().lower()
        if response == "y":
            shutil.rmtree(orchestrator.work_dir)
            logger.info("Cleanup complete")
        else:
            logger.info(f"Working files kept in: {orchestrator.work_dir}")

    except Exception as e:
        # Top-level boundary: log a concise error and exit non-zero rather
        # than surfacing a raw traceback to the CLI user.
        logger.error(f"Error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the CLI entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue