Switch to llama-cpp-python
commit d937f2d5fa
parent ef7df1a8c3
25 changed files with 2957 additions and 1181 deletions
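Only one of the 25 changed files is shown in this excerpt (the CLI of quantise.py); the llama-cpp-python switch itself is not visible here. For orientation, a minimal, hypothetical sketch of consuming one of the script's quantised outputs via llama-cpp-python's high-level API; the model path and parameters are illustrative, not from this commit:

from llama_cpp import Llama

# Load a quantised GGUF produced by the pipeline (illustrative path).
llm = Llama(model_path="./work/model-Q4_K_M.gguf", n_ctx=2048)

# Run a short completion to confirm the model loads and generates.
out = llm("Q: What does Q4_K_M mean? A:", max_tokens=48)
print(out["choices"][0]["text"])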
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
-"""Bartowski Quantisation Script for advanced GGUF model processing.
+"""Advanced Quantisation Script for GGUF model processing.
 
-Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L,
-Q4_K_XL, and Q4_K_XXL methods with tensor-level precision control. Features
-parallel processing, status tracking, automatic README generation, and
-HuggingFace integration for streamlined model distribution workflows.
+Implements a sophisticated quantisation pipeline supporting Q4_K_M, Q4_K_L, Q4_K_XL and custom
+profiles with tensor-level precision control. Features parallel processing, status tracking,
+automatic README generation, and HuggingFace integration for streamlined model distribution
+workflows.
 
 Usage: python quantise.py <huggingface_url>
 """
@@ -28,45 +28,38 @@ def main() -> None:
     to quantised GGUF files with optional HuggingFace upload and cleanup.
     """
     parser = argparse.ArgumentParser(
-        description="Bartowski Quantisation Script - Supports Q4_K_M, Q4_K_L, Q4_K_XL, Q4_K_XXL",
+        description=(
+            "GGUF model quantisation tool supporting Q2-Q8 formats including K-quants, "
+            "legacy formats, and Bartowski method variants with tensor-specific precision "
+            "for embeddings and output layers."
+        ),
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 Examples:
-    python quantise.py https://huggingface.co/DavidAU/Gemma-3-4b-it-Uncensored-DBL-X
-    python quantise.py hf.co/DavidAU/Gemma-3-it-4B-Uncensored-DBL-X-GGUF:F16
+    uv run quantise.py https://huggingface.co/MyUser/SafeTensorModelRepo
+    uv run quantise.py hf.co/MyUser/Model-Repo-GGUF:F16
 """,
     )
     parser.add_argument("url", help="HuggingFace model URL")
-    parser.add_argument(
-        "--work-dir", type=Path, help="Working directory (default: ./quantisation_work)"
-    )
+    parser.add_argument("--work-dir", type=Path, help="Working directory (default: ./work)")
     parser.add_argument(
         "--no-imatrix",
         action="store_true",
-        help="Skip imatrix generation (faster but lower quality)",
-    )
-    parser.add_argument(
-        "--imatrix-base",
-        choices=[
-            "Q2_K",
-            "Q3_K_L",
-            "Q3_K_M",
-            "Q3_K_S",
-            "Q4_K_S",
-            "Q4_K_M",
-            "Q5_K_S",
-            "Q5_K_M",
-            "Q6_K",
-            "Q8_0",
-        ],
-        default="Q4_K_M",
-        help="Base quantisation for imatrix generation",
+        help="Skip checking for imatrix files (faster but lower quality)",
     )
     parser.add_argument(
         "--no-upload",
         action="store_true",
         help="Skip uploading to HuggingFace (local testing only)",
     )
+    parser.add_argument(
+        "--profiles",
+        nargs="*",
+        help=(
+            "Quantisation profiles to use "
+            "(default: Q3_K_M Q3_K_L Q3_K_XL Q4_K_M Q4_K_L Q5_K_M Q6_K Q6_K_L Q8_0)"
+        ),
+    )
 
     args = parser.parse_args()
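Because --profiles uses nargs="*", argparse yields None when the flag is omitted and an empty list when it is given bare; both cases should fall through to the defaults named in the help text. A self-contained sketch of that behaviour; DEFAULT_PROFILES is an assumed name mirroring the help string, not an identifier from this diff:

import argparse
from pathlib import Path

# Assumed constant mirroring the help text; the real script may define this elsewhere.
DEFAULT_PROFILES = [
    "Q3_K_M", "Q3_K_L", "Q3_K_XL", "Q4_K_M", "Q4_K_L", "Q5_K_M", "Q6_K", "Q6_K_L", "Q8_0",
]

parser = argparse.ArgumentParser()
parser.add_argument("url")
parser.add_argument("--work-dir", type=Path)
parser.add_argument("--no-imatrix", action="store_true")
parser.add_argument("--no-upload", action="store_true")
parser.add_argument("--profiles", nargs="*")

args = parser.parse_args(["hf.co/MyUser/Model-Repo-GGUF:F16", "--profiles", "Q4_K_M", "Q8_0"])
profiles = args.profiles or DEFAULT_PROFILES  # None and [] both fall back to the defaults
print(profiles)  # ['Q4_K_M', 'Q8_0']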
@@ -76,10 +69,10 @@ Examples:
 
     try:
         orchestrator = QuantisationOrchestrator(
-            work_dir=args.work_dir or Path.cwd() / "quantisation_work",
+            work_dir=args.work_dir or Path.cwd() / "work",
             use_imatrix=not args.no_imatrix,
-            imatrix_base=args.imatrix_base,
             no_upload=args.no_upload,
+            custom_profiles=args.profiles,
         )
         orchestrator.quantise(args.url)
 
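How QuantisationOrchestrator consumes custom_profiles is outside this excerpt. A hypothetical sketch of the fallback it would need, assuming a dataclass-style constructor matching the keyword arguments above:

from dataclasses import dataclass
from pathlib import Path

@dataclass
class QuantisationOrchestrator:  # hypothetical shape; the real class is not shown in this diff
    work_dir: Path
    use_imatrix: bool = True
    no_upload: bool = False
    custom_profiles: list[str] | None = None

    def resolve_profiles(self) -> list[str]:
        # An omitted flag (None) and a bare flag ([]) both select the default set.
        return self.custom_profiles or [
            "Q3_K_M", "Q3_K_L", "Q3_K_XL", "Q4_K_M", "Q4_K_L", "Q5_K_M", "Q6_K", "Q6_K_L", "Q8_0",
        ]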