"""llama.cpp environment and operations service. Manages llama.cpp binary discovery, environment setup, and imatrix generation. Provides consistent interface for interacting with llama.cpp tools across different installation methods. """ from __future__ import annotations import subprocess from pathlib import Path from helpers.logger import logger from helpers.models.quantisation import LlamaCppEnvironment from helpers.services.filesystem import FilesystemService class EnvironmentManager: """Manages llama.cpp environment setup and binary discovery. Handles detection of local binaries, repository setup, and conversion script location. Provides fallback strategies for different installation scenarios including local builds and repository-based setups. """ def __init__(self, work_dir: Path) -> None: """Initialise EnvironmentManager.""" self.work_dir = work_dir self.llama_cpp_dir = work_dir / "llama.cpp" self.fs = FilesystemService() def setup(self) -> LlamaCppEnvironment: """Set up llama.cpp environment with automatic detection. Checks for local llama.cpp binaries first, then falls back to repository-based setup if needed. Handles conversion script location, dependency installation, and path resolution. Returns: Configured LlamaCppEnvironment instance. """ # Check for local binaries first local_env = self._check_local_binaries() if local_env: return local_env # Setup repository if needed return self.setup_repository() def _check_local_binaries(self) -> LlamaCppEnvironment | None: """Check for existing llama.cpp binaries in current directory. Searches for quantise and CLI binaries in the current directory and standard installation paths. Also locates conversion scripts. Returns: LlamaCppEnvironment if binaries found, None otherwise. """ quantise_bin = Path("./llama-quantize") cli_bin = Path("./llama-cli") if not (quantise_bin.exists() and cli_bin.exists()): return None logger.info("Found llama.cpp binaries in current directory") # Check for conversion script convert_script = self._find_convert_script() if convert_script: logger.info(f"Found conversion script: {convert_script}") return LlamaCppEnvironment( quantise_binary=quantise_bin.resolve(), cli_binary=cli_bin.resolve(), convert_script=convert_script, use_repo=False, ) logger.warning("No conversion script found in current directory") logger.info("Will use llama.cpp repository method for conversion") return LlamaCppEnvironment( quantise_binary=quantise_bin.resolve(), cli_binary=cli_bin.resolve(), convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py", use_repo=True, ) def _find_convert_script(self) -> str | None: """Find conversion script in current directory. Searches for various naming conventions of the HF to GGUF conversion script. Returns: Command to run conversion script, or None if not found. """ scripts = [ "./llama-convert-hf-to-gguf", "python3 ./convert_hf_to_gguf.py", "python3 ./convert-hf-to-gguf.py", ] for script in scripts: if script.startswith("python3"): script_path = script.split(" ", 1)[1] if Path(script_path).exists(): return script elif Path(script).exists(): return script return None def setup_repository(self) -> LlamaCppEnvironment: """Setup llama.cpp repository for conversion scripts. Clones the llama.cpp repository if not present and installs Python dependencies for model conversion. Returns: LlamaCppEnvironment configured with repository paths. """ if not self.llama_cpp_dir.exists(): logger.info("Cloning llama.cpp for conversion script...") subprocess.run( [ "git", "clone", "https://github.com/ggerganov/llama.cpp.git", str(self.llama_cpp_dir), ], check=True, ) # Install Python requirements logger.info("Installing Python requirements...") subprocess.run( [ "pip3", "install", "-r", "requirements.txt", "--break-system-packages", "--root-user-action=ignore", ], cwd=self.llama_cpp_dir, check=True, ) # Install additional conversion dependencies logger.info("Installing additional conversion dependencies...") subprocess.run( [ "pip3", "install", "transformers", "sentencepiece", "protobuf", "--break-system-packages", "--root-user-action=ignore", ], check=True, ) else: logger.info("llama.cpp repository already exists") # Use local binaries but repo conversion script return LlamaCppEnvironment( quantise_binary=Path("./llama-quantize").resolve(), cli_binary=Path("./llama-cli").resolve(), convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py", use_repo=False, ) class IMatrixGenerator: """Handles importance matrix generation for quantisation guidance. Generates or locates importance matrices that guide quantisation decisions, helping preserve model quality by identifying critical tensors requiring higher precision. """ def __init__(self) -> None: """Initialise IMatrixGenerator.""" self.fs = FilesystemService() def generate_imatrix( self, f16_model_path: Path, llama_env: LlamaCppEnvironment, model_dir: Path ) -> Path | None: """Generate importance matrix for quantisation guidance. Searches for existing imatrix files first, provides interactive prompts for user-supplied matrices, then generates new matrices using calibration data if necessary. Returns: Path to imatrix file, or None if generation fails. """ imatrix_path = model_dir / "imatrix.dat" # Check for existing imatrix if imatrix_path.exists(): logger.info(f"Found existing imatrix: {imatrix_path.name}") return imatrix_path # Try user-provided imatrix user_imatrix = self._prompt_for_user_imatrix(model_dir, imatrix_path) if user_imatrix: return user_imatrix # Generate new imatrix calibration_file = self._get_calibration_file() if not calibration_file: return None return self._generate_new_imatrix(f16_model_path, llama_env, imatrix_path, calibration_file) def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None: """Prompt user for existing imatrix file. Returns: Path to user-provided imatrix, or None if not available. """ logger.info(f"Model directory: {model_dir}") logger.info(f"Looking for imatrix file at: {imatrix_path}") logger.info( "Tip: You can download pre-computed imatrix files from Bartowski's repositories!" ) logger.info( " Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix" ) response = ( input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ") .strip() .lower() ) if response != "y": return None logger.info(f"Please place your imatrix.dat file in: {model_dir}") input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...") if imatrix_path.exists(): file_size = self.fs.get_file_size(imatrix_path) logger.info(f"Found imatrix file! ({file_size})") return imatrix_path logger.warning("No imatrix.dat file found - continuing with automatic generation") return None def _get_calibration_file(self) -> Path | None: """Get calibration data file for imatrix generation. Returns: Path to calibration file, or None if not found. """ calibration_file = Path(__file__).parent.parent.parent / "resources" / "imatrix_data.txt" if not calibration_file.exists(): logger.warning("resources/imatrix_data.txt not found - skipping imatrix generation") logger.info( "Download from: https://gist.githubusercontent.com/bartowski1182/" "eb213dccb3571f863da82e99418f81e8/raw/calibration_datav3.txt" ) return None return calibration_file def _generate_new_imatrix( self, f16_model_path: Path, llama_env: LlamaCppEnvironment, imatrix_path: Path, calibration_file: Path, ) -> Path | None: """Generate new importance matrix using calibration data. Returns: Path to generated imatrix, or None if generation fails. """ logger.info("Generating importance matrix (this may take 1-4 hours for large models)...") logger.info(f"Model: {f16_model_path.name}") logger.info(f"Calibration: {calibration_file}") logger.info(f"Output: {imatrix_path}") # Find imatrix binary imatrix_binary = self._find_imatrix_binary(llama_env) if not imatrix_binary: logger.warning("llama-imatrix binary not found - skipping imatrix generation") logger.info("Make sure llama-imatrix is in the same directory as llama-quantize") return None # Build and execute command cmd = self._build_imatrix_command( imatrix_binary, f16_model_path, calibration_file, imatrix_path ) return self._execute_imatrix_generation(cmd, imatrix_path) def _build_imatrix_command( self, binary: Path, model_path: Path, calibration_file: Path, output_path: Path ) -> list[str]: """Build imatrix generation command. Returns: Command arguments as list. """ return [ str(binary), "-m", str(model_path), "-f", str(calibration_file), "-o", str(output_path), "--process-output", "--output-frequency", "10", "--save-frequency", "50", "-t", "8", "-c", "2048", "-b", "512", ] def _execute_imatrix_generation(self, cmd: list[str], imatrix_path: Path) -> Path | None: """Execute imatrix generation command with real-time output. Returns: Path to generated imatrix file, or None if generation fails. """ logger.info(f"Running: {' '.join(cmd)}") logger.info("Starting imatrix generation... (progress will be shown)") try: process = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, bufsize=1, ) self._stream_imatrix_output(process) return_code = process.poll() if return_code == 0: return self._validate_imatrix_output(imatrix_path) except KeyboardInterrupt: logger.info("imatrix generation cancelled by user") process.terminate() return None except Exception as e: logger.error(f"imatrix generation failed with exception: {e}") return None else: logger.error(f"imatrix generation failed with return code {return_code}") return None def _stream_imatrix_output(self, process: subprocess.Popen) -> None: """Stream imatrix generation output in real-time.""" while True: if process.stdout is not None: output = process.stdout.readline() else: break if not output and process.poll() is not None: break if output: line = output.strip() if self._should_log_imatrix_line(line): logger.info(line) def _should_log_imatrix_line(self, line: str) -> bool: """Determine if imatrix output line should be logged. Returns: True if line should be logged, False otherwise. """ keywords = ["Computing imatrix", "perplexity:", "save_imatrix", "entries =", "ETA"] return any(keyword in line for keyword in keywords) or line.startswith("[") def _validate_imatrix_output(self, imatrix_path: Path) -> Path | None: """Validate generated imatrix file. Returns: Path to imatrix if valid, None otherwise. """ if imatrix_path.exists(): file_size = self.fs.get_file_size(imatrix_path) logger.info(f"imatrix generation successful! ({file_size})") return imatrix_path logger.error("imatrix generation completed but file not found") return None def _find_imatrix_binary(self, llama_env: LlamaCppEnvironment) -> Path | None: """Find llama-imatrix binary in common locations. Searches for the imatrix binary in the current directory and standard installation paths. Returns: Path to imatrix binary, or None if not found. """ candidates = [ Path("./llama-imatrix"), llama_env.quantise_binary.parent / "llama-imatrix", Path("/usr/local/bin/llama-imatrix"), Path("/usr/bin/llama-imatrix"), ] for candidate in candidates: if candidate.exists() and candidate.is_file(): return candidate return None