Initial commit
commit ef7df1a8c3
28 changed files with 6829 additions and 0 deletions

helpers/services/llama_cpp.py (new file, 417 lines)
@@ -0,0 +1,417 @@
"""llama.cpp environment and operations service.
|
||||
|
||||
Manages llama.cpp binary discovery, environment setup, and imatrix generation.
|
||||
Provides consistent interface for interacting with llama.cpp tools across
|
||||
different installation methods.
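
Example (illustrative; f16_path and model_dir are assumed to exist):
    env = EnvironmentManager(Path("./work")).setup()
    imatrix = IMatrixGenerator().generate_imatrix(f16_path, env, model_dir)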
"""

from __future__ import annotations

import subprocess
from pathlib import Path

from helpers.logger import logger
from helpers.models.quantisation import LlamaCppEnvironment
from helpers.services.filesystem import FilesystemService


class EnvironmentManager:
    """Manages llama.cpp environment setup and binary discovery.

    Handles detection of local binaries, repository setup, and conversion
    script location. Provides fallback strategies for different installation
    scenarios including local builds and repository-based setups.
    """

    def __init__(self, work_dir: Path) -> None:
        """Initialise EnvironmentManager."""
        self.work_dir = work_dir
        self.llama_cpp_dir = work_dir / "llama.cpp"
        self.fs = FilesystemService()

    def setup(self) -> LlamaCppEnvironment:
        """Set up llama.cpp environment with automatic detection.

        Checks for local llama.cpp binaries first, then falls back to
        repository-based setup if needed. Handles conversion script location,
        dependency installation, and path resolution.

        Returns:
            Configured LlamaCppEnvironment instance.
        """
        # Check for local binaries first
        local_env = self._check_local_binaries()
        if local_env:
            return local_env

        # Set up the repository if needed
        return self.setup_repository()

    def _check_local_binaries(self) -> LlamaCppEnvironment | None:
        """Check for existing llama.cpp binaries in the current directory.

        Searches for quantise and CLI binaries in the current directory.
        Also locates conversion scripts.

        Returns:
            LlamaCppEnvironment if binaries found, None otherwise.
        """
        quantise_bin = Path("./llama-quantize")
        cli_bin = Path("./llama-cli")

        if not (quantise_bin.exists() and cli_bin.exists()):
            return None

        logger.info("Found llama.cpp binaries in current directory")

        # Check for conversion script
        convert_script = self._find_convert_script()
        if convert_script:
            logger.info(f"Found conversion script: {convert_script}")
            return LlamaCppEnvironment(
                quantise_binary=quantise_bin.resolve(),
                cli_binary=cli_bin.resolve(),
                convert_script=convert_script,
                use_repo=False,
            )

        logger.warning("No conversion script found in current directory")
        logger.info("Will use llama.cpp repository method for conversion")
        return LlamaCppEnvironment(
            quantise_binary=quantise_bin.resolve(),
            cli_binary=cli_bin.resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=True,
        )

    def _find_convert_script(self) -> str | None:
        """Find conversion script in current directory.

        Searches for various naming conventions of the HF to GGUF
        conversion script.

        Returns:
            Command to run conversion script, or None if not found.
        """
        scripts = [
            "./llama-convert-hf-to-gguf",
            "python3 ./convert_hf_to_gguf.py",
            "python3 ./convert-hf-to-gguf.py",
        ]

        for script in scripts:
            if script.startswith("python3"):
                script_path = script.split(" ", 1)[1]
                if Path(script_path).exists():
                    return script
            elif Path(script).exists():
                return script
        return None

    def setup_repository(self) -> LlamaCppEnvironment:
        """Set up llama.cpp repository for conversion scripts.

        Clones the llama.cpp repository if not present and installs
        Python dependencies for model conversion.

        Returns:
            LlamaCppEnvironment configured with repository paths.
        """
        if not self.llama_cpp_dir.exists():
            logger.info("Cloning llama.cpp for conversion script...")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "https://github.com/ggerganov/llama.cpp.git",
                    str(self.llama_cpp_dir),
                ],
                check=True,
            )

            # Install Python requirements
            logger.info("Installing Python requirements...")
            subprocess.run(
                [
                    "pip3",
                    "install",
                    "-r",
                    "requirements.txt",
                    "--break-system-packages",
                    "--root-user-action=ignore",
                ],
                cwd=self.llama_cpp_dir,
                check=True,
            )

            # Install additional conversion dependencies
            logger.info("Installing additional conversion dependencies...")
            subprocess.run(
                [
                    "pip3",
                    "install",
                    "transformers",
                    "sentencepiece",
                    "protobuf",
                    "--break-system-packages",
                    "--root-user-action=ignore",
                ],
                check=True,
            )
        else:
            logger.info("llama.cpp repository already exists")

        # Use local binaries but repo conversion script
        return LlamaCppEnvironment(
            quantise_binary=Path("./llama-quantize").resolve(),
            cli_binary=Path("./llama-cli").resolve(),
            convert_script=f"python3 {self.llama_cpp_dir}/convert_hf_to_gguf.py",
            use_repo=False,
        )


class IMatrixGenerator:
    """Handles importance matrix generation for quantisation guidance.

    Generates or locates importance matrices that guide quantisation
    decisions, helping preserve model quality by identifying critical
    tensors requiring higher precision.
    """

    def __init__(self) -> None:
        """Initialise IMatrixGenerator."""
        self.fs = FilesystemService()

    def generate_imatrix(
        self, f16_model_path: Path, llama_env: LlamaCppEnvironment, model_dir: Path
    ) -> Path | None:
        """Generate importance matrix for quantisation guidance.

        Searches for existing imatrix files first, provides interactive
        prompts for user-supplied matrices, then generates new matrices
        using calibration data if necessary.

        Returns:
            Path to imatrix file, or None if generation fails.
        """
        imatrix_path = model_dir / "imatrix.dat"

        # Check for existing imatrix
        if imatrix_path.exists():
            logger.info(f"Found existing imatrix: {imatrix_path.name}")
            return imatrix_path

        # Try user-provided imatrix
        user_imatrix = self._prompt_for_user_imatrix(model_dir, imatrix_path)
        if user_imatrix:
            return user_imatrix

        # Generate new imatrix
        calibration_file = self._get_calibration_file()
        if not calibration_file:
            return None

        return self._generate_new_imatrix(f16_model_path, llama_env, imatrix_path, calibration_file)

    def _prompt_for_user_imatrix(self, model_dir: Path, imatrix_path: Path) -> Path | None:
        """Prompt user for existing imatrix file.

        Returns:
            Path to user-provided imatrix, or None if not available.
        """
        logger.info(f"Model directory: {model_dir}")
        logger.info(f"Looking for imatrix file at: {imatrix_path}")
        logger.info(
            "Tip: You can download pre-computed imatrix files from Bartowski's repositories!"
        )
        logger.info(
            " Example: https://huggingface.co/bartowski/MODEL-NAME-GGUF/resolve/main/MODEL-NAME.imatrix"
        )

        response = (
            input("\n❓ Do you have an imatrix file to place in the model directory? (y/N): ")
            .strip()
            .lower()
        )

        if response != "y":
            return None

        logger.info(f"Please place your imatrix.dat file in: {model_dir}")
        input("⏳ Press Enter when you've placed the imatrix.dat file (or Ctrl+C to cancel)...")

        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"Found imatrix file! ({file_size})")
            return imatrix_path

        logger.warning("No imatrix.dat file found - continuing with automatic generation")
        return None

    def _get_calibration_file(self) -> Path | None:
        """Get calibration data file for imatrix generation.

        Returns:
            Path to calibration file, or None if not found.
        """
        calibration_file = Path(__file__).parent.parent.parent / "resources" / "imatrix_data.txt"
        if not calibration_file.exists():
            logger.warning("resources/imatrix_data.txt not found - skipping imatrix generation")
            logger.info(
                "Download from: https://gist.githubusercontent.com/bartowski1182/"
                "eb213dccb3571f863da82e99418f81e8/raw/calibration_datav3.txt"
            )
            return None
        return calibration_file

    def _generate_new_imatrix(
        self,
        f16_model_path: Path,
        llama_env: LlamaCppEnvironment,
        imatrix_path: Path,
        calibration_file: Path,
    ) -> Path | None:
        """Generate new importance matrix using calibration data.

        Returns:
            Path to generated imatrix, or None if generation fails.
        """
        logger.info("Generating importance matrix (this may take 1-4 hours for large models)...")
        logger.info(f"Model: {f16_model_path.name}")
        logger.info(f"Calibration: {calibration_file}")
        logger.info(f"Output: {imatrix_path}")

        # Find imatrix binary
        imatrix_binary = self._find_imatrix_binary(llama_env)
        if not imatrix_binary:
            logger.warning("llama-imatrix binary not found - skipping imatrix generation")
            logger.info("Make sure llama-imatrix is in the same directory as llama-quantize")
            return None

        # Build and execute command
        cmd = self._build_imatrix_command(
            imatrix_binary, f16_model_path, calibration_file, imatrix_path
        )
        return self._execute_imatrix_generation(cmd, imatrix_path)

    def _build_imatrix_command(
        self, binary: Path, model_path: Path, calibration_file: Path, output_path: Path
    ) -> list[str]:
        """Build imatrix generation command.

        Returns:
            Command arguments as list.
        """
        return [
            str(binary),
            "-m",
            str(model_path),
            "-f",
            str(calibration_file),
            "-o",
            str(output_path),
            "--process-output",
            "--output-frequency",
            "10",
            "--save-frequency",
            "50",
            "-t",
            "8",
            "-c",
            "2048",
            "-b",
            "512",
        ]

    def _execute_imatrix_generation(self, cmd: list[str], imatrix_path: Path) -> Path | None:
        """Execute imatrix generation command with real-time output.

        Returns:
            Path to generated imatrix file, or None if generation fails.
        """
        logger.info(f"Running: {' '.join(cmd)}")
        logger.info("Starting imatrix generation... (progress will be shown)")
        try:
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                bufsize=1,
            )

            self._stream_imatrix_output(process)

            return_code = process.poll()
            if return_code == 0:
                return self._validate_imatrix_output(imatrix_path)

        except KeyboardInterrupt:
            logger.info("imatrix generation cancelled by user")
            process.terminate()
            return None
        except Exception as e:
            logger.error(f"imatrix generation failed with exception: {e}")
            return None
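        # try/except/else: this else runs only when no exception occurred,
        # i.e. the process exited cleanly but with a non-zero return code.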
        else:
            logger.error(f"imatrix generation failed with return code {return_code}")
            return None

    def _stream_imatrix_output(self, process: subprocess.Popen) -> None:
        """Stream imatrix generation output in real-time."""
        while True:
            if process.stdout is not None:
                output = process.stdout.readline()
            else:
                break
            if not output and process.poll() is not None:
                break
            if output:
                line = output.strip()
                if self._should_log_imatrix_line(line):
                    logger.info(line)

    def _should_log_imatrix_line(self, line: str) -> bool:
        """Determine if imatrix output line should be logged.

        Returns:
            True if line should be logged, False otherwise.
        """
        keywords = ["Computing imatrix", "perplexity:", "save_imatrix", "entries =", "ETA"]
        return any(keyword in line for keyword in keywords) or line.startswith("[")

    def _validate_imatrix_output(self, imatrix_path: Path) -> Path | None:
        """Validate generated imatrix file.

        Returns:
            Path to imatrix if valid, None otherwise.
        """
        if imatrix_path.exists():
            file_size = self.fs.get_file_size(imatrix_path)
            logger.info(f"imatrix generation successful! ({file_size})")
            return imatrix_path
        logger.error("imatrix generation completed but file not found")
        return None

    def _find_imatrix_binary(self, llama_env: LlamaCppEnvironment) -> Path | None:
        """Find llama-imatrix binary in common locations.

        Searches for the imatrix binary in the current directory and
        standard installation paths.

        Returns:
            Path to imatrix binary, or None if not found.
        """
        candidates = [
            Path("./llama-imatrix"),
            llama_env.quantise_binary.parent / "llama-imatrix",
            Path("/usr/local/bin/llama-imatrix"),
            Path("/usr/bin/llama-imatrix"),
        ]

        for candidate in candidates:
            if candidate.exists() and candidate.is_file():
                return candidate

        return None