gpu-llm-benchmarking/.env.example
Tom Foster 86e9de9e75
All checks were successful
Build Vast.ai Ollama Benchmark Image / Build and Push (push) Successful in 4m43s
Initial commit
2025-07-28 16:58:21 +01:00

26 lines
1.2 KiB
Text

# Example environment file — copy to `.env` and fill in real values before use.
# This file is a committed template: never place a live API key in it.
# Vast.ai API configuration
VAST_API_KEY="YOUR_API_KEY" # Your Vast.ai API key for instance management
# GPU instance specifications
GPU_TYPE="RTX 5090" # GPU type to request (e.g. RTX 4090, RTX 5090, A100)
NUM_GPUS=1 # Number of GPUs to allocate for the instance
DISK_SPACE=30 # Disk space in GB for the instance
REGION="" # Region to filter for instances (e.g. "PL", "RO", or leave empty for any)
# Benchmark configuration
TEST_ITERATIONS=3 # Number of test runs per context length
CONTEXT_START=8000 # Starting context length for benchmark
CONTEXT_END=128000 # Maximum context length for benchmark
CONTEXT_MULTIPLIER=2 # Multiplier for context length progression
# NOTE(review): with these defaults the sweep is presumably
# 8000 -> 16000 -> 32000 -> 64000 -> 128000 — confirm against the benchmark script.
# Ollama configuration
OLLAMA_MODEL="hf.co/unsloth/gemma-3-12b-it-GGUF:Q5_K_XL" # Model to benchmark
OLLAMA_FLASH_ATTENTION="1" # Enable flash attention for performance
OLLAMA_KEEP_ALIVE="10m" # How long to keep model loaded in memory
OLLAMA_KV_CACHE_TYPE="q8_0" # KV cache quantisation type
OLLAMA_MAX_LOADED_MODELS="1" # Maximum number of loaded models
OLLAMA_NUM_GPU="1" # Number of GPUs to use for Ollama
OLLAMA_NUM_PARALLEL="1" # Number of parallel requests
# Local file transfer settings
LOCAL_RESULTS_DIR="./results" # Local directory to store benchmark results