# vast-ollama-benchmark .env — configuration for the Vast.ai Ollama benchmark image.
# Format: shell-style KEY=VALUE assignments; comments start with # at column 0.
# Vast.ai API configuration
# Your Vast.ai API key for instance management.
# NOTE(review): this is a secret — keep the real value out of version control
# (use a placeholder here and inject the key at deploy time).
VAST_API_KEY="YOUR_API_KEY"

# GPU instance specifications
# GPU type to request (e.g. RTX 4090, RTX 5090, A100)
GPU_TYPE="RTX 5090"
# Number of GPUs to allocate for the instance
NUM_GPUS=1
# Disk space in GB for the instance
DISK_SPACE=30
# Region to filter for instances (e.g. "PL", "RO", or leave empty for any)
REGION=""

# Benchmark configuration
# Number of test runs per context length
TEST_ITERATIONS=3
# Starting context length for benchmark
CONTEXT_START=8000
# Maximum context length for benchmark
CONTEXT_END=128000
# Multiplier for context length progression
# (contexts tested: CONTEXT_START, CONTEXT_START*2, ... up to CONTEXT_END)
CONTEXT_MULTIPLIER=2

# Ollama configuration
# Model to benchmark
OLLAMA_MODEL="hf.co/unsloth/gemma-3-12b-it-GGUF:Q5_K_XL"
# Enable flash attention for performance
OLLAMA_FLASH_ATTENTION="1"
# How long to keep model loaded in memory
OLLAMA_KEEP_ALIVE="10m"
# KV cache quantisation type
OLLAMA_KV_CACHE_TYPE="q8_0"
# Maximum number of loaded models
OLLAMA_MAX_LOADED_MODELS="1"
# Number of GPUs to use for Ollama
OLLAMA_NUM_GPU="1"
# Number of parallel requests
OLLAMA_NUM_PARALLEL="1"

# Local file transfer settings
# Local directory to store benchmark results
LOCAL_RESULTS_DIR="./results"