# Ollama server environment configuration.
# NOTE(review): the original file was collapsed onto a single line that began
# with '#', so every export below was a comment and none took effect.
# Reformatted one export per line; active values are unchanged.

export OLLAMA_DEBUG=0                    # Show additional debug information (e.g. OLLAMA_DEBUG=1)
export OLLAMA_HOST=http://0.0.0.0:11434  # IP Address for the ollama server (default 127.0.0.1:11434)
export OLLAMA_KEEP_ALIVE=15m             # The duration that models stay loaded in memory (default "5m")
export OLLAMA_MAX_LOADED_MODELS=1        # Maximum number of loaded models per GPU

# Optional tuning knobs, left unset by default:
#export OLLAMA_MAX_QUEUE=                # Maximum number of queued requests
#export OLLAMA_NUM_PARALLEL=             # Maximum number of parallel requests
#export OLLAMA_NOPRUNE=                  # Do not prune model blobs on startup
#export OLLAMA_ORIGINS=                  # A comma separated list of allowed origins
#export OLLAMA_SCHED_SPREAD=             # Always schedule model across all GPUs
#export OLLAMA_FLASH_ATTENTION=          # Enable flash attention
#export OLLAMA_KV_CACHE_TYPE=            # Quantization type for the K/V cache (default: f16)
#export OLLAMA_LLM_LIBRARY=              # Set LLM library to bypass autodetection
#export OLLAMA_GPU_OVERHEAD=             # Reserve a portion of VRAM per GPU (bytes)
#export OLLAMA_LOAD_TIMEOUT=             # How long to allow model loads to stall before giving up (default "5m")

# Set the models path to the external volume.
export OLLAMA_MODELS="/Volumes/AI/ollama/.ollama/models"
# NOTE(review): overriding HOME affects everything launched from this
# environment, not just ollama — confirm this is intentional (likely needed
# when run from launchd, which may not set HOME).
export HOME="/Users/davec"