Runtime selection of new or old runners

This adjusts the new runners to comingle with existing runners so we can use an
env var to toggle the new runners on.
This commit is contained in:
Daniel Hiltgen
2024-08-01 08:54:44 -07:00
committed by jmorganca
parent 8527028bf4
commit 751009a5d7
5 changed files with 399 additions and 158 deletions

View File

@@ -140,6 +140,8 @@ var (
SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
// IntelGPU enables experimental Intel GPU detection.
IntelGPU = Bool("OLLAMA_INTEL_GPU")
// Set via OLLAMA_NEW_RUNNERS in the environment
NewRunners = Bool("OLLAMA_NEW_RUNNERS")
)
func String(s string) func() string {
@@ -250,6 +252,7 @@ func AsMap() map[string]EnvVar {
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_NEW_RUNNERS": {"OLLAMA_NEW_RUNNERS", NewRunners(), "Enable new experimental runners"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},