Runtime selection of new or old runners

This adjusts the new runners to commingle with the existing runners so that an environment variable can toggle the new runners on at runtime.
2026-04-24 01:35:49 +02:00 · 2024-08-01 08:54:44 -07:00
parent 8527028bf4
commit 751009a5d7
5 changed files with 399 additions and 158 deletions
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -140,6 +140,8 @@ var (
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
 	// IntelGPU enables experimental Intel GPU detection.
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
+	// NewRunners enables the new experimental runners. Set via OLLAMA_NEW_RUNNERS in the environment.
+	NewRunners = Bool("OLLAMA_NEW_RUNNERS")
 )

 func String(s string) func() string {
@@ -250,6 +252,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_NEW_RUNNERS":       {"OLLAMA_NEW_RUNNERS", NewRunners(), "Enable new experimental runners"},
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
 		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},