Add MLX runner with GLM4-MoE-Lite model support (#14185)

This change adds a new MLX-based runner which includes:

* Method-based MLX bindings
* Subprocess-based MLX runner (x/mlxrunner)
* KV cache with tree management
* A basic sampler

The GLM4-MoE-Lite model has been ported to use the new bindings.

---------

Co-authored-by: Michael Yang <git@mxy.ng>
2026-04-17 15:53:27 +02:00 · 2026-02-10 14:57:57 -08:00
parent db493d6e5e
commit 44bdd9a2ef
42 changed files with 14900 additions and 9 deletions
--- a/runner/runner.go
+++ b/runner/runner.go
@@ -4,6 +4,7 @@ import (
 	"github.com/ollama/ollama/runner/llamarunner"
 	"github.com/ollama/ollama/runner/ollamarunner"
 	"github.com/ollama/ollama/x/imagegen"
+	"github.com/ollama/ollama/x/mlxrunner"
 )

 func Execute(args []string) error {
@@ -17,6 +18,8 @@ func Execute(args []string) error {
 			return ollamarunner.Execute(args[1:])
 		case "--imagegen-engine":
 			return imagegen.Execute(args[1:])
+		case "--mlx-engine":
+			return mlxrunner.Execute(args[1:])
 		}
 	}
 	return llamarunner.Execute(args)