Add MLX runner with GLM4-MoE-Lite model support (#14185)

This change adds a new MLX-based runner, which includes:

  * Method-based MLX bindings
  * Subprocess-based MLX runner (x/mlxrunner)
  * KV cache with tree management
  * A basic sampler

The GLM4-MoE-Lite model has been ported to use the new bindings.

---------

Co-authored-by: Michael Yang <git@mxy.ng>
This commit is contained in:
Patrick Devine
2026-02-10 14:57:57 -08:00
committed by GitHub
parent db493d6e5e
commit 44bdd9a2ef
42 changed files with 14900 additions and 9 deletions

View File

@@ -4,6 +4,7 @@ import (
"github.com/ollama/ollama/runner/llamarunner"
"github.com/ollama/ollama/runner/ollamarunner"
"github.com/ollama/ollama/x/imagegen"
"github.com/ollama/ollama/x/mlxrunner"
)
func Execute(args []string) error {
@@ -17,6 +18,8 @@ func Execute(args []string) error {
return ollamarunner.Execute(args[1:])
case "--imagegen-engine":
return imagegen.Execute(args[1:])
case "--mlx-engine":
return mlxrunner.Execute(args[1:])
}
}
return llamarunner.Execute(args)