mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 22:54:05 +02:00
* gemma4: implement Gemma 4 model for MLX (text-only runtime) * gemma4: two MoE + SWA prefill perf fixes. Two performance optimizations in the gemma4 forward pass: 1. Memoize the sliding-window prefill mask across layers. 2. Softmax only over the selected experts in Router.Forward. * review comments
12 lines
363 B
Go
12 lines
363 B
Go
package mlxrunner
|
|
|
|
// Blank imports: each model package below is imported only for its side
// effects (its package-level initialization runs); no identifier from any
// of them is referenced in this file.
// NOTE(review): presumably each package's init registers its model
// architecture so the runner can look it up by name — confirm in x/models/*.
import (
|
|
_ "github.com/ollama/ollama/x/models/gemma3"
|
|
_ "github.com/ollama/ollama/x/models/gemma4"
|
|
_ "github.com/ollama/ollama/x/models/glm4_moe_lite"
|
|
_ "github.com/ollama/ollama/x/models/llama"
|
|
_ "github.com/ollama/ollama/x/models/qwen3"
|
|
_ "github.com/ollama/ollama/x/models/qwen3_5"
|
|
_ "github.com/ollama/ollama/x/models/qwen3_5_moe"
|
|
)
|