Gemma4 on MLX (#15244)

* gemma4: implement Gemma 4 model for MLX (text-only runtime)

* gemma4: two MoE + SWA prefill perf fixes

Two performance optimizations in the gemma4 forward pass:

1. Memoize the sliding-window prefill mask across layers.
2. Softmax only over the selected experts in Router.Forward.

* address review comments
This commit is contained in:
Daniel Hiltgen
2026-04-13 16:36:51 -07:00
committed by GitHub
parent bf2a421727
commit 2cba7756c5
8 changed files with 2715 additions and 0 deletions

View File

@@ -560,6 +560,9 @@ func getParserName(modelDir string) string {
if strings.Contains(archLower, "deepseek") {
return "deepseek3"
}
if strings.Contains(archLower, "gemma4") {
return "gemma4"
}
if strings.Contains(archLower, "qwen3") {
return "qwen3"
}
@@ -574,6 +577,9 @@ func getParserName(modelDir string) string {
if strings.Contains(typeLower, "deepseek") {
return "deepseek3"
}
if strings.Contains(typeLower, "gemma4") {
return "gemma4"
}
if strings.Contains(typeLower, "qwen3") {
return "qwen3"
}
@@ -602,6 +608,9 @@ func getRendererName(modelDir string) string {
// Check architectures for known renderers
for _, arch := range cfg.Architectures {
archLower := strings.ToLower(arch)
if strings.Contains(archLower, "gemma4") {
return "gemma4"
}
if strings.Contains(archLower, "glm4") || strings.Contains(archLower, "glm-4") {
return "glm-4.7"
}
@@ -616,6 +625,9 @@ func getRendererName(modelDir string) string {
// Also check model_type
if cfg.ModelType != "" {
typeLower := strings.ToLower(cfg.ModelType)
if strings.Contains(typeLower, "gemma4") {
return "gemma4"
}
if strings.Contains(typeLower, "glm4") || strings.Contains(typeLower, "glm-4") {
return "glm-4.7"
}