mirror of
https://github.com/ollama/ollama.git
synced 2026-04-22 08:45:53 +02:00
Gemma4 on MLX (#15244)
* gemma4: implement Gemma 4 model for MLX (text-only runtime)
* gemma4: two MoE + SWA prefill perf fixes

  Two performance optimizations in the gemma4 forward pass:
  1. Memoize the sliding-window prefill mask across layers.
  2. Softmax only over the selected experts in Router.Forward.
* review comments
This commit is contained in:
@@ -560,6 +560,9 @@ func getParserName(modelDir string) string {
|
||||
if strings.Contains(archLower, "deepseek") {
|
||||
return "deepseek3"
|
||||
}
|
||||
if strings.Contains(archLower, "gemma4") {
|
||||
return "gemma4"
|
||||
}
|
||||
if strings.Contains(archLower, "qwen3") {
|
||||
return "qwen3"
|
||||
}
|
||||
@@ -574,6 +577,9 @@ func getParserName(modelDir string) string {
|
||||
if strings.Contains(typeLower, "deepseek") {
|
||||
return "deepseek3"
|
||||
}
|
||||
if strings.Contains(typeLower, "gemma4") {
|
||||
return "gemma4"
|
||||
}
|
||||
if strings.Contains(typeLower, "qwen3") {
|
||||
return "qwen3"
|
||||
}
|
||||
@@ -602,6 +608,9 @@ func getRendererName(modelDir string) string {
|
||||
// Check architectures for known renderers
|
||||
for _, arch := range cfg.Architectures {
|
||||
archLower := strings.ToLower(arch)
|
||||
if strings.Contains(archLower, "gemma4") {
|
||||
return "gemma4"
|
||||
}
|
||||
if strings.Contains(archLower, "glm4") || strings.Contains(archLower, "glm-4") {
|
||||
return "glm-4.7"
|
||||
}
|
||||
@@ -616,6 +625,9 @@ func getRendererName(modelDir string) string {
|
||||
// Also check model_type
|
||||
if cfg.ModelType != "" {
|
||||
typeLower := strings.ToLower(cfg.ModelType)
|
||||
if strings.Contains(typeLower, "gemma4") {
|
||||
return "gemma4"
|
||||
}
|
||||
if strings.Contains(typeLower, "glm4") || strings.Contains(typeLower, "glm-4") {
|
||||
return "glm-4.7"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user