mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 19:54:03 +02:00
Keep Gemma4 router projection in source precision (#15613)
This commit is contained in:
@@ -93,6 +93,13 @@ func (t gemma4ImportTransform) quantizationType(name string, shape []int32, quan
|
||||
return ""
|
||||
}
|
||||
|
||||
// MoE router logits choose the top-k expert set. Quantization noise here
|
||||
// can flip expert selection, after which downstream activations diverge
|
||||
// sharply. The tensor is small, so leave it in source precision.
|
||||
if isGemma4RouterProjection(name) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Mixed-precision quantization: sensitive tensors get higher precision.
|
||||
//
|
||||
// Value projections (v_proj) directly determine attention output quality.
|
||||
@@ -170,6 +177,12 @@ func isEmbedTokensWeight(name string) bool {
|
||||
!strings.Contains(name, "per_layer")
|
||||
}
|
||||
|
||||
// isGemma4RouterProjection reports whether name is a text-model MoE router
// projection weight. Router tensors in the audio and vision towers are
// excluded; only the language-model router matches.
func isGemma4RouterProjection(name string) bool {
	// Guard: only ".router.proj.weight" tensors can qualify at all.
	if !strings.HasSuffix(name, ".router.proj.weight") {
		return false
	}
	// Reject routers that live inside the multimodal towers.
	return !strings.Contains(name, "audio_tower") &&
		!strings.Contains(name, "vision_tower")
}
|
||||
|
||||
func (t gemma4ImportTransform) transformTensor(td *safetensors.TensorData) ([]*safetensors.TensorData, error) {
|
||||
if td == nil {
|
||||
return nil, nil
|
||||
|
||||
@@ -68,6 +68,11 @@ func TestGemma4QuantizationType(t *testing.T) {
|
||||
{"expert gate_up nvfp4", transform26B, "model.layers.0.moe.experts.42.gate_up_proj.weight", aligned, "nvfp4", "nvfp4"},
|
||||
{"expert gate_up mxfp4", transform26B, "model.layers.0.moe.experts.42.gate_up_proj.weight", aligned, "mxfp4", "mxfp4"},
|
||||
|
||||
// === Router projection: expert selection is sensitive; keep source precision ===
|
||||
{"router proj int4", transform26B, "model.layers.0.router.proj.weight", aligned, "int4", ""},
|
||||
{"router proj nvfp4", transform26B, "model.layers.0.router.proj.weight", aligned, "nvfp4", ""},
|
||||
{"router proj mxfp4", transform26B, "model.layers.0.router.proj.weight", aligned, "mxfp4", ""},
|
||||
|
||||
// === k_proj: promoted only for 8-expert models ===
|
||||
{"k_proj 128 experts int4", transform26B, "model.layers.0.self_attn.k_proj.weight", aligned, "int4", "int4"},
|
||||
{"k_proj 8 experts int4", transform8E, "model.layers.0.self_attn.k_proj.weight", aligned, "int4", "int8"},
|
||||
|
||||
Reference in New Issue
Block a user