package convert

import (
	"bytes"
	"encoding/binary"
	"os"
	"slices"
	"strings"
	"testing"

	"github.com/ollama/ollama/fs/ggml"
)

// boolPtr returns a pointer to v, for populating optional boolean config fields.
func boolPtr(v bool) *bool { return &v }

// readTensorData round-trips tensor through its WriteTo serialization and
// decodes the payload back into a flat []float32 for comparison.
func readTensorData(t *testing.T, tensor *ggml.Tensor) []float32 {
	t.Helper()

	var b bytes.Buffer
	if _, err := tensor.WriteTo(&b); err != nil {
		t.Fatal(err)
	}

	numel := 1
	for _, d := range tensor.Shape {
		numel *= int(d)
	}

	values := make([]float32, numel)
	if err := binary.Read(&b, binary.LittleEndian, &values); err != nil {
		t.Fatal(err)
	}
	return values
}

func TestQwen3NextLegacyModelTypeDisablesReorder(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_next",
		},
	}
	if m.shouldReorderVHeads() {
		t.Fatalf("legacy qwen3_next model_type should not reorder v-head layout")
	}
}

func TestQwen3NextLegacyArchitectureDisablesReorder(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			Architectures: []string{"Qwen3NextForCausalLM"},
		},
	}
	if m.shouldReorderVHeads() {
		t.Fatalf("legacy Qwen3Next architecture should not reorder v-head layout")
	}
}

func TestQwen3NextKVLegacyConfig(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_next",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			MaxPositionEmbeddings: 8192,
			HiddenSize: 512,
			NumHiddenLayers: 4,
			IntermediateSize: 2048,
			NumAttentionHeads: 8,
			NumKeyValueHeads: 2,
			HeadDim: 64,
			RopeTheta: 1_000_000,
			RMSNormEPS: 1e-6,
			NumExperts: 8,
			NumExpertsPerToken: 2,
			NormTopkProb: boolPtr(true),
			MoEIntermediateSize: 256,
			SharedExpertIntermSize: 512,
			FullAttentionInterval: 2,
			LinearConvKernelDim: 4,
			LinearKeyHeadDim: 64,
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearValueHeadDim: 64,
			PartialRotaryFactor: 0.25,
		},
	}

	if err := m.parseMore(os.DirFS(t.TempDir())); err != nil {
		t.Fatal(err)
	}

	kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
	if got, want := kv["general.architecture"], "qwen35moe"; got != want {
		t.Fatalf("unexpected architecture: got %v want %v", got, want)
	}
	if got, want := kv["tokenizer.ggml.pre"], "qwen35"; got != want {
		t.Fatalf("unexpected tokenizer pre: got %v want %v", got, want)
	}

	headCountKV, ok := kv["attention.head_count_kv"].([]uint32)
	if !ok {
		t.Fatalf("attention.head_count_kv has unexpected type: %T", kv["attention.head_count_kv"])
	}
	if got, want := headCountKV, []uint32{0, 2, 0, 2}; !slices.Equal(got, want) {
		t.Fatalf("unexpected attention.head_count_kv: got %v want %v", got, want)
	}
	if _, ok := kv["ssm.v_head_reordered"]; ok {
		t.Fatalf("legacy qwen3next should not enable ssm.v_head_reordered")
	}
	if got, want := kv["norm_top_k_prob"], true; got != want {
		t.Fatalf("unexpected norm_top_k_prob: got %v want %v", got, want)
	}
}

func TestQwen35MoeOmitsNormTopKProbWhenUnset(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			MaxPositionEmbeddings: 4096,
			HiddenSize: 512,
			NumHiddenLayers: 4,
			IntermediateSize: 2048,
			NumAttentionHeads: 8,
			NumKeyValueHeads: 2,
			HeadDim: 64,
			RopeTheta: 1_000_000,
			RMSNormEPS: 1e-6,
			NumExperts: 8,
			NumExpertsPerToken: 2,
			FullAttentionInterval: 2,
			LinearConvKernelDim: 4,
			LinearKeyHeadDim: 64,
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearValueHeadDim: 64,
			PartialRotaryFactor: 0.25,
		},
	}

	if err := m.parseMore(os.DirFS(t.TempDir())); err != nil {
		t.Fatal(err)
	}

	kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
	if _, ok := kv["norm_top_k_prob"]; ok {
		t.Fatalf("expected norm_top_k_prob to be omitted when not set in config")
	}
}
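// The tests above populate the embedded qwen3NextTextConfig directly. The
// test below instead fills the nested TextConfig pointer together with
// VisionModel and the image token IDs, which, judging by the fields
// involved, mirrors the multimodal qwen3_5 config layout.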
func TestQwen35KVFromTextConfig(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		TextConfig: &qwen3NextTextConfig{
			MaxPositionEmbeddings: 16384,
			HiddenSize: 1024,
			NumHiddenLayers: 4,
			IntermediateSize: 4096,
			NumAttentionHeads: 8,
			NumKeyValueHeads: 4,
			HeadDim: 128,
			RMSNormEPS: 1e-6,
			LayerTypes: []string{
				"linear_attention",
				"full_attention",
				"linear_attention",
				"full_attention",
			},
			LinearConvKernelDim: 4,
			LinearKeyHeadDim: 128,
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearValueHeadDim: 128,
			RopeParameters: qwen3NextRopeParams{
				MRopeInterleaved: true,
				MropeSection: []int32{11, 11, 10},
				RopeType: "default",
				RopeTheta: 10_000_000,
				PartialRotaryFactor: 0.25,
			},
		},
		VisionModel: qwen3NextVisionConfig{
			Depth: 2,
			HiddenSize: 128,
			NumHeads: 4,
			InChannels: 3,
			PatchSize: 16,
			SpatialMergeSize: 2,
			RMSNormEps: 1e-6,
			RopeTheta: 10_000,
			TemporalPatchSize: 2,
			DeepstackVisualIndexes: []int32{1},
		},
		ImageTokenID: 1001,
		VisionStartTokenID: 1002,
		VisionEndTokenID: 1003,
	}
	m.VisionModel.Size.ShortestEdge = 224
	m.VisionModel.Size.LongestEdge = 4096
	m.VisionModel.ImageMean = []float32{0.5, 0.5, 0.5}
	m.VisionModel.ImageStd = []float32{0.2, 0.2, 0.2}

	if err := m.parseMore(os.DirFS(t.TempDir())); err != nil {
		t.Fatal(err)
	}

	kv := m.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
	if got, want := kv["general.architecture"], "qwen35"; got != want {
		t.Fatalf("unexpected architecture: got %v want %v", got, want)
	}

	headCountKV, ok := kv["attention.head_count_kv"].([]uint32)
	if !ok {
		t.Fatalf("attention.head_count_kv has unexpected type: %T", kv["attention.head_count_kv"])
	}
	if got, want := headCountKV, []uint32{0, 4, 0, 4}; !slices.Equal(got, want) {
		t.Fatalf("unexpected attention.head_count_kv: got %v want %v", got, want)
	}
	if got, ok := kv["ssm.v_head_reordered"].(bool); !ok || !got {
		t.Fatalf("expected ssm.v_head_reordered=true, got %v (%T)", kv["ssm.v_head_reordered"], kv["ssm.v_head_reordered"])
	}

	mrope, ok := kv["mrope_sections"].([]int32)
	if !ok {
		t.Fatalf("mrope_sections has unexpected type: %T", kv["mrope_sections"])
	}
	if got, want := mrope, []int32{11, 11, 10}; !slices.Equal(got, want) {
		t.Fatalf("unexpected mrope_sections: got %v want %v", got, want)
	}

	ropeSections, ok := kv["rope.dimension_sections"].([]int32)
	if !ok {
		t.Fatalf("rope.dimension_sections has unexpected type: %T", kv["rope.dimension_sections"])
	}
	if got, want := ropeSections, []int32{11, 11, 10}; !slices.Equal(got, want) {
		t.Fatalf("unexpected rope.dimension_sections: got %v want %v", got, want)
	}
	if got, ok := kv["rope.mrope_interleaved"].(bool); !ok || !got {
		t.Fatalf("expected rope.mrope_interleaved=true, got %v (%T)", kv["rope.mrope_interleaved"], kv["rope.mrope_interleaved"])
	}
	if got, want := kv["vision.block_count"], uint32(2); got != want {
		t.Fatalf("unexpected vision.block_count: got %v want %v", got, want)
	}
}

func TestQwen3NextReplacements(t *testing.T) {
	r := strings.NewReplacer((&qwen3NextModel{}).Replacements()...)
	if got, want := r.Replace("model.language_model.layers.1.linear_attn.in_proj_qkv.weight"), "blk.1.attn_qkv.weight"; got != want {
		t.Fatalf("unexpected language-model replacement: got %q want %q", got, want)
	}
	if got, want := r.Replace("model.visual.blocks.0.attn.qkv.weight"), "v.blk.0.attn_qkv.weight"; got != want {
		t.Fatalf("unexpected vision replacement: got %q want %q", got, want)
	}
	if got, want := r.Replace("model.layers.1.linear_attn.in_proj_qkvz.weight"), "blk.1.ssm_in.weight"; got != want {
		t.Fatalf("unexpected legacy replacement: got %q want %q", got, want)
	}
}
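// The reorder tests below all encode the same permutation: rows (or columns)
// packed per linear-attention key head, with that key head's value heads
// adjacent, are regrouped by value-head slot across key heads. With 2 key
// heads and 4 value heads (2 per key head), [k0v0 k0v1 k1v0 k1v1] becomes
// [k0v0 k1v0 k0v1 k1v1]. This reading is inferred from the fixtures; the
// converter implementation is the source of truth.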
func TestQwen35ReordersVHeads(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearValueHeadDim: 1,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.attn_gate.weight",
			shape: []uint64{4, 2},
			data: []float32{0, 1, 2, 3, 4, 5, 6, 7},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := readTensorData(t, out[0]), []float32{0, 1, 4, 5, 2, 3, 6, 7}; !slices.Equal(got, want) {
		t.Fatalf("unexpected data: got %v want %v", got, want)
	}
}

func TestQwen35ReordersAttnQKVOutputDim(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearKeyHeadDim: 1,
			LinearValueHeadDim: 1,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.attn_qkv.weight",
			shape: []uint64{8, 2}, // [out_features, in_features] (HF layout)
			data: []float32{
				0, 1, // q0
				2, 3, // q1
				4, 5, // k0
				6, 7, // k1
				10, 11, // v(k0,v0)
				12, 13, // v(k0,v1)
				20, 21, // v(k1,v0)
				22, 23, // v(k1,v1)
			},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := readTensorData(t, out[0]), []float32{
		0, 1,
		2, 3,
		4, 5,
		6, 7,
		10, 11,
		20, 21,
		12, 13,
		22, 23,
	}; !slices.Equal(got, want) {
		t.Fatalf("unexpected qkv data: got %v want %v", got, want)
	}
}

func TestQwen35ReordersSsmOutInputDim(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearValueHeadDim: 1,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.ssm_out.weight",
			shape: []uint64{2, 4},
			data: []float32{0, 1, 2, 3, 4, 5, 6, 7},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := readTensorData(t, out[0]), []float32{0, 2, 1, 3, 4, 6, 5, 7}; !slices.Equal(got, want) {
		t.Fatalf("unexpected ssm_out data: got %v want %v", got, want)
	}
}

func TestQwen35ReordersSsmBetaRows(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.ssm_beta.weight",
			shape: []uint64{4, 2},
			data: []float32{0, 1, 2, 3, 4, 5, 6, 7},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := readTensorData(t, out[0]), []float32{0, 1, 4, 5, 2, 3, 6, 7}; !slices.Equal(got, want) {
		t.Fatalf("unexpected ssm_beta data: got %v want %v", got, want)
	}
}
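// ssm_conv1d's channel axis carries the same packed q/k/v channels as
// attn_qkv's output axis, so the next test expects the identical permutation
// applied to the (squeezed) conv weight's channel dimension.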
func TestQwen35ReordersConv1DChannelDim(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_5",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearKeyHeadDim: 1,
			LinearValueHeadDim: 1,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.ssm_conv1d.weight",
			shape: []uint64{8, 2}, // [channels, kernel] after squeeze
			data: []float32{
				0, 1, // q0
				2, 3, // q1
				4, 5, // k0
				6, 7, // k1
				10, 11, // v(k0,v0)
				12, 13, // v(k0,v1)
				20, 21, // v(k1,v0)
				22, 23, // v(k1,v1)
			},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := readTensorData(t, out[0]), []float32{
		0, 1,
		2, 3,
		4, 5,
		6, 7,
		10, 11,
		20, 21,
		12, 13,
		22, 23,
	}; !slices.Equal(got, want) {
		t.Fatalf("unexpected conv1d data: got %v want %v", got, want)
	}
}

func TestLegacyQwen3NextDoesNotReorderVHeads(t *testing.T) {
	m := &qwen3NextModel{
		ModelParameters: ModelParameters{
			ModelType: "qwen3_next",
		},
		qwen3NextTextConfig: qwen3NextTextConfig{
			LinearNumKeyHeads: 2,
			LinearNumValueHeads: 4,
			LinearValueHeadDim: 1,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.attn_gate.weight",
			shape: []uint64{4, 1},
			data: []float32{0, 1, 2, 3},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := readTensorData(t, out[0]), []float32{0, 1, 2, 3}; !slices.Equal(got, want) {
		t.Fatalf("unexpected data for legacy qwen3next: got %v want %v", got, want)
	}
}

func TestQwen35MoePackedExperts(t *testing.T) {
	m := &qwen3NextModel{
		qwen3NextTextConfig: qwen3NextTextConfig{
			NumHiddenLayers: 1,
		},
	}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.mlp.experts.gate_up_proj",
			shape: []uint64{2, 4, 3},
			// Each expert packs its gate rows first, then its up rows.
			data: []float32{
				// expert 0
				0, 1, 2,
				3, 4, 5,
				6, 7, 8,
				9, 10, 11,
				// expert 1
				12, 13, 14,
				15, 16, 17,
				18, 19, 20,
				21, 22, 23,
			},
		},
		&fakeTensor{
			name: "blk.0.mlp.experts.down_proj",
			shape: []uint64{2, 5, 3},
			data: make([]float32, 2*5*3),
		},
	})

	get := func(name string) *ggml.Tensor {
		for _, tensor := range out {
			if tensor.Name == name {
				return tensor
			}
		}
		return nil
	}

	gate := get("blk.0.ffn_gate_exps.weight")
	if gate == nil {
		t.Fatalf("missing tensor %q", "blk.0.ffn_gate_exps.weight")
	}
	if got, want := gate.Shape, []uint64{2, 2, 3}; !slices.Equal(got, want) {
		t.Fatalf("unexpected gate shape: got %v want %v", got, want)
	}
	if got, want := readTensorData(t, gate), []float32{
		0, 1, 2,
		3, 4, 5,
		12, 13, 14,
		15, 16, 17,
	}; !slices.Equal(got, want) {
		t.Fatalf("unexpected gate values: got %v want %v", got, want)
	}

	up := get("blk.0.ffn_up_exps.weight")
	if up == nil {
		t.Fatalf("missing tensor %q", "blk.0.ffn_up_exps.weight")
	}
	if got, want := up.Shape, []uint64{2, 2, 3}; !slices.Equal(got, want) {
		t.Fatalf("unexpected up shape: got %v want %v", got, want)
	}
	if got, want := readTensorData(t, up), []float32{
		6, 7, 8,
		9, 10, 11,
		18, 19, 20,
		21, 22, 23,
	}; !slices.Equal(got, want) {
		t.Fatalf("unexpected up values: got %v want %v", got, want)
	}

	down := get("blk.0.ffn_down_exps.weight")
	if down == nil {
		t.Fatalf("missing tensor %q", "blk.0.ffn_down_exps.weight")
	}
	if got, want := down.Shape, []uint64{2, 5, 3}; !slices.Equal(got, want) {
		t.Fatalf("unexpected down shape: got %v want %v", got, want)
	}
}

func TestQwen35SharedExpertGateKeepsMatrixShape(t *testing.T) {
	m := &qwen3NextModel{}

	out := m.Tensors([]Tensor{
		&fakeTensor{
			name: "blk.0.ffn_gate_inp_shexp.weight",
			shape: []uint64{1, 4},
			data: []float32{0, 1, 2, 3},
		},
	})
	if len(out) != 1 {
		t.Fatalf("unexpected output tensor count: got %d want 1", len(out))
	}
	if got, want := out[0].Shape, []uint64{1, 4}; !slices.Equal(got, want) {
		t.Fatalf("unexpected shared gate shape: got %v want %v", got, want)
	}
}