diff --git a/llama/patches/0027-interleave-multi-rope.patch b/llama/patches/0027-interleave-multi-rope.patch index 6ca94029d..cb67119f7 100644 --- a/llama/patches/0027-interleave-multi-rope.patch +++ b/llama/patches/0027-interleave-multi-rope.patch @@ -6,11 +6,11 @@ Subject: [PATCH] interleave multi rope since ollama doesn't use mrope for anything else, change it to mean the interleaved version used for qwen3vl --- - ggml/src/ggml-cpu/ops.cpp | 8 ++++---- - ggml/src/ggml-cuda/rope.cu | 8 ++++---- - ggml/src/ggml-metal/ggml-metal.metal | 8 ++++---- - ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl | 8 ++++---- - 4 files changed, 16 insertions(+), 16 deletions(-) + ggml/src/ggml-cpu/ops.cpp | 8 ++++---- + ggml/src/ggml-cuda/rope.cu | 8 ++++---- + ggml/src/ggml-metal/ggml-metal.metal | 10 +++++----- + ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl | 8 ++++---- + 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 7d1733adb..f4aae5332 100644 @@ -59,12 +59,15 @@ index 88ed79111..71ca60214 100644 } else { if (sector < sections.v[0]) { diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal -index 236838e9e..c98d269d1 100644 +index 236838e9e..18b8bb1b1 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal -@@ -4242,14 +4242,14 @@ kernel void kernel_rope_multi( +@@ -4240,16 +4240,16 @@ kernel void kernel_rope_multi( + const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2 + const int sector = ic % sect_dims; - float theta_base; +- float theta_base; ++ float theta_base = 0.0; if (FC_rope_is_imrope) { - if (sector % 3 == 1 && sector < 3 * args.sect_1) { // h + if (sector % 3 == 1 && sector < 1 + 3 * args.sect_1) { // h diff --git a/llama/patches/0032-ggml-enable-MLA-flash-attention-for-GLM-4.7-flash.patch b/llama/patches/0032-ggml-enable-MLA-flash-attention-for-GLM-4.7-flash.patch index abd7df930..1ea8d36e1 100644 --- a/llama/patches/0032-ggml-enable-MLA-flash-attention-for-GLM-4.7-flash.patch +++ b/llama/patches/0032-ggml-enable-MLA-flash-attention-for-GLM-4.7-flash.patch @@ -296,7 +296,7 @@ index e99c1763f..80864f303 100644 const size_t smem = FATTN_SMEM(nsg); diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal -index c98d269d1..d33c16079 100644 +index 18b8bb1b1..114767785 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -6166,6 +6166,7 @@ kernel void kernel_flash_attn_ext( diff --git a/llama/patches/0033-ggml-metal-solve_tri.patch b/llama/patches/0033-ggml-metal-solve_tri.patch index 7bc65fda7..f0e17dd4f 100644 --- a/llama/patches/0033-ggml-metal-solve_tri.patch +++ b/llama/patches/0033-ggml-metal-solve_tri.patch @@ -204,7 +204,7 @@ index 902b54452..a475183d3 100644 int ggml_metal_op_norm (ggml_metal_op_t ctx, int idx); int ggml_metal_op_rope (ggml_metal_op_t ctx, int idx); diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal -index d33c16079..c37447a10 100644 +index 114767785..876a9eecc 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -3012,6 +3012,66 @@ kernel void kernel_l2_norm_f32( diff --git a/llama/patches/0034-ggml-metal-guard-mul_mat_id-map0-and-add-ne20-22-spe.patch b/llama/patches/0034-ggml-metal-guard-mul_mat_id-map0-and-add-ne20-22-spe.patch index ff0c8199d..6934ca8d0 100644 --- a/llama/patches/0034-ggml-metal-guard-mul_mat_id-map0-and-add-ne20-22-spe.patch +++ b/llama/patches/0034-ggml-metal-guard-mul_mat_id-map0-and-add-ne20-22-spe.patch @@ -24,7 +24,7 @@ index 4ac135603..ac5ad53db 100644 // ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf (Table 2.5) //switch (op->src[0]->type) { diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal -index c37447a10..4f338aa13 100644 +index 876a9eecc..b14a0000c 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -9427,6 +9427,7 @@ template [[host_name("kernel_mul_mm_id_map0_ne20_6" )]] kernel kernel_mul_mm_id_ diff --git a/llama/patches/0036-backport-kernels-for-gemma4.patch b/llama/patches/0036-backport-kernels-for-gemma4.patch index 7c2186e08..46066a6f7 100644 --- a/llama/patches/0036-backport-kernels-for-gemma4.patch +++ b/llama/patches/0036-backport-kernels-for-gemma4.patch @@ -342,7 +342,7 @@ index 4e5acfbe5..11457f2b1 100644 return false; } diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal -index 4f338aa13..8be0c1f0c 100644 +index b14a0000c..398c80717 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -6276,6 +6276,7 @@ template [[host_name("kernel_flash_attn_ext_f32_dk128_dv128")]] kernel flash_at diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal index 38ce1fdc6..a02b05c94 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal @@ -7122,7 +7122,7 @@ kernel void kernel_rope_multi( const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2 const int sector = ic % sect_dims; - float theta_base; + float theta_base = 0.0; if (FC_rope_is_imrope) { if (sector % 3 == 1 && sector < 1 + 3 * args.sect_1) { // h theta_base = (float) pos[i2 + args.ne02 * 1]; diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal index 8be0c1f0c..398c80717 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal @@ -4300,7 +4300,7 @@ kernel void kernel_rope_multi( const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2 const int sector = ic % sect_dims; - float theta_base; + float theta_base = 0.0; if (FC_rope_is_imrope) { if (sector % 3 == 1 && sector < 1 + 3 * args.sect_1) { // h theta_base = (float) pos[i2 + args.ne02 * 1];