Revert "Update vendored llama.cpp to b7847" (#14061)

2026-04-20 01:03:29 +02:00 · 2026-02-03 18:39:36 -08:00
parent a6355329bf
commit b1fccabb34
240 changed files with 5050 additions and 21247 deletions
--- a/llama/llama.cpp/src/llama-hparams.h
+++ b/llama/llama.cpp/src/llama-hparams.h
@@ -3,7 +3,6 @@
 #include "llama.h"

 #include <array>
-#include <cassert>

 // bump if necessary
 #define LLAMA_MAX_LAYERS  512
@@ -53,8 +52,8 @@ struct llama_hparams {
    uint32_t n_rel_attn_bkts = 0;

    // note: deepseek2 using MLA converts into MQA with larger heads, then decompresses to MHA
-    uint32_t n_embd_head_k_mla_impl = 0;
-    uint32_t n_embd_head_v_mla_impl = 0;
+    uint32_t n_embd_head_k_mla = 0;
+    uint32_t n_embd_head_v_mla = 0;

    // for WavTokenizer
    struct llama_hparams_posnet   posnet;
@@ -108,9 +107,9 @@ struct llama_hparams {

    float    rope_attn_factor = 1.0f;
    float    rope_freq_base_train;
-    float    rope_freq_base_train_swa  = 10000.0f;
+    float    rope_freq_base_train_swa;
    float    rope_freq_scale_train;
-    float    rope_freq_scale_train_swa = 1.0f;
+    float    rope_freq_scale_train_swa;

    uint32_t n_ctx_orig_yarn;
    float    rope_yarn_log_mul = 0.0f;
@@ -126,11 +125,10 @@ struct llama_hparams {
    llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
    // the size of the sliding window (0 - no SWA)
    uint32_t n_swa = 0;
-    // if swa_layers[il] == 1, then layer il is SWA
-    // if swa_layers[il] == 0, then layer il is dense (i.e. non-SWA)
+    // if swa_layers[il] == true, then layer il is SWA
+    // if swa_layers[il] == false, then layer il is dense (i.e. non-SWA)
    // by default, all layers are dense
-    // note: using uint32_t type for compatibility reason
-    std::array<uint32_t, LLAMA_MAX_LAYERS> swa_layers;
+    std::array<bool, LLAMA_MAX_LAYERS> swa_layers;

    // for State Space Models
    uint32_t ssm_d_conv  = 0;
@@ -165,9 +163,6 @@ struct llama_hparams {
    // for Classifiers
    uint32_t n_cls_out = 1;

-    // output embedding dimension (0 = use n_embd)
-    uint32_t n_embd_out_impl = 0;
-
    // llama4 smallthinker
    uint32_t n_moe_layer_step        = 0;
    uint32_t n_no_rope_layer_step    = 4;
@@ -240,9 +235,6 @@ struct llama_hparams {
    // dimension of main + auxiliary input embeddings
    uint32_t n_embd_inp() const;

-    // dimension of output embeddings
-    uint32_t n_embd_out() const;
-
    // dimension of key embeddings across all k-v heads
    uint32_t n_embd_k_gqa(uint32_t il = 0) const;

@@ -274,57 +266,15 @@ struct llama_hparams {

    bool is_swa(uint32_t il) const;

-    // note: currently only support if either all or none of the layers are MLA
-    bool is_mla() const;
-
-    uint32_t n_embd_head_k_mla() const;
-    uint32_t n_embd_head_v_mla() const;
-
    bool has_kv(uint32_t il) const;

    // number of layers for which has_kv() returns true
    uint32_t n_layer_kv() const;

    // note that this function uses different SWA parameters from those in the hparams
-    // note: inlined on purpose for performance reasons
    // TODO: think of a better place for this function
    // TODO: pack the SWA params in a struct?
-    static bool is_masked_swa(uint32_t n_swa, llama_swa_type swa_type, llama_pos p0, llama_pos p1) {
-        assert(p0 >= 0 && p1 >= 0);
-
-        switch (swa_type) {
-            case LLAMA_SWA_TYPE_NONE:
-                {
-                } break;
-            case LLAMA_SWA_TYPE_STANDARD:
-                {
-                    if (p1 - p0 >= (int32_t) n_swa) {
-                        return true;
-                    }
-                } break;
-            case LLAMA_SWA_TYPE_CHUNKED:
-                {
-                    const llama_pos pos_chunk_start = (p1 / n_swa) * n_swa;
-
-                    if (p0 < pos_chunk_start) {
-                        return true;
-                    }
-                } break;
-            case LLAMA_SWA_TYPE_SYMMETRIC:
-                {
-                    const int32_t half_n_swa = (int32_t) n_swa / 2;
-                    const int32_t pos_diff = p1 - p0;
-
-                    // Mask if outside the symmetric window
-                    if (pos_diff < -half_n_swa || pos_diff > half_n_swa) {
-                        return true;
-                    }
-                } break;
-        }
-
-        return false;
-    }
-
+    static bool is_masked_swa(uint32_t n_swa, llama_swa_type swa_type, llama_pos p0, llama_pos p1);

    bool use_mrope() const;
 };