fix: update callback for ffn_moe_weighted and add callback for attn_out in deepseek2 model
parent 1e08157134
commit 6c0715befc
@@ -1106,7 +1106,7 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
 
     if (!weight_before_ffn) {
         experts = ggml_mul(ctx0, experts, weights);
-        cb(cur, "ffn_moe_weighted", il);
+        cb(experts, "ffn_moe_weighted", il);
     }
 
     ggml_tensor * cur_experts[LLAMA_MAX_EXPERTS] = { nullptr };
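Note on this hunk: after the ggml_mul, the weighted result lives in experts, not in cur, so the old call tagged a tensor that was never the weighted output. As far as I can tell, cb() here only attaches a per-layer name such as "ffn_moe_weighted-<il>" so that debug/eval callbacks can locate the tensor later. Below is a minimal, self-contained sketch of that naming-and-lookup mechanism using the plain ggml API (not llama.cpp's cb() itself; the name "ffn_moe_weighted-0" and the toy sizes are made up for illustration):

// Sketch only: demonstrates the tensor-naming mechanism that cb() builds on.
// Assumes ggml.h is available; the name "ffn_moe_weighted-0" and the sizes are illustrative.
#include "ggml.h"
#include <cstdio>

int main() {
    ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    ggml_context * ctx = ggml_init(params);

    ggml_tensor * experts = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_tensor * weights = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    // analogous to: experts = ggml_mul(ctx0, experts, weights); cb(experts, "ffn_moe_weighted", il);
    ggml_tensor * weighted = ggml_mul(ctx, experts, weights);
    ggml_set_name(weighted, "ffn_moe_weighted-0");

    ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, weighted);

    // the name is what lets a debugger or eval callback find this node later
    ggml_tensor * t = ggml_graph_get_tensor(gf, "ffn_moe_weighted-0");
    printf("lookup by name: %s\n", t != nullptr ? t->name : "not found");

    ggml_free(ctx);
    return 0;
}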
@@ -74,6 +74,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
             cur = build_attn(inp_attn,
                     model.layers[il].wo, NULL,
                     Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
+            cb(cur, "attn_out", il);
         }
         else {
             ggml_tensor * q = NULL;
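Note on this hunk: with the attention output of this branch now named "attn_out-<il>" as well, it can be picked up by name in a graph eval callback, which is how tools such as the imatrix collector observe intermediate tensors. A rough sketch of such a filter follows, assuming ggml-backend's ggml_backend_sched_eval_callback hook; the function name watch_named_tensors and the registration step are not part of this commit:

// Sketch only: an eval callback that watches the tensors named via cb() above.
// Assumes ggml.h / ggml-backend.h; how the callback is registered with a context
// (e.g. through an eval-callback hook) is deliberately left out.
#include "ggml.h"
#include "ggml-backend.h"
#include <cstdio>
#include <cstring>

static bool watch_named_tensors(struct ggml_tensor * t, bool ask, void * /*user_data*/) {
    const bool interesting =
        strncmp(t->name, "attn_out",         strlen("attn_out"))         == 0 ||
        strncmp(t->name, "ffn_moe_weighted", strlen("ffn_moe_weighted")) == 0;

    if (ask) {
        // scheduling pass: report whether we want to see this tensor's data
        return interesting;
    }

    // data pass: the tensor has been computed and can be inspected here
    if (interesting) {
        printf("captured %s: ne = [%lld, %lld]\n",
               t->name, (long long) t->ne[0], (long long) t->ne[1]);
    }
    return true; // returning true keeps graph evaluation going
}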