From ae33204660d44e778a299869ea4c0a4baa187093 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 6 Feb 2026 19:09:53 +0100 Subject: [PATCH] Fix bad permute --- src/models/delta.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/models/delta.cpp b/src/models/delta.cpp index 533d5ff3e8..d1d9837d09 100644 --- a/src/models/delta.cpp +++ b/src/models/delta.cpp @@ -118,7 +118,11 @@ std::pair llm_graph_context_delta::build_delta_net q = ggml_cont_4d(ctx0, ggml_permute(ctx0, q, 0, 2, 1, 3), S_k, n_tokens, H_k, n_seqs); k = ggml_cont_4d(ctx0, ggml_permute(ctx0, k, 0, 2, 1, 3), S_k, n_tokens, H_k, n_seqs); v = ggml_cont_4d(ctx0, ggml_permute(ctx0, v, 0, 2, 1, 3), S_v, n_tokens, H_v, n_seqs); - g = ggml_cont_4d(ctx0, ggml_permute(ctx0, g, 0, 2, 1, 3), is_kda ? S_k : 1, n_tokens, H_k, n_seqs); + if (is_kda) { + g = ggml_cont_4d(ctx0, ggml_permute(ctx0, g, 0, 2, 1, 3), S_k, n_tokens, H_k, n_seqs); + } else { + g = ggml_cont_4d(ctx0, ggml_permute(ctx0, g, 2, 0, 3, 1), n_tokens, 1, H_k, n_seqs); + } beta = ggml_cont(ctx0, ggml_permute(ctx0, beta, 2, 0, 1, 3)); cb(q, "q_perm", il); @@ -136,7 +140,7 @@ std::pair llm_graph_context_delta::build_delta_net k = ggml_pad(ctx0, k, 0, pad, 0, 0); v = ggml_pad(ctx0, v, 0, pad, 0, 0); beta = ggml_pad(ctx0, beta, 0, pad, 0, 0); - g = ggml_pad(ctx0, g, 0, pad, 0, 0); + g = ggml_pad(ctx0, g, pad, 0, 0, 0); cb(q, "q_pad", il);