diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 88bb8bc803..2aa3f85a84 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -2222,7 +2222,7 @@ ggml_tensor * llm_graph_context::build_attn_sparse( cb(kqv, "kqv", il); ggml_tensor * cur = ggml_permute(ctx0, kqv, 0, 2, 1, 3); - cur = ggml_cont_2d(ctx0, cur, cur->ne[0]*cur->ne[1], cur->ne[2]); + cur = ggml_cont_2d(ctx0, cur, cur->ne[0]*cur->ne[1], cur->ne[2]*cur->ne[3]); cb(cur, "kqv_out", il); if (wo) {