From 1f54cf4b73e361ad1ddff6a564a71fc5c6bdbe19 Mon Sep 17 00:00:00 2001
From: Ryan Mangeno <160974989+ryan-mangeno@users.noreply.github.com>
Date: Fri, 10 Oct 2025 12:16:38 -0400
Subject: [PATCH] Update src/llama-model.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
---
 src/llama-model.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 8076c68bbc..e33655a236 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -8071,13 +8071,9 @@ struct llm_build_modern_bert : public llm_graph_context {
 
             const size_t type_size = ggml_type_size(cur->type);
 
-            Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*type_size*(n_embd)));
-            Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 1*type_size*(n_embd)));
-            Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 2*type_size*(n_embd)));
-
-            Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
-            Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
-            Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
+            Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head,    n_tokens, n_embd_head*type_size, cur->nb[1], 0*type_size*(n_embd));
+            Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*type_size, cur->nb[1], 1*type_size*(n_embd));
+            Vcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*type_size, cur->nb[1], 1*type_size*(n_embd + n_embd_gqa));
 
             // RoPE
             Qcur = ggml_rope_ext(