From 3581b68e06c09aa42cd71d5f530b2d4c5849e846 Mon Sep 17 00:00:00 2001
From: Ryan Mangeno <160974989+ryan-mangeno@users.noreply.github.com>
Date: Fri, 10 Oct 2025 15:14:20 -0400
Subject: [PATCH] Update src/llama-model.cpp

Co-authored-by: Gabe Goodhart <ghart@us.ibm.com>
---
 src/llama-model.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index bd687036a9..d43d637672 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -8098,6 +8098,11 @@ struct llm_build_modern_bert : public llm_graph_context {
                         Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
             cb(cur, "kqv_out", il);
 
+            if (il == n_layer - 1 && inp_out_ids) {
+                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
+                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+            }
+
             // re-add the layer input
             cur = ggml_add(ctx0, cur, inpL);