From f3da97e61b0eb5966c0efd3dd6150d18b7f22bf1 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 2 Jul 2025 13:39:10 +0300
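Subject: [PATCH] kv-cache : bounds-check when accessing slot_info indices

Replace operator[] with at() when reading the idxs vector in
apply_ubatch(), set_input_k_idxs() and set_input_v_idxs(), and in the
head() accessor declared in llama-kv-cache-unified.h.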
---
 src/llama-kv-cache-unified.cpp | 6 +++---
 src/llama-kv-cache-unified.h   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
index a2164c2b39..8117a7b005 100644
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -717,7 +717,7 @@ void llama_kv_cache_unified::apply_ubatch(const slot_info & sinfo, const llama_u
     assert(ubatch.n_tokens == sinfo.idxs.size());
 
     for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
-        const auto idx = sinfo.idxs[i];
+        const auto idx = sinfo.idxs.at(i);
 
         if (!cells.is_empty(idx)) {
             assert(cells.seq_count(idx) == 1);
@@ -915,7 +915,7 @@ void llama_kv_cache_unified::set_input_k_idxs(ggml_tensor * dst, const llama_uba
     int64_t * data = (int64_t *) dst->data;
 
     for (int64_t i = 0; i < n_tokens; ++i) {
-        data[i] = sinfo.idxs[i];
+        data[i] = sinfo.idxs.at(i);
     }
 }
 
@@ -930,7 +930,7 @@ void llama_kv_cache_unified::set_input_v_idxs(ggml_tensor * dst, const llama_uba
     int64_t * data = (int64_t *) dst->data;
 
     for (int64_t i = 0; i < n_tokens; ++i) {
-        data[i] = sinfo.idxs[i];
+        data[i] = sinfo.idxs.at(i);
     }
 }
 
diff --git a/src/llama-kv-cache-unified.h b/src/llama-kv-cache-unified.h
index 3163b2779c..5d1c161d15 100644
--- a/src/llama-kv-cache-unified.h
+++ b/src/llama-kv-cache-unified.h
@@ -42,7 +42,7 @@ public:
         idx_vec_t idxs;
 
         uint32_t head() const {
-            return idxs[0];
+            return idxs.at(0);
         }
 
         bool empty() const {