kv-cache : bounds-check when accessing slot_info indices

2025-07-02 13:39:10 +03:00 · 2025-07-02 13:39:10 +03:00 · f3da97e61b
parent a70293bc25
commit f3da97e61b
2 changed files with 4 additions and 4 deletions
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -717,7 +717,7 @@ void llama_kv_cache_unified::apply_ubatch(const slot_info & sinfo, const llama_u
    assert(ubatch.n_tokens == sinfo.idxs.size());

    for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
-        const auto idx = sinfo.idxs[i];
+        const auto idx = sinfo.idxs.at(i);

        if (!cells.is_empty(idx)) {
            assert(cells.seq_count(idx) == 1);
@@ -915,7 +915,7 @@ void llama_kv_cache_unified::set_input_k_idxs(ggml_tensor * dst, const llama_uba
    int64_t * data = (int64_t *) dst->data;

    for (int64_t i = 0; i < n_tokens; ++i) {
-        data[i] = sinfo.idxs[i];
+        data[i] = sinfo.idxs.at(i);
    }
 }

@@ -930,7 +930,7 @@ void llama_kv_cache_unified::set_input_v_idxs(ggml_tensor * dst, const llama_uba
    int64_t * data = (int64_t *) dst->data;

    for (int64_t i = 0; i < n_tokens; ++i) {
-        data[i] = sinfo.idxs[i];
+        data[i] = sinfo.idxs.at(i);
    }
 }

--- a/src/llama-kv-cache-unified.h
+++ b/src/llama-kv-cache-unified.h
@@ -42,7 +42,7 @@ public:
        idx_vec_t idxs;

        uint32_t head() const {
-            return idxs[0];
+            return idxs.at(0);
        }

        bool empty() const {