diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp index 73f33b546a..a2164c2b39 100644 --- a/src/llama-kv-cache-unified.cpp +++ b/src/llama-kv-cache-unified.cpp @@ -624,14 +624,15 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ } } - uint32_t n_found = 0; uint32_t n_tested = 0; const uint32_t n_test = cont ? n_tokens : 1; slot_info res; - res.idxs.resize(n_tokens); + auto & idxs = res.idxs; + + idxs.reserve(n_tokens); while (true) { if (head_cur + n_test > cells.size()) { @@ -677,20 +678,18 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ n_tested++; if (can_use) { - res.idxs[n_found] = idx; - - n_found++; + idxs.push_back(idx); } else { break; } } - if (n_found == n_tokens) { + if (idxs.size() == n_tokens) { break; } if (cont) { - n_found = 0; + idxs.clear(); } if (n_tested >= cells.size()) { @@ -700,7 +699,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ } // we didn't find a suitable slot - return empty result - if (n_found < n_tokens) { + if (idxs.size() < n_tokens) { res.clear(); }