From 0c8986403b52f43e4d3bf519afd78aefcdaee238 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A9ctor=20Estrada=20Moreno?=
Date: Mon, 29 Dec 2025 05:21:13 -0600
Subject: [PATCH] retrieval : use at most n_seq_max chunks (#18400)

---
 examples/retrieval/retrieval.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index 2c2143ad10..8f92ff9057 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -222,8 +222,8 @@ int main(int argc, char ** argv) {
     float * emb = embeddings.data();
 
     // break into batches
-    int p = 0; // number of prompts processed already
-    int s = 0; // number of prompts in current batch
+    unsigned int p = 0; // number of prompts processed already
+    unsigned int s = 0; // number of prompts in current batch
     for (int k = 0; k < n_chunks; k++) {
         // clamp to n_batch tokens
         auto & inp = chunks[k].tokens;
@@ -231,7 +231,7 @@ int main(int argc, char ** argv) {
         const uint64_t n_toks = inp.size();
 
         // encode if at capacity
-        if (batch.n_tokens + n_toks > n_batch) {
+        if (batch.n_tokens + n_toks > n_batch || s >= llama_n_seq_max(ctx)) {
             float * out = emb + p * n_embd;
             batch_process(ctx, batch, out, s, n_embd);
             common_batch_clear(batch);
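
As a sketch of the batching rule the patch introduces: each chunk is embedded as its
own sequence, so a batch must be flushed not only when the next chunk would push it
past n_batch tokens, but also once it already holds n_seq_max sequences. The standalone
C++ program below mirrors that flushing rule with plain integers; the values of n_batch
and n_seq_max, the chunk sizes, and the printf standing in for the example's
batch_process() are made up for illustration, not taken from llama.cpp.

// Standalone sketch of the flushing rule: encode the pending batch when
// the next chunk would overflow n_batch tokens, or when the batch already
// holds n_seq_max sequences (each chunk occupies one sequence id).
#include <cstdio>
#include <vector>

int main() {
    // made-up stand-ins for llama_n_batch(ctx) and llama_n_seq_max(ctx)
    const unsigned int n_batch   = 32;
    const unsigned int n_seq_max = 4;

    // made-up token counts of the tokenized chunks
    const std::vector<unsigned int> chunk_tokens = { 10, 6, 3, 2, 1, 20, 5, 5, 5, 5 };

    unsigned int p = 0;        // number of chunks processed already
    unsigned int s = 0;        // number of chunks in the current batch
    unsigned int n_tokens = 0; // number of tokens in the current batch

    for (size_t k = 0; k < chunk_tokens.size(); k++) {
        const unsigned int n_toks = chunk_tokens[k];

        // encode if at capacity: by tokens, or by sequences (the new check)
        if (n_tokens + n_toks > n_batch || s >= n_seq_max) {
            std::printf("encode chunks [%u, %u): %u tokens in %u sequences\n",
                        p, p + s, n_tokens, s);
            p += s;
            s = 0;
            n_tokens = 0;
        }

        // add the chunk to the current batch as sequence id s
        n_tokens += n_toks;
        s++;
    }

    // encode the final partial batch
    if (s > 0) {
        std::printf("encode chunks [%u, %u): %u tokens in %u sequences\n",
                    p, p + s, n_tokens, s);
    }

    return 0;
}

Flushing before adding, as in the patched example, means the chunk that triggered the
flush becomes the first sequence of the next batch, so no chunk is ever split across
batches.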