diff --git a/tools/completion/completion.cpp b/tools/completion/completion.cpp index f368a2f4c6..977132756f 100644 --- a/tools/completion/completion.cpp +++ b/tools/completion/completion.cpp @@ -674,15 +674,12 @@ int main(int argc, char ** argv) { } } - for (int i = 0; i < (int) embd.size(); i += params.n_batch) { - int n_eval = (int) embd.size() - i; - if (n_eval > params.n_batch) { - n_eval = params.n_batch; - } - + if (!embd.empty()) { + int n_eval = (int) embd.size(); LOG_DBG("eval: %s\n", string_from(ctx, embd).c_str()); - if (llama_decode(ctx, llama_batch_get_one(&embd[i], n_eval))) { + GGML_ASSERT(n_eval <= params.n_batch); + if (llama_decode(ctx, llama_batch_get_one(embd.data(), n_eval))) { LOG_ERR("%s : failed to eval\n", __func__); return 1; } @@ -743,7 +740,7 @@ int main(int argc, char ** argv) { common_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false); ++n_consumed; - if ((int) embd.size() >= params.n_batch) { + if ((int) embd.size() == params.n_batch) { break; } }