From 7e9bea7f1c2b88712fc582123e042f24cf7e8000 Mon Sep 17 00:00:00 2001
From: Samaresh Kumar Singh
Date: Sun, 28 Dec 2025 12:14:46 -0600
Subject: [PATCH] [AI] android: fix infinite generation in shift_context()

When shift_context() discards tokens to free KV cache space, it
decrements current_position but not stop_generation_position. This
causes the termination check (current_position >=
stop_generation_position) to never trigger, resulting in infinite
text generation.

Fix by also decrementing stop_generation_position by n_discard tokens.

Fixes #18409
---
 examples/llama.android/lib/src/main/cpp/ai_chat.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/llama.android/lib/src/main/cpp/ai_chat.cpp b/examples/llama.android/lib/src/main/cpp/ai_chat.cpp
index d655a0965f..9e8676dcdd 100644
--- a/examples/llama.android/lib/src/main/cpp/ai_chat.cpp
+++ b/examples/llama.android/lib/src/main/cpp/ai_chat.cpp
@@ -283,6 +283,7 @@ static void shift_context() {
     llama_memory_seq_rm(llama_get_memory(g_context), 0, system_prompt_position, system_prompt_position + n_discard);
     llama_memory_seq_add(llama_get_memory(g_context), 0, system_prompt_position + n_discard, current_position, -n_discard);
     current_position -= n_discard;
+    stop_generation_position -= n_discard;
     LOGi("%s: Context shifting done! Current position: %d", __func__, current_position);
 }
 
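Note for reviewers: below is a minimal, self-contained sketch of the
assumed generation loop, illustrating why an unshifted
stop_generation_position loops forever. The names current_position,
stop_generation_position, and n_discard come from the patch; everything
else (the loop structure, n_ctx, max_new_tokens, the simulated decode)
is hypothetical and is not the actual ai_chat.cpp code.

    // sketch.cpp -- hypothetical reduction of the bug; not the real ai_chat.cpp
    #include <cstdio>

    static int current_position         = 0; // tokens decoded into the KV cache
    static int stop_generation_position = 0; // position at which generation must stop

    // Mirrors the patched function: both counters must move together.
    static void shift_context(int n_discard) {
        // (llama_memory_seq_rm / llama_memory_seq_add calls elided)
        current_position         -= n_discard;
        stop_generation_position -= n_discard; // <-- the one-line fix
    }

    int main() {
        const int n_ctx          = 64;  // hypothetical context size
        const int max_new_tokens = 100; // request more tokens than fit in n_ctx

        stop_generation_position = current_position + max_new_tokens;

        // Termination check from the commit message:
        // stop once current_position >= stop_generation_position.
        while (current_position < stop_generation_position) {
            if (current_position >= n_ctx) {
                // Without the fix, this pulls current_position back below
                // n_ctx while the stop position stays put, so the loop
                // never terminates once the request overflows the context.
                shift_context(/*n_discard=*/16);
            }
            current_position += 1; // stand-in for decoding one token
        }
        std::printf("stopped at position %d\n", current_position);
        return 0;
    }

With the fix, the loop decodes exactly max_new_tokens tokens (100 here)
and exits; without the stop_generation_position decrement,
current_position oscillates between 48 and 64 indefinitely.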