From 2d55904a1548152355ba73c2e3961da630b1520b Mon Sep 17 00:00:00 2001 From: Karlon <44919207+oopb@users.noreply.github.com> Date: Tue, 30 Dec 2025 16:21:53 +0800 Subject: [PATCH] Clarify token handling in shift_context comments Updated comments in shift_context function to clarify token handling. --- examples/llama.android/lib/src/main/cpp/ai_chat.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/llama.android/lib/src/main/cpp/ai_chat.cpp b/examples/llama.android/lib/src/main/cpp/ai_chat.cpp index 8f1026769d..a37b6b6343 100644 --- a/examples/llama.android/lib/src/main/cpp/ai_chat.cpp +++ b/examples/llama.android/lib/src/main/cpp/ai_chat.cpp @@ -300,9 +300,11 @@ static void reset_short_term_states() { * TODO-hyin: implement sliding-window version as a better alternative * * Context shifting by discarding the older half of the tokens appended after system prompt: - * - take the [system_prompt_position] first tokens from the original prompt - * - take half of the last (system_prompt_position - system_prompt_position) tokens + * - take the [keep_first] first tokens from the original prompt + * - take half of the last (current_position - keep_first) tokens + * - recompute the logits in batches + + * + * attention_sink: keep the first 4 tokens to maintain attention. */ static void shift_context() { const int attention_sink = 4;