From d1010337c370f52312883b8454d93fab48679d03 Mon Sep 17 00:00:00 2001
From: RangerUFO <ufownl@gmail.com>
Date: Wed, 25 Sep 2024 22:22:28 +0800
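Subject: [PATCH] Fix prefix-LM mode assertion

Only enforce max_tbatch_size >= prefill_this_query when
prefix_end_this_query is non-zero. The batch-size requirement is
documented as applying to prefix-LM mode, but the unconditional
assertion also fired when prefix_end_this_query == 0 and the prefill
was longer than max_tbatch_size.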

---
 gemma/gemma-inl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gemma/gemma-inl.h b/gemma/gemma-inl.h
index dec6449..f49c959 100644
--- a/gemma/gemma-inl.h
+++ b/gemma/gemma-inl.h
@@ -916,7 +916,8 @@ HWY_NOINLINE void Prefill(
     }
     // In prefix-LM mode, we need to look at all the tokens for the prefix in
     // one iteration through the layers, so we need a large enough batch size.
-    HWY_ASSERT(max_tbatch_size >= prefill_this_query);
+    HWY_ASSERT(prefix_end_this_query == 0 ||
+               max_tbatch_size >= prefill_this_query);
 
     // For each batch of tokens in the query:
     for (size_t tbatch_start = 0; tbatch_start < prefill_this_query;