diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index de9a9466bc..581bdc1c82 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2558,7 +2558,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "[EOS]" // Kimi-K2
                         || t.first == "<|end_of_text|>"
                         || t.first == "<end_of_utterance>" // smoldocling
-                        || t.first == "<end_of_turn>" // gemma4
+                        || t.first == "<end_of_turn>" // gemma4
+                        || t.first == "</s>"          // gemma4
                         || t.first == "<|tool_response>" // gemma4
                         || t.first == "<|end▁of▁sentence|>" // deepseek-ocr
                     ) {
@@ -2645,6 +2646,33 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
             }
         }
+
+        // workaround for gemma4 and paddleocr: do not include '</s>' as an eog token
+        // if '<|tool_response>' is also present (gemma4), '</s>' terminates a tool
+        // response rather than generation, so it must not stop decoding
+        {
+            bool has_tool_response = false;
+            bool has_s             = false;
+
+            llama_token s_id = LLAMA_TOKEN_NULL;
+
+            for (auto tid : special_eog_ids) {
+                const auto & text = id_to_token[tid].text;
+                if (text == "<|tool_response>") {
+                    has_tool_response = true;
+                } else if (text == "</s>") {
+                    has_s = true;
+                    s_id  = tid;
+                }
+            }
+
+            if (has_tool_response && has_s) {
+                special_eog_ids.erase(s_id);
+
+                // demote the token so downstream EOG checks treat it as normal text
+                auto & attr = id_to_token[s_id].attr;
+                attr = LLAMA_TOKEN_ATTR_NORMAL;
+
+                LLAMA_LOG_WARN("%s: special_eog_ids contains '<|tool_response>', removing '</s>' token from EOG list\n", __func__);
+            }
+        }
     }
 
     // build special tokens cache