From b1c70e2e5419ced91eec570b9aabc050afe185e1 Mon Sep 17 00:00:00 2001 From: "Piotr Wilkin (ilintar)" Date: Sat, 21 Mar 2026 00:19:04 +0100 Subject: [PATCH] common/parser: fix nasty bug causing subtle corruption of generation prompt (#20825) --- common/chat-auto-parser-helpers.cpp | 15 +++++++++++++ tests/test-chat-auto-parser.cpp | 35 +++++++++++++++++++++++++++++ tests/test-chat.cpp | 2 +- tools/server/server-task.cpp | 1 + 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index 9dcdde2501..3a7a5c13a7 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -188,6 +188,21 @@ diff_split calculate_diff_split(const std::string & left, const std::string & ri result.suffix = ""; // pick prefix = all as representation } + + // When left has no unique content (result.left is empty), left is entirely + // shared with right. The simultaneous prefix/suffix segment matching can + // incorrectly consume trailing segments of left as suffix when those same + // segments also appear at the end of right (e.g. "\n" at the end of both + // the shared content and the generation prompt). This rotates the diff. + // Fix: if left is a prefix of right, enforce that directly. + if (result.left.empty() && !result.right.empty() && + left.size() <= right.size() && + right.substr(0, left.size()) == left) { + result.prefix = left; + result.suffix = ""; + result.right = right.substr(left.size()); + } + return result; } diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 6abf71d6cf..0ba51ba235 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -22,6 +22,7 @@ static void test_calculate_diff_split_no_common(testing & t); static void test_calculate_diff_split_single_char(testing & t); static void test_calculate_diff_split_overlaps(testing & t); static void test_calculate_diff_split_tag_boundaries(testing & t); +static void test_calculate_diff_split_generation_prompt(testing & t); static void test_calculate_diff_split(testing & t); static void test_until_common_prefix_basic(testing & t); @@ -179,6 +180,7 @@ static void test_calculate_diff_split(testing & t) { t.test("calculate_diff_split single char", test_calculate_diff_split_single_char); t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps); t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries); + t.test("calculate_diff_split generation prompt", test_calculate_diff_split_generation_prompt); } static void test_calculate_diff_split_basic(testing & t) { @@ -502,6 +504,39 @@ static void test_calculate_diff_split_tag_boundaries(testing & t) { } } +static void test_calculate_diff_split_generation_prompt(testing & t) { + // ChatML thinking template: left is a prefix of right, generation_prompt is the appended part. + // The trailing \n in left matches the trailing \n in the generation_prompt, causing + // the suffix matcher to steal it and rotate the diff result. + { + // Simplified reproduction: left ends with \n, right = left + "<|im_start|>assistant\n\n" + std::string left = "<|im_start|>user\nHello<|im_end|>\n"; + std::string right = left + "<|im_start|>assistant\n\n"; + diff_split result = calculate_diff_split(left, right); + t.assert_equal("chatml prefix", left, result.prefix); + t.assert_equal("chatml left", "", result.left); + t.assert_equal("chatml right should be generation prompt", + "<|im_start|>assistant\n\n", result.right); + t.assert_equal("chatml suffix", "", result.suffix); + } + + { + // More realistic: longer conversation ending with tool_response + std::string common = + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + "<|im_start|>user\nSearch for files<|im_end|>\n" + "<|im_start|>assistant\n\nLet me search.\n\n\n" + "\n\n\n<|im_end|>\n" + "<|im_start|>user\n\nNo files found\n<|im_end|>\n"; + std::string left = common; + std::string right = common + "<|im_start|>assistant\n\n"; + diff_split result = calculate_diff_split(left, right); + t.assert_equal("tool_response left", "", result.left); + t.assert_equal("tool_response right should be generation prompt", + "<|im_start|>assistant\n\n", result.right); + } +} + static void test_until_common_prefix(testing & t) { t.test("until_common_prefix basic", test_until_common_prefix_basic); } diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index faac9e7306..575d240791 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1337,7 +1337,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") .enable_thinking(true) .reasoning_format(COMMON_REASONING_FORMAT_NONE) - .expect_content("I'm\nthinking\n\nHello, world!\nWhat's up?") + .expect_content("\nI'm\nthinking\n\nHello, world!\nWhat's up?") .run(); tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 39d232c2e4..7d543b9292 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -415,6 +415,7 @@ task_params server_task::params_from_json_cmpl( params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY); params.chat_parser_params.generation_prompt = json_value(data, "generation_prompt", std::string()); params.sampling.generation_prompt = params.chat_parser_params.generation_prompt; + SRV_DBG("Generation prompt: '%s'\n", params.chat_parser_params.generation_prompt.c_str()); params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false); if (data.contains("chat_parser")) { params.chat_parser_params.parser.load(data.at("chat_parser").get());