diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 81c8255a11..4b0ff261be 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -236,46 +236,29 @@ void analyze_reasoning::compare_reasoning_presence() { const std::string reasoning_content = "Let me think about this."; if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) { - auto seg = prune_whitespace_segments(segmentize_markers(diff.right)); - if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) { - // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace) - mode = reasoning_mode::TAG_BASED; - start = trim_whitespace(seg[0].value); - end = trim_leading_whitespace(seg[2].value); - for (size_t i = 3; i < seg.size(); i++) { - end += seg[i].value; - } - // we always truncate because this doesn't really influence correctness but model might not always generate newline - end = trim_whitespace(end); - } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) { - // delimited - mode = reasoning_mode::DELIMITER; - end = trim_leading_whitespace(seg[1].value); - for (size_t i = 2; i < seg.size(); i++) { - end += seg[i].value; - } - end = trim_whitespace(end); - } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) { - // the marker might be in the prefix actually, let's check for case of - // left: empty - // right: reasoning_content - // suffix: content - // prefix: ... - auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix)); - if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER && - trim_whitespace(suf_seg[1].value).find("I can help.") == 0) { - auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix)); - if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER || - (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() && - pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) { - auto marker_seg = pre_seg[pre_seg.size() - 1]; - if (marker_seg.type == segment_type::TEXT) { - marker_seg = pre_seg[pre_seg.size() - 2]; - } - mode = reasoning_mode::FORCED_CLOSED; - start = trim_whitespace(marker_seg.value); - end = trim_whitespace(suf_seg[0].value); + auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest()); + }); + auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest(); + }); + // try the more aggressive parse first, if it fails, fall back to the delimiter one + auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B); + if (!result.result.success()) { + result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B); + } + if (result.result.success()) { + if (!result.tags["pre"].empty() && !result.tags["post"].empty()) { + if (parser_wrapped.parse_anywhere_and_extract(diff.right).result.success()) { // both tags in the diff = no forced close + mode = reasoning_mode::TAG_BASED; + } else { + mode = reasoning_mode::FORCED_CLOSED; } + start = trim_whitespace(result.tags["pre"]); + end = result.tags["post"]; + } else if (!result.tags["post"].empty()) { + mode = reasoning_mode::DELIMITER; + end = result.tags["post"]; } } } @@ -301,12 +284,10 @@ void analyze_reasoning::compare_thinking_enabled() { const auto & diff = comparison->diff; - std::string left_trimmed = diff.left; - trim_whitespace(left_trimmed); + std::string left_trimmed = trim_whitespace(diff.left); if (left_trimmed.empty() && !diff.right.empty()) { - std::string right_trimmed = diff.right; - trim_whitespace(right_trimmed); + std::string right_trimmed = trim_whitespace(diff.right); if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) { if (start.empty()) { @@ -323,42 +304,25 @@ void analyze_reasoning::compare_thinking_enabled() { // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers, // but enable_thinking=true produces only the start marker if (!comparison->output_A.empty() && !comparison->output_B.empty()) { - std::string output_A = comparison->output_A; // enable_thinking=false - std::string output_B = comparison->output_B; // enable_thinking=true - - // Both should end with the assistant role marker - // Check if output_A has both reasoning_start and reasoning_end markers - // while output_B has only reasoning_start - if (!start.empty()) { - // Check if output_A contains both start and end markers - bool A_has_start = output_A.find(start) != std::string::npos; - bool A_has_end = !end.empty() && output_A.find(end) != std::string::npos; - - // Check if output_B contains only the start marker (and not the end marker) - bool B_has_start = output_B.find(start) != std::string::npos; - bool B_has_end = !end.empty() && output_B.find(end) != std::string::npos; - - // For FORCED_CLOSED: A should have both, B should have only start - if (A_has_start && A_has_end && B_has_start && !B_has_end) { - mode = reasoning_mode::FORCED_CLOSED; - } - } else if (!end.empty()) { - // We might not have detected the reasoning open marker until now, - // but this is another chance to do so - auto diff = comparison->diff; - auto diff_rt = trim_whitespace(diff.right); - auto diff_lt = trim_whitespace(diff.left); - if (diff_rt.empty() && diff_lt == end) { - auto seg = segmentize_markers(trim_whitespace(diff.prefix)); - if (!seg.empty() && seg[seg.size() - 1].type == MARKER) { // this is FORCED_CLOSED - start = seg[seg.size() - 1].value; - mode = reasoning_mode::FORCED_CLOSED; - } + auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.literal(start) + p.space() + p.literal(end) + p.rest(); + }); + auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest(); + }); + if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() && + parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) { + mode = reasoning_mode::FORCED_CLOSED; + } else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier + auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A); + if (result.result.success()) { + start = result.tags["pre"]; + mode = reasoning_mode::FORCED_CLOSED; } } } - if (start.empty() && end.empty()) { + if (start.empty() && end.empty()) { // we might still have the case of "just open" and "just close" if (!diff.left.empty() && !diff.right.empty()) { auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left)); auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right)); @@ -408,41 +372,25 @@ void analyze_reasoning::compare_reasoning_scope() { if (!reasoning_in_A && reasoning_in_B) { mode = reasoning_mode::TOOLS_ONLY; - LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n"); + LOG_DBG(ANSI_ORANGE "%s: Detected TOOLS_ONLY reasoning mode\n" ANSI_RESET, __func__); - // Extract reasoning markers from output_B - // The reasoning_content is "Let me think." - size_t reasoning_pos = comparison->output_B.find(reasoning_content); - if (reasoning_pos != std::string::npos) { - // Find start marker before reasoning_content - std::string before_reasoning = comparison->output_B.substr(0, reasoning_pos); - before_reasoning = trim_trailing_whitespace(before_reasoning); - auto segments_before = segmentize_markers(before_reasoning); - std::reverse(segments_before.begin(), segments_before.end()); - - for (auto & segment : segments_before) { - if (segment.type == segment_type::MARKER) { - start = segment.value; - break; - } - } - - // Find end marker after reasoning_content - size_t reasoning_end = reasoning_pos + reasoning_content.length(); - std::string after_reasoning = comparison->output_B.substr(reasoning_end); - after_reasoning = trim_leading_whitespace(after_reasoning); - - if (!after_reasoning.empty()) { - // Try to find matching end marker - if (!start.empty()) { - auto segments = segmentize_markers(after_reasoning); - for (auto & segment : segments) { - if (segment.type == segment_type::MARKER) { - end = segment.value; - break; - } - } - } + auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest(); + }); + auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B); + if (result.result.success()) { + start = result.tags["pre"]; + end = result.tags["post"]; + } else { + auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest()); + }); + result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B); + if (result.result.success()) { + end = result.tags["post"]; + } else { + LOG_DBG(ANSI_ORANGE "%s: Unable to extracft reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__); + mode = reasoning_mode::NONE; } } } @@ -501,18 +449,11 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz // We only have the content text in the diff (possibly with a stray EOG marker), so no markers mode = content_mode::PLAIN; found_plain_content = true; - } - /* auto segments = segmentize_markers(diff_reasoning.left); - if (trim_whitespace(diff_reasoning.left) == response || - (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) { - // We only have the content text in the diff (possibly with a stray EOG marker), so no markers - mode = content_mode::PLAIN; - found_plain_content = true; - }*/ else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() && - diff_reasoning.left.find(reasoning.end) != std::string::npos) { - std::string post_closed_reasoning = diff_reasoning.left.substr( - diff_reasoning.left.find(reasoning.end) + reasoning.end.length()); - if (trim_whitespace(post_closed_reasoning) == "Response text") { + } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty()) { + auto post_reasoning_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) { + return p.literal(reasoning.end) + p.space() + p.literal(response); + }); + if (post_reasoning_parser.parse_anywhere_and_extract(diff_reasoning.left).result.success()) { mode = content_mode::PLAIN; found_plain_content = true; } @@ -525,13 +466,12 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz } // Take the more promising diff std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left; - size_t pos = pure_content.find("Response text"); - if (pos == std::string::npos) { - LOG_DBG(ANSI_ORANGE "%s: Error: response text not found - improper template application?\n" ANSI_RESET, __func__); - return; - } - start = trim_leading_whitespace(pure_content.substr(0, pos)); - end = trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" + auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) { + return p.tag("pre", p.marker()) + p.space() + p.literal(response) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest(); + }); + auto result = parser_wrapped.parse_anywhere_and_extract(pure_content); + start = result.tags["pre"]; + end = result.tags["post"]; // TODO: WRAPPED_WITH_REASONING } diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index c78428491f..9687850efd 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -1291,7 +1291,7 @@ static void test_nemotron_reasoning_detection(testing & t) { // Check reasoning markers t.assert_equal("reasoning_start should be ''", "", analysis.reasoning.start); - t.assert_equal("reasoning_end should be ''", "", analysis.reasoning.end); + t.assert_equal("reasoning_end should be '\\n'", "\n", analysis.reasoning.end); // Check reasoning mode detection // Nemotron uses forced closed reasoning with add_generation_prompt diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index d9f1eea2f2..9037dd2bd5 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1448,7 +1448,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - tst.test("I'm thinking about the answerHello, world!") + tst.test("I'm thinking about the answer\nHello, world!") .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer")) .run(); @@ -2179,7 +2179,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { { auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug); - tst.test("I was thinkingNow I'm not."). + tst.test("I was thinking\nNow I'm not."). enable_thinking(true). reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK). expect_reasoning("I was thinking").