Replace reasoning and content analysis with PEG-parser-based

2026-02-17 00:07:25 +01:00 · 2026-02-17 00:07:25 +01:00 · 4580fce1c7
parent a17950fb63
commit 4580fce1c7
3 changed files with 71 additions and 131 deletions
--- a/common/chat-diff-analyzer.cpp
+++ b/common/chat-diff-analyzer.cpp
@ -236,46 +236,29 @@ void analyze_reasoning::compare_reasoning_presence() {
    const std::string reasoning_content = "Let me think about this.";

    if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) {
-        auto seg = prune_whitespace_segments(segmentize_markers(diff.right));
-        if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) {
-            // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace)
-            mode  = reasoning_mode::TAG_BASED;
-            start = trim_whitespace(seg[0].value);
-            end   = trim_leading_whitespace(seg[2].value);
-            for (size_t i = 3; i < seg.size(); i++) {
-                end += seg[i].value;
-            }
-            // we always truncate because this doesn't really influence correctness but model might not always generate newline
-            end = trim_whitespace(end);
-        } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) {
-            // delimited
-            mode = reasoning_mode::DELIMITER;
-            end  = trim_leading_whitespace(seg[1].value);
-            for (size_t i = 2; i < seg.size(); i++) {
-                end += seg[i].value;
-            }
-            end = trim_whitespace(end);
-        } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) {
-            // the marker might be in the prefix actually, let's check for case of
-            // left: empty
-            // right: reasoning_content
-            // suffix: <closing marker>content
-            // prefix: ...<opening marker>
-            auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix));
-            if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER &&
-                trim_whitespace(suf_seg[1].value).find("I can help.") == 0) {
-                auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix));
-                if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER ||
-                    (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() &&
-                     pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) {
-                    auto marker_seg = pre_seg[pre_seg.size() - 1];
-                    if (marker_seg.type == segment_type::TEXT) {
-                        marker_seg = pre_seg[pre_seg.size() - 2];
-                    }
-                    mode  = reasoning_mode::FORCED_CLOSED;
-                    start = trim_whitespace(marker_seg.value);
-                    end   = trim_whitespace(suf_seg[0].value);
+        auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
+        });
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        // try the more aggressive parse first, if it fails, fall back to the delimiter one
+        auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+        if (!result.result.success()) {
+            result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+        }
+        if (result.result.success()) {
+            if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
+                if (parser_wrapped.parse_anywhere_and_extract(diff.right).result.success()) { // both tags in the diff = no forced close
+                    mode = reasoning_mode::TAG_BASED;
+                } else {
+                    mode = reasoning_mode::FORCED_CLOSED;
                }
+                start = trim_whitespace(result.tags["pre"]);
+                end   = result.tags["post"];
+            } else if (!result.tags["post"].empty()) {
+                mode = reasoning_mode::DELIMITER;
+                end = result.tags["post"];
            }
        }
    }
@ -301,12 +284,10 @@ void analyze_reasoning::compare_thinking_enabled() {

    const auto & diff = comparison->diff;

-    std::string left_trimmed = diff.left;
-    trim_whitespace(left_trimmed);
+    std::string left_trimmed = trim_whitespace(diff.left);

    if (left_trimmed.empty() && !diff.right.empty()) {
-        std::string right_trimmed = diff.right;
-        trim_whitespace(right_trimmed);
+        std::string right_trimmed = trim_whitespace(diff.right);

        if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
            if (start.empty()) {
@ -323,42 +304,25 @@ void analyze_reasoning::compare_thinking_enabled() {
    // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers,
    // but enable_thinking=true produces only the start marker
    if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
-        std::string output_A = comparison->output_A;  // enable_thinking=false
-        std::string output_B = comparison->output_B;  // enable_thinking=true
-
-        // Both should end with the assistant role marker
-        // Check if output_A has both reasoning_start and reasoning_end markers
-        // while output_B has only reasoning_start
-        if (!start.empty()) {
-            // Check if output_A contains both start and end markers
-            bool A_has_start = output_A.find(start) != std::string::npos;
-            bool A_has_end   = !end.empty() && output_A.find(end) != std::string::npos;
-
-            // Check if output_B contains only the start marker (and not the end marker)
-            bool B_has_start = output_B.find(start) != std::string::npos;
-            bool B_has_end   = !end.empty() && output_B.find(end) != std::string::npos;
-
-            // For FORCED_CLOSED: A should have both, B should have only start
-            if (A_has_start && A_has_end && B_has_start && !B_has_end) {
-                mode = reasoning_mode::FORCED_CLOSED;
-            }
-        } else if (!end.empty()) {
-            // We might not have detected the reasoning open marker until now,
-            // but this is another chance to do so
-            auto diff    = comparison->diff;
-            auto diff_rt = trim_whitespace(diff.right);
-            auto diff_lt = trim_whitespace(diff.left);
-            if (diff_rt.empty() && diff_lt == end) {
-                auto seg = segmentize_markers(trim_whitespace(diff.prefix));
-                if (!seg.empty() && seg[seg.size() - 1].type == MARKER) {  // this is FORCED_CLOSED
-                    start = seg[seg.size() - 1].value;
-                    mode  = reasoning_mode::FORCED_CLOSED;
-                }
+        auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.literal(start) + p.space() + p.literal(end) + p.rest();
+        });
+        auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest();
+        });
+        if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
+            parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
+            mode = reasoning_mode::FORCED_CLOSED;
+        } else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
+            auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
+            if (result.result.success()) {
+                start = result.tags["pre"];
+                mode  = reasoning_mode::FORCED_CLOSED;
            }
        }
    }

-    if (start.empty() && end.empty()) {
+    if (start.empty() && end.empty()) {  // we might still have the case of "just open" and "just close"
        if (!diff.left.empty() && !diff.right.empty()) {
            auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
            auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
@ -408,41 +372,25 @@ void analyze_reasoning::compare_reasoning_scope() {

    if (!reasoning_in_A && reasoning_in_B) {
        mode = reasoning_mode::TOOLS_ONLY;
-        LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n");
+        LOG_DBG(ANSI_ORANGE "%s: Detected TOOLS_ONLY reasoning mode\n" ANSI_RESET, __func__);

-        // Extract reasoning markers from output_B
-        // The reasoning_content is "Let me think."
-        size_t reasoning_pos = comparison->output_B.find(reasoning_content);
-        if (reasoning_pos != std::string::npos) {
-            // Find start marker before reasoning_content
-            std::string before_reasoning = comparison->output_B.substr(0, reasoning_pos);
-            before_reasoning             = trim_trailing_whitespace(before_reasoning);
-            auto segments_before         = segmentize_markers(before_reasoning);
-            std::reverse(segments_before.begin(), segments_before.end());
-
-            for (auto & segment : segments_before) {
-                if (segment.type == segment_type::MARKER) {
-                    start = segment.value;
-                    break;
-                }
-            }
-
-            // Find end marker after reasoning_content
-            size_t      reasoning_end   = reasoning_pos + reasoning_content.length();
-            std::string after_reasoning = comparison->output_B.substr(reasoning_end);
-            after_reasoning             = trim_leading_whitespace(after_reasoning);
-
-            if (!after_reasoning.empty()) {
-                // Try to find matching end marker
-                if (!start.empty()) {
-                    auto segments = segmentize_markers(after_reasoning);
-                    for (auto & segment : segments) {
-                        if (segment.type == segment_type::MARKER) {
-                            end = segment.value;
-                            break;
-                        }
-                    }
-                }
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+        if (result.result.success()) {
+            start = result.tags["pre"];
+            end = result.tags["post"];
+        } else {
+            auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
+            });
+            result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+            if (result.result.success()) {
+                end = result.tags["post"];
+            } else {
+                LOG_DBG(ANSI_ORANGE "%s: Unable to extracft reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
+                mode = reasoning_mode::NONE;
            }
        }
    }
@ -501,18 +449,11 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz
            // We only have the content text in the diff (possibly with a stray EOG marker), so no markers
            mode = content_mode::PLAIN;
            found_plain_content = true;
-        }
-        /* auto segments = segmentize_markers(diff_reasoning.left);
-        if (trim_whitespace(diff_reasoning.left) == response ||
-            (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) {
-            // We only have the content text in the diff (possibly with a stray EOG marker), so no markers
-            mode = content_mode::PLAIN;
-            found_plain_content = true;
-    }*/ else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() &&
-                   diff_reasoning.left.find(reasoning.end) != std::string::npos) {
-            std::string post_closed_reasoning = diff_reasoning.left.substr(
-                diff_reasoning.left.find(reasoning.end) + reasoning.end.length());
-            if (trim_whitespace(post_closed_reasoning) == "Response text") {
+        } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty()) {
+            auto post_reasoning_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.literal(reasoning.end) + p.space() + p.literal(response);
+            });
+            if (post_reasoning_parser.parse_anywhere_and_extract(diff_reasoning.left).result.success()) {
                mode = content_mode::PLAIN;
                found_plain_content = true;
            }
@ -525,13 +466,12 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz
        }
        // Take the more promising diff
        std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left;
-        size_t      pos          = pure_content.find("Response text");
-        if (pos == std::string::npos) {
-            LOG_DBG(ANSI_ORANGE "%s: Error: response text not found - improper template application?\n" ANSI_RESET, __func__);
-            return;
-        }
-        start = trim_leading_whitespace(pure_content.substr(0, pos));
-        end = trim_leading_whitespace(pure_content.substr(pos + 13));  // 13 - len of "Response text"
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker()) + p.space() + p.literal(response) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        auto result = parser_wrapped.parse_anywhere_and_extract(pure_content);
+        start = result.tags["pre"];
+        end = result.tags["post"];
        // TODO: WRAPPED_WITH_REASONING
    }

--- a/tests/test-chat-auto-parser.cpp
+++ b/tests/test-chat-auto-parser.cpp
@ -1291,7 +1291,7 @@ static void test_nemotron_reasoning_detection(testing & t) {

    // Check reasoning markers
    t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
-    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
+    t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);

    // Check reasoning mode detection
    // Nemotron uses forced closed reasoning with add_generation_prompt
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@ -1448,7 +1448,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {

        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();

-        tst.test("<seed:think>I'm thinking about the answer</seed:think>Hello, world!")
+        tst.test("<seed:think>I'm thinking about the answer</seed:think>\nHello, world!")
            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
            .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
            .run();
@ -2179,7 +2179,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {

    {
        auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug);
-        tst.test("I was thinking</think>Now I'm not.").
+        tst.test("I was thinking</think>\nNow I'm not.").
            enable_thinking(true).
            reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
            expect_reasoning("I was thinking").