Replace reasoning and content analysis with PEG-parser-based
This commit is contained in:
parent
a17950fb63
commit
4580fce1c7
|
|
@ -236,46 +236,29 @@ void analyze_reasoning::compare_reasoning_presence() {
|
|||
const std::string reasoning_content = "Let me think about this.";
|
||||
|
||||
if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) {
|
||||
auto seg = prune_whitespace_segments(segmentize_markers(diff.right));
|
||||
if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) {
|
||||
// easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace)
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
start = trim_whitespace(seg[0].value);
|
||||
end = trim_leading_whitespace(seg[2].value);
|
||||
for (size_t i = 3; i < seg.size(); i++) {
|
||||
end += seg[i].value;
|
||||
}
|
||||
// we always truncate because this doesn't really influence correctness but model might not always generate newline
|
||||
end = trim_whitespace(end);
|
||||
} else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) {
|
||||
// delimited
|
||||
mode = reasoning_mode::DELIMITER;
|
||||
end = trim_leading_whitespace(seg[1].value);
|
||||
for (size_t i = 2; i < seg.size(); i++) {
|
||||
end += seg[i].value;
|
||||
}
|
||||
end = trim_whitespace(end);
|
||||
} else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) {
|
||||
// the marker might be in the prefix actually, let's check for case of
|
||||
// left: empty
|
||||
// right: reasoning_content
|
||||
// suffix: <closing marker>content
|
||||
// prefix: ...<opening marker>
|
||||
auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix));
|
||||
if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER &&
|
||||
trim_whitespace(suf_seg[1].value).find("I can help.") == 0) {
|
||||
auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix));
|
||||
if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER ||
|
||||
(pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() &&
|
||||
pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) {
|
||||
auto marker_seg = pre_seg[pre_seg.size() - 1];
|
||||
if (marker_seg.type == segment_type::TEXT) {
|
||||
marker_seg = pre_seg[pre_seg.size() - 2];
|
||||
}
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
start = trim_whitespace(marker_seg.value);
|
||||
end = trim_whitespace(suf_seg[0].value);
|
||||
auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
|
||||
});
|
||||
auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
|
||||
});
|
||||
// try the more aggressive parse first, if it fails, fall back to the delimiter one
|
||||
auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
|
||||
if (!result.result.success()) {
|
||||
result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
|
||||
}
|
||||
if (result.result.success()) {
|
||||
if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
|
||||
if (parser_wrapped.parse_anywhere_and_extract(diff.right).result.success()) { // both tags in the diff = no forced close
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
} else {
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
}
|
||||
start = trim_whitespace(result.tags["pre"]);
|
||||
end = result.tags["post"];
|
||||
} else if (!result.tags["post"].empty()) {
|
||||
mode = reasoning_mode::DELIMITER;
|
||||
end = result.tags["post"];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -301,12 +284,10 @@ void analyze_reasoning::compare_thinking_enabled() {
|
|||
|
||||
const auto & diff = comparison->diff;
|
||||
|
||||
std::string left_trimmed = diff.left;
|
||||
trim_whitespace(left_trimmed);
|
||||
std::string left_trimmed = trim_whitespace(diff.left);
|
||||
|
||||
if (left_trimmed.empty() && !diff.right.empty()) {
|
||||
std::string right_trimmed = diff.right;
|
||||
trim_whitespace(right_trimmed);
|
||||
std::string right_trimmed = trim_whitespace(diff.right);
|
||||
|
||||
if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
|
||||
if (start.empty()) {
|
||||
|
|
@ -323,42 +304,25 @@ void analyze_reasoning::compare_thinking_enabled() {
|
|||
// Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers,
|
||||
// but enable_thinking=true produces only the start marker
|
||||
if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
|
||||
std::string output_A = comparison->output_A; // enable_thinking=false
|
||||
std::string output_B = comparison->output_B; // enable_thinking=true
|
||||
|
||||
// Both should end with the assistant role marker
|
||||
// Check if output_A has both reasoning_start and reasoning_end markers
|
||||
// while output_B has only reasoning_start
|
||||
if (!start.empty()) {
|
||||
// Check if output_A contains both start and end markers
|
||||
bool A_has_start = output_A.find(start) != std::string::npos;
|
||||
bool A_has_end = !end.empty() && output_A.find(end) != std::string::npos;
|
||||
|
||||
// Check if output_B contains only the start marker (and not the end marker)
|
||||
bool B_has_start = output_B.find(start) != std::string::npos;
|
||||
bool B_has_end = !end.empty() && output_B.find(end) != std::string::npos;
|
||||
|
||||
// For FORCED_CLOSED: A should have both, B should have only start
|
||||
if (A_has_start && A_has_end && B_has_start && !B_has_end) {
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
}
|
||||
} else if (!end.empty()) {
|
||||
// We might not have detected the reasoning open marker until now,
|
||||
// but this is another chance to do so
|
||||
auto diff = comparison->diff;
|
||||
auto diff_rt = trim_whitespace(diff.right);
|
||||
auto diff_lt = trim_whitespace(diff.left);
|
||||
if (diff_rt.empty() && diff_lt == end) {
|
||||
auto seg = segmentize_markers(trim_whitespace(diff.prefix));
|
||||
if (!seg.empty() && seg[seg.size() - 1].type == MARKER) { // this is FORCED_CLOSED
|
||||
start = seg[seg.size() - 1].value;
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
}
|
||||
auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.literal(start) + p.space() + p.literal(end) + p.rest();
|
||||
});
|
||||
auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest();
|
||||
});
|
||||
if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
|
||||
parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
} else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
|
||||
auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
|
||||
if (result.result.success()) {
|
||||
start = result.tags["pre"];
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (start.empty() && end.empty()) {
|
||||
if (start.empty() && end.empty()) { // we might still have the case of "just open" and "just close"
|
||||
if (!diff.left.empty() && !diff.right.empty()) {
|
||||
auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
|
||||
auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
|
||||
|
|
@ -408,41 +372,25 @@ void analyze_reasoning::compare_reasoning_scope() {
|
|||
|
||||
if (!reasoning_in_A && reasoning_in_B) {
|
||||
mode = reasoning_mode::TOOLS_ONLY;
|
||||
LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n");
|
||||
LOG_DBG(ANSI_ORANGE "%s: Detected TOOLS_ONLY reasoning mode\n" ANSI_RESET, __func__);
|
||||
|
||||
// Extract reasoning markers from output_B
|
||||
// The reasoning_content is "Let me think."
|
||||
size_t reasoning_pos = comparison->output_B.find(reasoning_content);
|
||||
if (reasoning_pos != std::string::npos) {
|
||||
// Find start marker before reasoning_content
|
||||
std::string before_reasoning = comparison->output_B.substr(0, reasoning_pos);
|
||||
before_reasoning = trim_trailing_whitespace(before_reasoning);
|
||||
auto segments_before = segmentize_markers(before_reasoning);
|
||||
std::reverse(segments_before.begin(), segments_before.end());
|
||||
|
||||
for (auto & segment : segments_before) {
|
||||
if (segment.type == segment_type::MARKER) {
|
||||
start = segment.value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Find end marker after reasoning_content
|
||||
size_t reasoning_end = reasoning_pos + reasoning_content.length();
|
||||
std::string after_reasoning = comparison->output_B.substr(reasoning_end);
|
||||
after_reasoning = trim_leading_whitespace(after_reasoning);
|
||||
|
||||
if (!after_reasoning.empty()) {
|
||||
// Try to find matching end marker
|
||||
if (!start.empty()) {
|
||||
auto segments = segmentize_markers(after_reasoning);
|
||||
for (auto & segment : segments) {
|
||||
if (segment.type == segment_type::MARKER) {
|
||||
end = segment.value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
|
||||
});
|
||||
auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
|
||||
if (result.result.success()) {
|
||||
start = result.tags["pre"];
|
||||
end = result.tags["post"];
|
||||
} else {
|
||||
auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
|
||||
});
|
||||
result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
|
||||
if (result.result.success()) {
|
||||
end = result.tags["post"];
|
||||
} else {
|
||||
LOG_DBG(ANSI_ORANGE "%s: Unable to extracft reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
|
||||
mode = reasoning_mode::NONE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -501,18 +449,11 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz
|
|||
// We only have the content text in the diff (possibly with a stray EOG marker), so no markers
|
||||
mode = content_mode::PLAIN;
|
||||
found_plain_content = true;
|
||||
}
|
||||
/* auto segments = segmentize_markers(diff_reasoning.left);
|
||||
if (trim_whitespace(diff_reasoning.left) == response ||
|
||||
(segments.size() == 2 && trim_whitespace(segments[0].value) == response)) {
|
||||
// We only have the content text in the diff (possibly with a stray EOG marker), so no markers
|
||||
mode = content_mode::PLAIN;
|
||||
found_plain_content = true;
|
||||
}*/ else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() &&
|
||||
diff_reasoning.left.find(reasoning.end) != std::string::npos) {
|
||||
std::string post_closed_reasoning = diff_reasoning.left.substr(
|
||||
diff_reasoning.left.find(reasoning.end) + reasoning.end.length());
|
||||
if (trim_whitespace(post_closed_reasoning) == "Response text") {
|
||||
} else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty()) {
|
||||
auto post_reasoning_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
|
||||
return p.literal(reasoning.end) + p.space() + p.literal(response);
|
||||
});
|
||||
if (post_reasoning_parser.parse_anywhere_and_extract(diff_reasoning.left).result.success()) {
|
||||
mode = content_mode::PLAIN;
|
||||
found_plain_content = true;
|
||||
}
|
||||
|
|
@ -525,13 +466,12 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz
|
|||
}
|
||||
// Take the more promising diff
|
||||
std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left;
|
||||
size_t pos = pure_content.find("Response text");
|
||||
if (pos == std::string::npos) {
|
||||
LOG_DBG(ANSI_ORANGE "%s: Error: response text not found - improper template application?\n" ANSI_RESET, __func__);
|
||||
return;
|
||||
}
|
||||
start = trim_leading_whitespace(pure_content.substr(0, pos));
|
||||
end = trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text"
|
||||
auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.tag("pre", p.marker()) + p.space() + p.literal(response) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
|
||||
});
|
||||
auto result = parser_wrapped.parse_anywhere_and_extract(pure_content);
|
||||
start = result.tags["pre"];
|
||||
end = result.tags["post"];
|
||||
// TODO: WRAPPED_WITH_REASONING
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1291,7 +1291,7 @@ static void test_nemotron_reasoning_detection(testing & t) {
|
|||
|
||||
// Check reasoning markers
|
||||
t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
|
||||
t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
|
||||
t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);
|
||||
|
||||
// Check reasoning mode detection
|
||||
// Nemotron uses forced closed reasoning with add_generation_prompt
|
||||
|
|
|
|||
|
|
@ -1448,7 +1448,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
|||
|
||||
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
|
||||
|
||||
tst.test("<seed:think>I'm thinking about the answer</seed:think>Hello, world!")
|
||||
tst.test("<seed:think>I'm thinking about the answer</seed:think>\nHello, world!")
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
|
||||
.expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
|
||||
.run();
|
||||
|
|
@ -2179,7 +2179,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
|||
|
||||
{
|
||||
auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug);
|
||||
tst.test("I was thinking</think>Now I'm not.").
|
||||
tst.test("I was thinking</think>\nNow I'm not.").
|
||||
enable_thinking(true).
|
||||
reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
|
||||
expect_reasoning("I was thinking").
|
||||
|
|
|
|||
Loading…
Reference in New Issue