chat-parser: handle whitespace around JSON in tool call parsing

Models often output whitespace (newlines, spaces) between XML tags and
JSON content, e.g.:

    <function=transfer_to_human>
         {"reason": "..."}
    </function>

This was causing parsing failures in Hermes 2 Pro and other formats
because try_consume_json() expected JSON to start immediately at the
current position.

Fix by modifying try_consume_json() to:
- Skip leading whitespace before attempting to parse JSON
- Consume trailing whitespace after successful JSON parse
- Restore the saved position if JSON parsing fails, so a failed attempt
  does not consume the skipped whitespace (important for streaming)

This fix benefits all chat formats that use try_consume_json().

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
ochafik 2025-12-14 23:54:41 +00:00
parent 52392291b2
commit 73015bbb08
2 changed files with 20 additions and 0 deletions

View File

@@ -488,13 +488,24 @@ std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser:
} }
std::optional<common_json> common_chat_msg_parser::try_consume_json() { std::optional<common_json> common_chat_msg_parser::try_consume_json() {
// Skip leading whitespace - JSON values never start with whitespace, and models often
// output newlines/spaces between tags and JSON content (e.g., "<function=name>\n {...}")
auto saved_pos = pos_;
consume_spaces();
auto it = input_.cbegin() + pos_; auto it = input_.cbegin() + pos_;
const auto end = input_.cend(); const auto end = input_.cend();
common_json result; common_json result;
if (!common_json_parse(it, end, healing_marker_, result)) { if (!common_json_parse(it, end, healing_marker_, result)) {
// Restore position if parsing failed - don't consume whitespace without a successful parse
pos_ = saved_pos;
return std::nullopt; return std::nullopt;
} }
pos_ = std::distance(input_.cbegin(), it); pos_ = std::distance(input_.cbegin(), it);
// Consume trailing whitespace after successful JSON parse
consume_spaces();
if (result.healing_marker.marker.empty()) { if (result.healing_marker.marker.empty()) {
// No healing marker, just return the parsed json // No healing marker, just return the parsed json
return result; return result;

View File

@@ -1018,6 +1018,15 @@ static void test_template_output_parsers() {
"<function=special_function>{\"arg1\": 1}</function>", "<function=special_function>{\"arg1\": 1}</function>",
/* is_partial= */ false, /* is_partial= */ false,
{COMMON_CHAT_FORMAT_HERMES_2_PRO})); {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
// Test <function=name> with whitespace before JSON (issue: model outputs newline+spaces before JSON)
assert_msg_equals(
message_assist_call,
common_chat_parse(
"<function=special_function>\n"
" {\"arg1\": 1}\n"
"</function>",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_HERMES_2_PRO}));
assert_msg_equals( assert_msg_equals(
message_assist_call, message_assist_call,
common_chat_parse( common_chat_parse(