From 73015bbb08983d5b8b51a310d08f2fbbfb805e33 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sun, 14 Dec 2025 23:54:41 +0000 Subject: [PATCH] chat-parser: handle whitespace around JSON in tool call parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Models often output whitespace (newlines, spaces) between XML tags and JSON content, e.g. a newline and indentation between an opening XML tag and the JSON object {"reason": "..."}. This caused parsing failures in Hermes 2 Pro and other formats because try_consume_json() expected JSON to start immediately at the current position. Fix by modifying try_consume_json() to: - Skip leading whitespace before attempting to parse JSON - Consume trailing whitespace after successful JSON parse - Restore the saved position if JSON parsing fails (important for streaming) This fix benefits all chat formats that use try_consume_json(). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- common/chat-parser.cpp | 11 +++++++++++ tests/test-chat.cpp | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp index d740dac065..329a3a45b6 100644 --- a/common/chat-parser.cpp +++ b/common/chat-parser.cpp @@ -488,13 +488,24 @@ std::optional common_chat_msg_parser: } std::optional common_chat_msg_parser::try_consume_json() { + // Skip leading whitespace - JSON values never start with whitespace, and models often + // output newlines/spaces between tags and JSON content (e.g., "\n {...}") + auto saved_pos = pos_; + consume_spaces(); + auto it = input_.cbegin() + pos_; const auto end = input_.cend(); common_json result; if (!common_json_parse(it, end, healing_marker_, result)) { + // Restore position if parsing failed - don't consume whitespace without a successful parse + pos_ = saved_pos; return std::nullopt; } pos_ = std::distance(input_.cbegin(), it); + + // Consume trailing whitespace after successful JSON parse + consume_spaces(); + if (result.healing_marker.marker.empty()) { // 
No healing marker, just return the parsed json return result; diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 007929f517..30b88c7172 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1018,6 +1018,15 @@ static void test_template_output_parsers() { "{\"arg1\": 1}", /* is_partial= */ false, {COMMON_CHAT_FORMAT_HERMES_2_PRO})); + // Test with whitespace before JSON (issue: model outputs newline+spaces before JSON) + assert_msg_equals( + message_assist_call, + common_chat_parse( + "\n" + " {\"arg1\": 1}\n" + "", + /* is_partial= */ false, + {COMMON_CHAT_FORMAT_HERMES_2_PRO})); assert_msg_equals( message_assist_call, common_chat_parse(