From 8c1d1bae36525191402043d72fa522c75aea73c7 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Thu, 5 Feb 2026 12:27:09 +0100 Subject: [PATCH] More edge cases --- common/chat-peg-parser.cpp | 58 ++++++++++++++----------- tests/test-chat.cpp | 87 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 24 deletions(-) diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index f84b6ba298..2922c8d582 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -272,13 +272,42 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1)); std::string value_to_add; - if (!value_content.empty()) { + if (value_content.empty() && is_arg_string_value) { + // Empty string value - start with opening quote + // arg_close will add the closing quote + if (!current_tool->name.empty()) { + value_to_add = "\""; + needs_closing_quote = true; + } else { + value_to_add = "\""; + buffer_needs_closing_quote = true; + } + } else if (!value_content.empty() && is_arg_string_value) { + // Schema declares this as string type - always treat as literal string value + // Never try to parse as JSON (this ensures consistent handling of quoted strings + // like "foo" which would otherwise be parsed as JSON string 'foo') + if (!current_tool->name.empty()) { + if (!needs_closing_quote) { + value_to_add = "\""; + needs_closing_quote = true; + } + } else { + if (!buffer_needs_closing_quote) { + value_to_add = "\""; + buffer_needs_closing_quote = true; + } + } + // Escape special characters in the string content + std::string escaped = json(value_content).dump(); + // Remove the surrounding quotes from the escaped string + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + escaped = escaped.substr(1, escaped.size() - 2); + } + value_to_add += escaped; + } else if (!value_content.empty()) { // For potential containers, normalize Python-style single quotes to JSON double quotes first // This ensures consistent output during both partial and final parsing - // Note: is_arg_string_value means the schema explicitly declares this as a string type, - // so we should NOT treat it as a potential container even if it starts with [ or { - bool is_potential_container = !is_arg_string_value && - (value_content[0] == '[' || value_content[0] == '{'); + bool is_potential_container = value_content[0] == '[' || value_content[0] == '{'; if (is_potential_container) { value_content = normalize_quotes_to_json(value_content); } @@ -301,25 +330,6 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { } else { buffer_needs_closing_quote = true; } - } else if (is_arg_string_value) { - // Schema declares this as string type but it parsed as non-string (e.g., number) - // Force treatment as string value - add opening quote and escape content - if (!current_tool->name.empty()) { - if (!needs_closing_quote) { - value_to_add = "\""; - needs_closing_quote = true; - } - } else { - if (!buffer_needs_closing_quote) { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } - } - std::string escaped = json(value_content).dump(); - if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { - escaped = escaped.substr(1, escaped.size() - 2); - } - value_to_add += escaped; } else { // For non-string values (number, bool, null, object, array), add raw value content // Using raw content instead of dump() ensures monotonicity for streaming diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index e9a18f7f4a..f55ab398bd 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -355,6 +355,40 @@ static common_chat_tool magic_int_tool{ })", }; +static common_chat_tool string_param_tool{ + /* .name = */ "string_param", + /* .description = */ "Tool with string parameter for testing", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "A text parameter" + } + }, + "required": [] + })", +}; + +static common_chat_tool quoted_unquoted_tool{ + /* .name = */ "quoted_unquoted", + /* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "quoted": { + "type": "string", + "description": "Quoted value" + }, + "unquoted": { + "type": "string", + "description": "Unquoted value" + } + }, + "required": ["quoted", "unquoted"] + })", +}; + static std::vector tools{ special_function_tool, special_function_tool_with_optional_param, python_tool, html_tool, todo_list }; @@ -2187,6 +2221,59 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); + tst.test( + "Call string_param with empty text\n" + "\n" + "\n" + "\n" + "\n\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ string_param_tool }) + .expect_reasoning("Call string_param with empty text") + .expect_tool_calls({ + { "string_param", R"({"text": ""})", {} }, + }) + .run(); + + tst.test( + "Test simple quoted unquoted\n" + "\n" + "\n" + "\n" + "\n\"foo\"\n\n" + "\nfoo\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ quoted_unquoted_tool }) + .expect_reasoning("Test simple quoted unquoted") + .expect_tool_calls({ + { "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} }, + }) + .run(); + + tst.test( + "Test complex quoted unquoted\n" + "\n" + "\n" + "\n" + "\n\"printf(\\\"foo\\\");\"\n\n" + "\nprintf(\"foo\");\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ quoted_unquoted_tool }) + .expect_reasoning("Test complex quoted unquoted") + .expect_tool_calls({ + { "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} } + }) + .run(); + } }