More edge cases

This commit is contained in:
Piotr Wilkin 2026-02-05 12:27:09 +01:00
parent 8e87ba402f
commit 8c1d1bae36
2 changed files with 121 additions and 24 deletions

View File

@ -272,13 +272,42 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
std::string value_to_add;
if (!value_content.empty()) {
if (value_content.empty() && is_arg_string_value) {
// Empty string value - start with opening quote
// arg_close will add the closing quote
if (!current_tool->name.empty()) {
value_to_add = "\"";
needs_closing_quote = true;
} else {
value_to_add = "\"";
buffer_needs_closing_quote = true;
}
} else if (!value_content.empty() && is_arg_string_value) {
// Schema declares this as string type - always treat as literal string value
// Never try to parse as JSON (this ensures consistent handling of quoted strings
// like "foo" which would otherwise be parsed as JSON string 'foo')
if (!current_tool->name.empty()) {
if (!needs_closing_quote) {
value_to_add = "\"";
needs_closing_quote = true;
}
} else {
if (!buffer_needs_closing_quote) {
value_to_add = "\"";
buffer_needs_closing_quote = true;
}
}
// Escape special characters in the string content
std::string escaped = json(value_content).dump();
// Remove the surrounding quotes from the escaped string
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
escaped = escaped.substr(1, escaped.size() - 2);
}
value_to_add += escaped;
} else if (!value_content.empty()) {
// For potential containers, normalize Python-style single quotes to JSON double quotes first
// This ensures consistent output during both partial and final parsing
// Note: is_arg_string_value means the schema explicitly declares this as a string type,
// so we should NOT treat it as a potential container even if it starts with [ or {
bool is_potential_container = !is_arg_string_value &&
(value_content[0] == '[' || value_content[0] == '{');
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
if (is_potential_container) {
value_content = normalize_quotes_to_json(value_content);
}
@ -301,25 +330,6 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
} else {
buffer_needs_closing_quote = true;
}
} else if (is_arg_string_value) {
// Schema declares this as string type but it parsed as non-string (e.g., number)
// Force treatment as string value - add opening quote and escape content
if (!current_tool->name.empty()) {
if (!needs_closing_quote) {
value_to_add = "\"";
needs_closing_quote = true;
}
} else {
if (!buffer_needs_closing_quote) {
value_to_add = "\"";
buffer_needs_closing_quote = true;
}
}
std::string escaped = json(value_content).dump();
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
escaped = escaped.substr(1, escaped.size() - 2);
}
value_to_add += escaped;
} else {
// For non-string values (number, bool, null, object, array), add raw value content
// Using raw content instead of dump() ensures monotonicity for streaming

View File

@ -355,6 +355,40 @@ static common_chat_tool magic_int_tool{
})",
};
static common_chat_tool string_param_tool{
/* .name = */ "string_param",
/* .description = */ "Tool with string parameter for testing",
/* .parameters = */ R"({
"type": "object",
"properties": {
"text": {
"type": "string",
"description": "A text parameter"
}
},
"required": []
})",
};
static common_chat_tool quoted_unquoted_tool{
/* .name = */ "quoted_unquoted",
/* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted",
/* .parameters = */ R"({
"type": "object",
"properties": {
"quoted": {
"type": "string",
"description": "Quoted value"
},
"unquoted": {
"type": "string",
"description": "Unquoted value"
}
},
"required": ["quoted", "unquoted"]
})",
};
static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
python_tool, html_tool, todo_list };
@ -2187,6 +2221,59 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
tst.test(
"Call string_param with empty text\n"
"</think>\n"
"<tool_call>\n"
"<function=string_param>\n"
"<parameter=text>\n\n</parameter>\n"
"</function>\n"
"</tool_call>")
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.tools({ string_param_tool })
.expect_reasoning("Call string_param with empty text")
.expect_tool_calls({
{ "string_param", R"({"text": ""})", {} },
})
.run();
tst.test(
"Test simple quoted unquoted\n"
"</think>\n"
"<tool_call>\n"
"<function=quoted_unquoted>\n"
"<parameter=quoted>\n\"foo\"\n</parameter>\n"
"<parameter=unquoted>\nfoo\n</parameter>\n"
"</function>\n"
"</tool_call>")
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.tools({ quoted_unquoted_tool })
.expect_reasoning("Test simple quoted unquoted")
.expect_tool_calls({
{ "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} },
})
.run();
tst.test(
"Test complex quoted unquoted\n"
"</think>\n"
"<tool_call>\n"
"<function=quoted_unquoted>\n"
"<parameter=quoted>\n\"printf(\\\"foo\\\");\"\n</parameter>\n"
"<parameter=unquoted>\nprintf(\"foo\");\n</parameter>\n"
"</function>\n"
"</tool_call>")
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.tools({ quoted_unquoted_tool })
.expect_reasoning("Test complex quoted unquoted")
.expect_tool_calls({
{ "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} }
})
.run();
}
}