Fix case with object inside object, refactor long methods.
This commit is contained in:
parent
2081e9b056
commit
bd549b3b37
|
|
@ -133,234 +133,252 @@ common_peg_parser universal_peg_generator::build_tool_parser(
|
|||
const templates_params & inputs,
|
||||
const common_peg_parser & reasoning) {
|
||||
|
||||
switch (analysis.tools) {
|
||||
case tool_format::JSON_NATIVE:
|
||||
return build_tool_parser_json_native(p, analysis, inputs, reasoning);
|
||||
case tool_format::TAG_WITH_JSON:
|
||||
return build_tool_parser_tag_json(p, analysis, inputs, reasoning);
|
||||
case tool_format::TAG_WITH_TAGGED:
|
||||
return build_tool_parser_tag_tagged(p, analysis, inputs, reasoning);
|
||||
default:
|
||||
GGML_ABORT("Unable to create tool parser");
|
||||
}
|
||||
}
|
||||
|
||||
// Build the tool-call parser for the JSON_NATIVE format
// (pure JSON calls such as {"name": "X", "arguments": {...}}).
//
// p         - builder used to construct all parser nodes
// analysis  - template analysis result (markers, field names, content mode)
// inputs    - request parameters (tool list, tool choice, parallel calls)
// reasoning - parser for the reasoning section, placed first in the sequence
//
// Returns: reasoning + optional content + tool calls + end-of-input.
common_peg_parser universal_peg_generator::build_tool_parser_json_native(
    common_chat_peg_unified_builder & p,
    const diff_analysis_result &      analysis,
    const templates_params &          inputs,
    const common_peg_parser &         reasoning) {

    const auto & m = analysis.markers;

    // Build effective field names with dot notation if function_field is set.
    // The prefix is skipped when function_field is empty, equals "function",
    // or when name_field already contains a dot.
    std::string name_field = analysis.name_field;
    std::string args_field = analysis.args_field;

    if (!analysis.function_field.empty() &&
        analysis.function_field != "function" &&
        name_field.find('.') == std::string::npos) {
        name_field = analysis.function_field + "." + name_field;
        args_field = analysis.function_field + "." + args_field;
    }

    auto tools_parser = p.standard_json_tools(
        m.tool_section_start,
        m.tool_section_end,
        inputs.tools,
        inputs.parallel_tool_calls,
        inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
        name_field,
        args_field,
        analysis.tools_array_wrapped,
        analysis.fun_name_is_key,
        analysis.id_field,
        analysis.gen_id_field,
        analysis.parameter_order
    );

    // Handle content wrappers if present: content is matched between the
    // explicit start/end markers instead of "everything before the tools".
    if (analysis.content == content_mode::ALWAYS_WRAPPED &&
        !m.content_start.empty() && !m.content_end.empty()) {
        auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end);
        return reasoning + wrapped_content + tools_parser + p.end();
    }

    // Without a section start marker there is nothing to scan up to, so the
    // content parser degenerates to eps().
    auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start);
    return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
}
|
||||
|
||||
// Build the tool-call parser for the TAG_WITH_JSON format
// (tag-based calls with JSON args: <function=name>{args}</function>,
// optionally with a call id between the function name and the arguments).
//
// p         - builder used to construct all parser nodes
// analysis  - template analysis result (markers, call-id position)
// inputs    - request parameters (tool list, tool choice, parallel calls)
// reasoning - parser for the reasoning section, placed first in the sequence
//
// Returns: reasoning + optional content + tool calls + end-of-input.
common_peg_parser universal_peg_generator::build_tool_parser_tag_json(
    common_chat_peg_unified_builder & p,
    const diff_analysis_result &      analysis,
    const templates_params &          inputs,
    const common_peg_parser &         reasoning) {

    const auto & m = analysis.markers;
    common_peg_parser tool_choice = p.choice();

    // One alternative per declared tool: open tag with the literal tool name,
    // optional call id, then schema-constrained JSON arguments.
    foreach_function(inputs.tools, [&](const json & tool) {
        const auto & function = tool.at("function");
        std::string  name     = function.at("name");
        const auto & schema   = function.at("parameters");

        // Build call_id parser based on position (if supported).
        // The id itself is optional, the call_id_suffix marker is required.
        common_peg_parser call_id_section = p.eps();
        if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
            !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
            call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
        }

        auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
                           call_id_section +
                           p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));

        if (!m.func_close.empty()) {
            func_parser = func_parser + m.func_close;
        }

        tool_choice |= p.rule("tool-" + name, func_parser);
    });

    auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    common_peg_parser tool_calls = p.eps();

    if (!m.per_call_start.empty()) {
        // Per-call wrapping: each call individually wrapped
        auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call",
                wrapped_call + p.zero_or_more(p.space() + wrapped_call));
        } else {
            tool_calls = p.trigger_rule("tool-call", wrapped_call);
        }
        // Optionally enclose all calls in the section markers; with no end
        // marker the section runs to end-of-input.
        if (!m.tool_section_start.empty()) {
            tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
                tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
        }
    } else {
        // No per-call wrapper: calls sit directly inside the section markers,
        // separated by call_separator.
        std::string separator = m.call_separator;
        if (separator.empty()) {
            separator = ", ";  // Default
        }

        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call",
                m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end);
        } else {
            tool_calls = p.trigger_rule("tool-call",
                m.tool_section_start + tool_choice + m.tool_section_end);
        }
    }

    if (!require_calls) {
        tool_calls = p.optional(tool_calls);
    }

    // Content is everything up to the first marker that can start a tool call.
    std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
    auto content_before_tools  = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
    return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
|
||||
|
||||
// Build the tool-call parser for the TAG_WITH_TAGGED format
// (tag-based calls with tagged args: <function=name><param=key>value</param></function>).
//
// p         - builder used to construct all parser nodes
// analysis  - template analysis result (markers, call-id position)
// inputs    - request parameters (tool list, tool choice, parallel calls)
// reasoning - parser for the reasoning section, placed first in the sequence
//
// Tools whose "parameters" have no object-typed "properties" are skipped and
// get no alternative in the tool choice.
//
// Returns: reasoning + optional content + tool calls + end-of-input.
common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
    common_chat_peg_unified_builder & p,
    const diff_analysis_result &      analysis,
    const templates_params &          inputs,
    const common_peg_parser &         reasoning) {

    const auto & m = analysis.markers;
    common_peg_parser tool_choice = p.choice();

    foreach_function(inputs.tools, [&](const json & tool) {
        const auto & function = tool.at("function");
        std::string  name     = function.at("name");
        const auto & params   = function.at("parameters");

        if (!params.contains("properties") || !params.at("properties").is_object()) {
            return;
        }

        const auto & properties = params.at("properties");
        std::set<std::string> required;
        if (params.contains("required") && params.at("required").is_array()) {
            params.at("required").get_to(required);
        }

        // Build parser for each argument
        std::vector<common_peg_parser> arg_parsers;
        for (const auto & [param_name, param_schema] : properties.items()) {
            bool is_required = required.find(param_name) != required.end();
            // NOTE(review): value() presumably throws when "type" is present but
            // not a string (e.g. a type array) - confirm schemas are normalized.
            auto type = param_schema.value("type", "object");

            // String-typed args are captured raw up to the value suffix; every
            // other type is parsed as JSON followed by optional whitespace.
            auto arg = p.tool_arg(
                p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix +
                (type == "string" ?
                    p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix),
                        "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
                    p.tool_arg_json_value(p.schema(p.json(),
                        "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
                p.tool_arg_close(p.literal(m.arg_value_suffix))
            );

            if (is_required) {
                arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
            } else {
                arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
            }
        }

        // Build arg sequence with space() between consecutive args
        common_peg_parser args_seq = p.eps();
        for (size_t i = 0; i < arg_parsers.size(); i++) {
            if (i > 0) {
                args_seq = args_seq + p.space();
            }
            args_seq = args_seq + arg_parsers[i];
        }

        // Build call_id parser based on position (if supported).
        // The id itself is optional, the call_id_suffix marker is required.
        common_peg_parser call_id_section = p.eps();
        if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
            !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
            call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
        }

        auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
                           call_id_section +
                           p.space() + args_seq;

        if (!m.func_close.empty()) {
            func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close));
        } else if (!m.per_call_end.empty()) {
            // When there's no func_close but there is a per_call_end marker, use peek() to ensure
            // we only emit tool_close when we can actually see the closing marker. This prevents
            // premature closing during partial parsing when we've seen e.g. "</" which could be
            // either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
            func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end)));
        } else {
            func_parser = func_parser + p.tool_close(p.space());  // force this to process tool closing callbacks in mapper
        }

        tool_choice |= p.rule("tool-" + name, func_parser);
    });

    auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    common_peg_parser tool_calls = p.eps();

    if (!m.per_call_start.empty()) {
        // Per-call wrapping: each call individually wrapped (e.g., <tool_call>...</tool_call>)
        auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end;
        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
        } else {
            tool_calls = p.trigger_rule("tool-call", wrapped_call);
        }
        // Optionally enclose all calls in the section markers; with no end
        // marker the section runs to end-of-input.
        if (!m.tool_section_start.empty()) {
            tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
                tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
        }
    } else {
        // No per-call wrapper: calls sit directly inside the section markers,
        // separated by call_separator.
        std::string separator = m.call_separator;
        if (separator.empty()) {
            separator = ", ";  // Default
        }

        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call",
                m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end);
        } else {
            tool_calls = p.trigger_rule("tool-call",
                m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end);
        }
    }

    if (!require_tools) {
        tool_calls = p.optional(tool_calls);
    }

    // Content is everything up to the first marker that can start a tool call.
    std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
    auto content_before_tools  = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
    return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
|
||||
|
|
|
|||
|
|
@ -51,4 +51,20 @@ class universal_peg_generator {
|
|||
const diff_analysis_result & analysis,
|
||||
const templates_params & inputs,
|
||||
const common_peg_parser & reasoning);
|
||||
|
||||
// Per-format tool parser builders
|
||||
static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p,
|
||||
const diff_analysis_result & analysis,
|
||||
const templates_params & inputs,
|
||||
const common_peg_parser & reasoning);
|
||||
|
||||
static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p,
|
||||
const diff_analysis_result & analysis,
|
||||
const templates_params & inputs,
|
||||
const common_peg_parser & reasoning);
|
||||
|
||||
static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p,
|
||||
const diff_analysis_result & analysis,
|
||||
const templates_params & inputs,
|
||||
const common_peg_parser & reasoning);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -169,11 +169,7 @@ enum class tool_format {
|
|||
NONE, // No tool support detected
|
||||
JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}}
|
||||
TAG_WITH_JSON, // Tag-based with JSON args: <function=X>{...}</function>
|
||||
BRACKET_TAG, // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...}
|
||||
PREFIXED_INDEXED, // Prefixed-indexed: functions.X:0{...}
|
||||
RECIPIENT_BASED, // Recipient routing: >>>func_name\n{...}
|
||||
TAG_WITH_TAGGED, // Tag-based with tagged args: <param=key>value</param>
|
||||
MARKDOWN_BLOCK, // Markdown code block: Action:\n```json\n[...]\n```
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
|
||||
|
|
@ -184,16 +180,8 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format)
|
|||
return os << "JSON_NATIVE";
|
||||
case tool_format::TAG_WITH_JSON:
|
||||
return os << "TAG_WITH_JSON";
|
||||
case tool_format::BRACKET_TAG:
|
||||
return os << "BRACKET_TAG";
|
||||
case tool_format::PREFIXED_INDEXED:
|
||||
return os << "PREFIXED_INDEXED";
|
||||
case tool_format::RECIPIENT_BASED:
|
||||
return os << "RECIPIENT_BASED";
|
||||
case tool_format::TAG_WITH_TAGGED:
|
||||
return os << "TAG_WITH_TAGGED";
|
||||
case tool_format::MARKDOWN_BLOCK:
|
||||
return os << "MARKDOWN_BLOCK";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,45 @@ static std::string_view trim(std::string_view sv) {
|
|||
return trim_trailing_space(trim_leading_space(sv, 1));
|
||||
}
|
||||
|
||||
// Compute the net curly-brace nesting depth of a JSON-like string.
//
// Each '{' outside a double-quoted string adds one and each '}' outside a
// double-quoted string subtracts one; braces inside quoted strings are
// ignored, and a backslash inside a string escapes the following character
// (so \" does not terminate the string).
//
// Returns the number of '{' still left open. The result is negative when the
// input contains more closing than opening braces, and 0 for empty input.
static int json_brace_depth(const std::string & s) {
    int  depth     = 0;
    bool in_string = false;
    bool escaped   = false;
    for (char c : s) {
        if (escaped) {
            // Character following a backslash: consumed verbatim.
            escaped = false;
            continue;
        }
        if (c == '\\' && in_string) {
            escaped = true;
            continue;
        }
        if (c == '"') {
            in_string = !in_string;
            continue;
        }
        if (!in_string) {
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
            }
        }
    }
    return depth;
}
|
||||
|
||||
// JSON-escape a string and return the inner content (without surrounding quotes).
|
||||
static std::string escape_json_string_inner(const std::string & s) {
|
||||
std::string escaped = json(s).dump();
|
||||
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
|
||||
return escaped.substr(1, escaped.size() - 2);
|
||||
}
|
||||
return escaped;
|
||||
}
|
||||
|
||||
// Convert Python-style single-quoted strings to JSON double-quoted strings
|
||||
// Only converts outer string delimiters, properly handling escape sequences:
|
||||
// - {'key': 'value'} -> {"key": "value"}
|
||||
|
|
@ -148,6 +187,10 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri
|
|||
return zero_or_more(choice({ p, content_chunk }));
|
||||
}
|
||||
|
||||
std::string & common_chat_peg_unified_mapper::args_target() {
|
||||
return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer;
|
||||
}
|
||||
|
||||
void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena,
|
||||
const common_peg_parse_result & parse_result_arg) {
|
||||
// Call base class to visit all nodes
|
||||
|
|
@ -156,15 +199,12 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar
|
|||
// Flush any pending tool call that was started but never got a name
|
||||
// This happens during partial parsing when the tool call is incomplete
|
||||
if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
|
||||
// Transfer any buffered arguments
|
||||
if (!args_buffer.empty()) {
|
||||
pending_tool_call->arguments = args_buffer;
|
||||
}
|
||||
// Close any open quotes in buffered args
|
||||
if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) {
|
||||
if (closing_quote_pending && !pending_tool_call->arguments.empty()) {
|
||||
pending_tool_call->arguments += "\"";
|
||||
}
|
||||
// Add the incomplete tool call to results
|
||||
result.tool_calls.push_back(pending_tool_call.value());
|
||||
pending_tool_call.reset();
|
||||
}
|
||||
|
|
@ -187,15 +227,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
|
|||
bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE;
|
||||
|
||||
if (is_tool_open) {
|
||||
// Don't create tool call yet - wait for name to be known
|
||||
// This prevents sending incomplete tool calls in streaming mode
|
||||
pending_tool_call = common_chat_tool_call();
|
||||
current_tool = &pending_tool_call.value();
|
||||
arg_count = 0;
|
||||
// Clear the arguments buffer for the new tool
|
||||
pending_tool_call = common_chat_tool_call();
|
||||
current_tool = &pending_tool_call.value();
|
||||
arg_count = 0;
|
||||
args_buffer.clear();
|
||||
needs_closing_quote = false;
|
||||
buffer_needs_closing_quote = false;
|
||||
closing_quote_pending = false;
|
||||
}
|
||||
|
||||
if (is_tool_id && current_tool) {
|
||||
|
|
@ -208,15 +244,14 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
|
|||
|
||||
if (is_tool_name && current_tool) {
|
||||
current_tool->name = std::string(trim_trailing_space(node.text));
|
||||
// Now that we have the name, we can populate the arguments from the buffer
|
||||
// Now that we have the name, populate the arguments from the buffer
|
||||
if (!args_buffer.empty()) {
|
||||
current_tool->arguments = args_buffer;
|
||||
args_buffer.clear();
|
||||
} else if (current_tool->arguments.empty()) {
|
||||
// Initialize arguments if we're using tagged format and no buffered args
|
||||
current_tool->arguments = "{";
|
||||
}
|
||||
// Now that we have the name, add the tool call to the result
|
||||
// Add the tool call to results so streaming can see it
|
||||
if (pending_tool_call.has_value()) {
|
||||
result.tool_calls.push_back(pending_tool_call.value());
|
||||
pending_tool_call.reset();
|
||||
|
|
@ -225,28 +260,16 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
|
|||
}
|
||||
|
||||
if (is_tool_args && current_tool) {
|
||||
// For JSON format, the arguments come as a complete JSON object
|
||||
// For tagged format, we build up arguments from individual arg_name/arg_value nodes
|
||||
// Check if this looks like JSON (starts with {) vs tagged format (starts with <)
|
||||
// For JSON format: arguments come as a complete JSON object
|
||||
// For tagged format: built up from individual arg_name/arg_value nodes
|
||||
auto text = trim_trailing_space(node.text);
|
||||
if (!text.empty() && text.front() == '{') {
|
||||
// If we have the tool name, populate directly; otherwise buffer
|
||||
if (!current_tool->name.empty()) {
|
||||
current_tool->arguments = std::string(text);
|
||||
} else {
|
||||
args_buffer = std::string(text);
|
||||
}
|
||||
args_target() = std::string(text);
|
||||
}
|
||||
// If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON
|
||||
}
|
||||
|
||||
if (is_arg_open) {
|
||||
// Reset for new argument
|
||||
if (!current_tool->name.empty()) {
|
||||
needs_closing_quote = false;
|
||||
} else {
|
||||
buffer_needs_closing_quote = false;
|
||||
}
|
||||
closing_quote_pending = false;
|
||||
}
|
||||
|
||||
if (is_arg_name && current_tool) {
|
||||
|
|
@ -257,15 +280,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
|
|||
arg_entry += json(trim(node.text)).dump() + ":";
|
||||
++arg_count;
|
||||
|
||||
// If we have the tool name, add directly; otherwise buffer
|
||||
if (!current_tool->name.empty()) {
|
||||
current_tool->arguments += arg_entry;
|
||||
} else {
|
||||
if (args_buffer.empty()) {
|
||||
args_buffer = "{";
|
||||
}
|
||||
args_buffer += arg_entry;
|
||||
auto & target = args_target();
|
||||
if (target.empty()) {
|
||||
target = "{";
|
||||
}
|
||||
target += arg_entry;
|
||||
}
|
||||
|
||||
if ((is_arg_value || is_arg_string_value) && current_tool) {
|
||||
|
|
@ -273,160 +292,83 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
|
|||
|
||||
std::string value_to_add;
|
||||
if (value_content.empty() && is_arg_string_value) {
|
||||
// Empty string value - start with opening quote
|
||||
// arg_close will add the closing quote
|
||||
if (!current_tool->name.empty()) {
|
||||
value_to_add = "\"";
|
||||
needs_closing_quote = true;
|
||||
} else {
|
||||
value_to_add = "\"";
|
||||
buffer_needs_closing_quote = true;
|
||||
}
|
||||
// Empty string value - arg_close will add the closing quote
|
||||
value_to_add = "\"";
|
||||
closing_quote_pending = true;
|
||||
} else if (!value_content.empty() && is_arg_string_value) {
|
||||
// Schema declares this as string type - always treat as literal string value
|
||||
// Never try to parse as JSON (this ensures consistent handling of quoted strings
|
||||
// like "foo" which would otherwise be parsed as JSON string 'foo')
|
||||
if (!current_tool->name.empty()) {
|
||||
if (!needs_closing_quote) {
|
||||
value_to_add = "\"";
|
||||
needs_closing_quote = true;
|
||||
}
|
||||
} else {
|
||||
if (!buffer_needs_closing_quote) {
|
||||
value_to_add = "\"";
|
||||
buffer_needs_closing_quote = true;
|
||||
}
|
||||
if (!closing_quote_pending) {
|
||||
value_to_add = "\"";
|
||||
closing_quote_pending = true;
|
||||
}
|
||||
// Escape special characters in the string content
|
||||
std::string escaped = json(value_content).dump();
|
||||
// Remove the surrounding quotes from the escaped string
|
||||
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
|
||||
escaped = escaped.substr(1, escaped.size() - 2);
|
||||
}
|
||||
value_to_add += escaped;
|
||||
value_to_add += escape_json_string_inner(value_content);
|
||||
} else if (!value_content.empty()) {
|
||||
// For potential containers, normalize Python-style single quotes to JSON double quotes first
|
||||
// This ensures consistent output during both partial and final parsing
|
||||
// For potential containers, normalize Python-style single quotes to JSON double quotes
|
||||
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
|
||||
if (is_potential_container) {
|
||||
value_content = normalize_quotes_to_json(value_content);
|
||||
}
|
||||
|
||||
// Try to parse as JSON value (number, bool, null, object, array)
|
||||
// For strings, we need special handling to support incremental parsing
|
||||
try {
|
||||
json parsed = json::parse(value_content);
|
||||
if (parsed.is_string()) {
|
||||
// For string values, don't add closing quote yet (added by arg_close)
|
||||
// This ensures incremental parsing produces monotonic arguments
|
||||
// Don't add closing quote yet (added by arg_close) for monotonic streaming
|
||||
std::string escaped = parsed.dump();
|
||||
// Remove the trailing quote
|
||||
if (!escaped.empty() && escaped.back() == '"') {
|
||||
escaped.pop_back();
|
||||
}
|
||||
value_to_add = escaped;
|
||||
if (!current_tool->name.empty()) {
|
||||
needs_closing_quote = true;
|
||||
} else {
|
||||
buffer_needs_closing_quote = true;
|
||||
}
|
||||
value_to_add = escaped;
|
||||
closing_quote_pending = true;
|
||||
} else {
|
||||
// For non-string values (number, bool, null, object, array), add raw value content
|
||||
// Using raw content instead of dump() ensures monotonicity for streaming
|
||||
// (prevents issues with spaces being removed by dump())
|
||||
// Non-string values: use raw content to preserve whitespace for monotonicity
|
||||
value_to_add = value_content;
|
||||
}
|
||||
} catch (...) {
|
||||
// JSON parsing failed - content is either incomplete (partial) or not valid JSON
|
||||
// Note: potential containers were already normalized above, so value_content
|
||||
// already has double quotes if it started with [ or {
|
||||
|
||||
if (node.is_partial && is_potential_container) {
|
||||
// During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet
|
||||
// and don't escape. Just pass through the (already normalized) content.
|
||||
// Partial container: pass through the already-normalized content
|
||||
value_to_add = value_content;
|
||||
} else {
|
||||
// Not valid JSON and NOT a potential partial container - treat as string value
|
||||
// Add opening quote if not already in a string
|
||||
if (!current_tool->name.empty()) {
|
||||
if (!needs_closing_quote) {
|
||||
value_to_add = "\"";
|
||||
needs_closing_quote = true;
|
||||
}
|
||||
} else {
|
||||
if (!buffer_needs_closing_quote) {
|
||||
value_to_add = "\"";
|
||||
buffer_needs_closing_quote = true;
|
||||
}
|
||||
// Not valid JSON - treat as string value
|
||||
if (!closing_quote_pending) {
|
||||
value_to_add = "\"";
|
||||
closing_quote_pending = true;
|
||||
}
|
||||
// Escape special characters in the string content
|
||||
std::string escaped = json(value_content).dump();
|
||||
// Remove the surrounding quotes from the escaped string
|
||||
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
|
||||
escaped = escaped.substr(1, escaped.size() - 2);
|
||||
}
|
||||
value_to_add += escaped;
|
||||
value_to_add += escape_json_string_inner(value_content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have the tool name, add directly; otherwise buffer
|
||||
if (!current_tool->name.empty()) {
|
||||
current_tool->arguments += value_to_add;
|
||||
} else {
|
||||
if (args_buffer.empty()) {
|
||||
args_buffer = "{";
|
||||
}
|
||||
args_buffer += value_to_add;
|
||||
}
|
||||
args_target() += value_to_add;
|
||||
}
|
||||
|
||||
if (is_arg_close && current_tool) {
|
||||
if (!current_tool->name.empty()) {
|
||||
if (needs_closing_quote) {
|
||||
current_tool->arguments += "\"";
|
||||
needs_closing_quote = false;
|
||||
}
|
||||
} else {
|
||||
if (buffer_needs_closing_quote) {
|
||||
if (args_buffer.empty()) {
|
||||
args_buffer = "{";
|
||||
}
|
||||
args_buffer += "\"";
|
||||
buffer_needs_closing_quote = false;
|
||||
}
|
||||
if (closing_quote_pending) {
|
||||
args_target() += "\"";
|
||||
closing_quote_pending = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_tool_close && current_tool) {
|
||||
if (!current_tool->name.empty()) {
|
||||
if (needs_closing_quote) {
|
||||
current_tool->arguments += "\"";
|
||||
needs_closing_quote = false;
|
||||
}
|
||||
if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
|
||||
current_tool->arguments += "}";
|
||||
}
|
||||
// If we have a pending tool call that wasn't added yet, add it now
|
||||
if (pending_tool_call.has_value()) {
|
||||
// Flush buffer to arguments if tool name was never seen
|
||||
if (current_tool->name.empty() && !args_buffer.empty()) {
|
||||
current_tool->arguments = args_buffer;
|
||||
args_buffer.clear();
|
||||
}
|
||||
// Close any pending string quote
|
||||
if (closing_quote_pending) {
|
||||
current_tool->arguments += "\"";
|
||||
closing_quote_pending = false;
|
||||
}
|
||||
// Close any unclosed braces (accounts for nested objects)
|
||||
for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
|
||||
current_tool->arguments += "}";
|
||||
}
|
||||
// Add tool call to results if named; otherwise discard
|
||||
if (pending_tool_call.has_value()) {
|
||||
if (!current_tool->name.empty()) {
|
||||
result.tool_calls.push_back(pending_tool_call.value());
|
||||
pending_tool_call.reset();
|
||||
}
|
||||
} else {
|
||||
// We're closing a tool without a name - flush the buffer
|
||||
if (!args_buffer.empty()) {
|
||||
current_tool->arguments = args_buffer;
|
||||
args_buffer.clear();
|
||||
}
|
||||
if (buffer_needs_closing_quote) {
|
||||
current_tool->arguments += "\"";
|
||||
buffer_needs_closing_quote = false;
|
||||
}
|
||||
// Close the arguments object if using tagged format
|
||||
if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
|
||||
current_tool->arguments += "}";
|
||||
}
|
||||
// Don't add to result if no name - this prevents incomplete tool calls
|
||||
pending_tool_call.reset();
|
||||
}
|
||||
}
|
||||
|
|
@ -511,6 +453,241 @@ static std::pair<std::string, std::string> parse_key_spec(const std::string & ke
|
|||
return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
|
||||
}
|
||||
|
||||
// Mode 1: function_is_key — parse {"function_name": {...}}
|
||||
common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key(
|
||||
const nlohmann::json & tools,
|
||||
const std::string & args_key,
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key) {
|
||||
|
||||
auto tool_choices = choice();
|
||||
|
||||
for (const auto & tool_def : tools) {
|
||||
if (!tool_def.contains("function")) {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_def.at("function");
|
||||
std::string name = function.at("name");
|
||||
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
|
||||
|
||||
// Build inner object fields
|
||||
std::vector<common_peg_parser> inner_fields;
|
||||
|
||||
if (!call_id_key.empty()) {
|
||||
auto id_parser = atomic(
|
||||
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\"")
|
||||
);
|
||||
inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
|
||||
}
|
||||
|
||||
if (!gen_call_id_key.empty()) {
|
||||
auto gen_id_parser = atomic(
|
||||
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
|
||||
}
|
||||
|
||||
// Arguments — either wrapped in args_key or parsed directly
|
||||
common_peg_parser args_parser = eps();
|
||||
if (args_key.empty()) {
|
||||
args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
} else {
|
||||
args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
|
||||
tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
}
|
||||
inner_fields.push_back(args_parser);
|
||||
|
||||
// Build inner object parser
|
||||
common_peg_parser inner_object = eps();
|
||||
if (args_key.empty() && inner_fields.size() == 1) {
|
||||
inner_object = inner_fields[0];
|
||||
} else {
|
||||
inner_object = literal("{") + space();
|
||||
for (size_t i = 0; i < inner_fields.size(); i++) {
|
||||
inner_object = inner_object + inner_fields[i];
|
||||
if (i < inner_fields.size() - 1) {
|
||||
inner_object = inner_object + space();
|
||||
}
|
||||
}
|
||||
inner_object = inner_object + space() + literal("}");
|
||||
}
|
||||
|
||||
auto tool_parser = tool(
|
||||
tool_open(literal("{")) + space() +
|
||||
literal("\"") + tool_name(literal(name)) + literal("\"") +
|
||||
space() + literal(":") + space() +
|
||||
inner_object +
|
||||
space() + tool_close(literal("}"))
|
||||
);
|
||||
|
||||
tool_choices |= rule("tool-" + name, tool_parser);
|
||||
}
|
||||
|
||||
return tool_choices;
|
||||
}
|
||||
|
||||
// Mode 2: Nested keys (dot notation like "function.name")
|
||||
common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys(
|
||||
const nlohmann::json & tools,
|
||||
const std::string & effective_name_key,
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key) {
|
||||
|
||||
auto tool_choices = choice();
|
||||
|
||||
auto name_spec = parse_key_spec(effective_name_key);
|
||||
auto args_spec = parse_key_spec(effective_args_key);
|
||||
|
||||
std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
|
||||
std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
|
||||
std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
|
||||
|
||||
for (const auto & tool_def : tools) {
|
||||
if (!tool_def.contains("function")) {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_def.at("function");
|
||||
std::string name = function.at("name");
|
||||
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
|
||||
|
||||
auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
|
||||
literal("\"") + tool_name(literal(name)) + literal("\"");
|
||||
auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
|
||||
tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
|
||||
auto nested_object = literal("{") + space() +
|
||||
nested_name + space() + literal(",") + space() +
|
||||
nested_args +
|
||||
space() + literal("}");
|
||||
|
||||
// Format: { id?, "function": {...} }
|
||||
auto tool_parser_body = tool_open(literal("{")) + space();
|
||||
|
||||
if (!call_id_key.empty()) {
|
||||
auto id_spec = parse_key_spec(call_id_key);
|
||||
if (id_spec.first.empty()) {
|
||||
auto id_parser = atomic(
|
||||
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\"")
|
||||
);
|
||||
tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
|
||||
}
|
||||
}
|
||||
|
||||
if (!gen_call_id_key.empty()) {
|
||||
auto gen_id_spec = parse_key_spec(gen_call_id_key);
|
||||
if (gen_id_spec.first.empty()) {
|
||||
auto gen_id_parser = atomic(
|
||||
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
|
||||
}
|
||||
}
|
||||
|
||||
auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
|
||||
tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
|
||||
|
||||
tool_choices |= rule("tool-" + name, tool(tool_parser_body));
|
||||
}
|
||||
|
||||
return tool_choices;
|
||||
}
|
||||
|
||||
// Mode 3: Flat keys with optional ID fields and parameter ordering
|
||||
common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys(
|
||||
const nlohmann::json & tools,
|
||||
const std::string & effective_name_key,
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key,
|
||||
const std::vector<std::string> & parameters_order) {
|
||||
|
||||
auto tool_choices = choice();
|
||||
auto name_key_parser = literal("\"" + effective_name_key + "\"");
|
||||
auto args_key_parser = literal("\"" + effective_args_key + "\"");
|
||||
|
||||
for (const auto & tool_def : tools) {
|
||||
if (!tool_def.contains("function")) {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_def.at("function");
|
||||
std::string name = function.at("name");
|
||||
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
|
||||
|
||||
auto tool_name_ = name_key_parser + space() + literal(":") + space() +
|
||||
literal("\"") + tool_name(literal(name)) + literal("\"");
|
||||
auto tool_args_ = args_key_parser + space() + literal(":") + space() +
|
||||
tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
|
||||
// Build ID parsers if keys are provided
|
||||
common_peg_parser id_parser = eps();
|
||||
if (!call_id_key.empty()) {
|
||||
id_parser = atomic(
|
||||
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
common_peg_parser gen_id_parser = eps();
|
||||
if (!gen_call_id_key.empty()) {
|
||||
gen_id_parser = atomic(
|
||||
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// Create (parser, key) pairs for all fields, then sort by parameters_order
|
||||
std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
|
||||
parser_pairs.emplace_back(tool_name_, effective_name_key);
|
||||
parser_pairs.emplace_back(tool_args_, effective_args_key);
|
||||
if (!call_id_key.empty()) {
|
||||
parser_pairs.emplace_back(optional(id_parser), call_id_key);
|
||||
}
|
||||
if (!gen_call_id_key.empty()) {
|
||||
parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
|
||||
}
|
||||
|
||||
std::sort(parser_pairs.begin(), parser_pairs.end(),
|
||||
[¶meters_order](const auto & a, const auto & b) {
|
||||
auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
|
||||
auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
|
||||
size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
|
||||
size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
|
||||
return idx_a < idx_b;
|
||||
});
|
||||
|
||||
auto ordered_body = tool_open(literal("{")) + space();
|
||||
for (size_t i = 0; i < parser_pairs.size(); i++) {
|
||||
ordered_body = ordered_body + parser_pairs[i].first;
|
||||
if (i < parser_pairs.size() - 1) {
|
||||
ordered_body = ordered_body + space() + literal(",") + space();
|
||||
}
|
||||
}
|
||||
ordered_body = ordered_body + space() + tool_close(literal("}"));
|
||||
|
||||
tool_choices |= rule("tool-" + name, tool(ordered_body));
|
||||
}
|
||||
|
||||
return tool_choices;
|
||||
}
|
||||
|
||||
common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
|
||||
const std::string & section_start,
|
||||
const std::string & section_end,
|
||||
|
|
@ -528,239 +705,20 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
|
|||
return eps();
|
||||
}
|
||||
|
||||
// Build tool choices for JSON format
|
||||
auto tool_choices = choice();
|
||||
// auto other_member = json_string() + space() + literal(":") + space() + json();
|
||||
|
||||
// Determine effective field names
|
||||
std::string effective_name_key = name_key.empty() ? "name" : name_key;
|
||||
std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
|
||||
|
||||
// Check if we have nested keys (dot notation)
|
||||
auto name_spec = parse_key_spec(effective_name_key);
|
||||
auto args_spec = parse_key_spec(effective_args_key);
|
||||
bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty();
|
||||
|
||||
// Mode 1: function_is_key - parse {"function_name": {...}}
|
||||
// Dispatch to the appropriate builder based on the JSON layout mode
|
||||
common_peg_parser tool_choices = eps();
|
||||
if (function_is_key) {
|
||||
for (const auto & tool_def : tools) {
|
||||
if (!tool_def.contains("function")) {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_def.at("function");
|
||||
std::string name = function.at("name");
|
||||
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
|
||||
|
||||
// Build inner object fields
|
||||
std::vector<common_peg_parser> inner_fields;
|
||||
|
||||
// Add optional string ID field
|
||||
if (!call_id_key.empty()) {
|
||||
auto id_parser = atomic(
|
||||
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\"")
|
||||
);
|
||||
inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
|
||||
}
|
||||
|
||||
// Add optional generated integer ID field
|
||||
if (!gen_call_id_key.empty()) {
|
||||
auto gen_id_parser = atomic(
|
||||
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
|
||||
}
|
||||
|
||||
// Add arguments - either wrapped in args_key or parsed directly
|
||||
common_peg_parser args_parser = eps();
|
||||
if (args_key.empty()) {
|
||||
// Arguments are directly the inner object value: {"func_name": {"arg1": "val"}}
|
||||
args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
} else {
|
||||
// Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}}
|
||||
args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
|
||||
tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
}
|
||||
inner_fields.push_back(args_parser);
|
||||
|
||||
// Build inner object parser - no greedy other_member skipping to avoid consuming ID
|
||||
common_peg_parser inner_object = eps();
|
||||
if (args_key.empty() && inner_fields.size() == 1) {
|
||||
// Direct arguments: {"func_name": {"arg1": "val"}}
|
||||
// The args_parser is already the full object schema
|
||||
inner_object = inner_fields[0];
|
||||
} else {
|
||||
// Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}}
|
||||
inner_object = literal("{") + space();
|
||||
for (size_t i = 0; i < inner_fields.size(); i++) {
|
||||
inner_object = inner_object + inner_fields[i];
|
||||
if (i < inner_fields.size() - 1) {
|
||||
inner_object = inner_object + space();
|
||||
}
|
||||
}
|
||||
inner_object = inner_object + space() + literal("}");
|
||||
}
|
||||
|
||||
// Tool call format: { "function_name": { inner_object } }
|
||||
auto tool_parser = tool(
|
||||
tool_open(literal("{")) + space() +
|
||||
literal("\"") + tool_name(literal(name)) + literal("\"") +
|
||||
space() + literal(":") + space() +
|
||||
inner_object +
|
||||
space() + tool_close(literal("}"))
|
||||
);
|
||||
|
||||
tool_choices |= rule("tool-" + name, tool_parser);
|
||||
}
|
||||
}
|
||||
// Mode 2: Nested keys (dot notation like "function.name")
|
||||
else if (has_nested_keys) {
|
||||
// Group fields by prefix
|
||||
std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
|
||||
std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
|
||||
std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
|
||||
|
||||
for (const auto & tool_def : tools) {
|
||||
if (!tool_def.contains("function")) {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_def.at("function");
|
||||
std::string name = function.at("name");
|
||||
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
|
||||
|
||||
// Build nested object with name and arguments
|
||||
auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
|
||||
literal("\"") + tool_name(literal(name)) + literal("\"");
|
||||
auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
|
||||
tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
|
||||
auto nested_object = literal("{") + space() +
|
||||
nested_name + space() + literal(",") + space() +
|
||||
nested_args +
|
||||
space() + literal("}");
|
||||
|
||||
// Build top-level parser - simpler structure without greedy other_member skipping
|
||||
// Format: { id?, "function": {...} }
|
||||
auto tool_parser_body = tool_open(literal("{")) + space();
|
||||
|
||||
// Add optional string ID field at top level
|
||||
if (!call_id_key.empty()) {
|
||||
auto id_spec = parse_key_spec(call_id_key);
|
||||
if (id_spec.first.empty()) { // Top-level ID field
|
||||
auto id_parser = atomic(
|
||||
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\"")
|
||||
);
|
||||
tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional generated integer ID field at top level
|
||||
if (!gen_call_id_key.empty()) {
|
||||
auto gen_id_spec = parse_key_spec(gen_call_id_key);
|
||||
if (gen_id_spec.first.empty()) { // Top-level gen ID field
|
||||
auto gen_id_parser = atomic(
|
||||
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
|
||||
}
|
||||
}
|
||||
|
||||
// Add the nested object field
|
||||
auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
|
||||
tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
|
||||
|
||||
tool_choices |= rule("tool-" + name, tool(tool_parser_body));
|
||||
}
|
||||
}
|
||||
// Mode 3: Flat keys (enhanced with ID fields and parameter ordering)
|
||||
else {
|
||||
auto name_key_parser = literal("\"" + effective_name_key + "\"");
|
||||
auto args_key_parser = literal("\"" + effective_args_key + "\"");
|
||||
|
||||
for (const auto & tool_def : tools) {
|
||||
if (!tool_def.contains("function")) {
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool_def.at("function");
|
||||
std::string name = function.at("name");
|
||||
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
|
||||
|
||||
auto tool_name_ = name_key_parser + space() + literal(":") + space() +
|
||||
literal("\"") + tool_name(literal(name)) + literal("\"");
|
||||
auto tool_args_ = args_key_parser + space() + literal(":") + space() +
|
||||
tool_args(schema(json(), "tool-" + name + "-schema", params));
|
||||
|
||||
// Build ID parsers if keys are provided
|
||||
common_peg_parser id_parser = eps();
|
||||
if (!call_id_key.empty()) {
|
||||
id_parser = atomic(
|
||||
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
common_peg_parser gen_id_parser = eps();
|
||||
if (!gen_call_id_key.empty()) {
|
||||
gen_id_parser = atomic(
|
||||
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
|
||||
choice({
|
||||
literal("\"") + tool_id(json_string_content()) + literal("\""),
|
||||
tool_id(json_number())
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
common_peg_parser tool_parser = eps();
|
||||
|
||||
// Use parameter ordering if provided - parse fields in specified order without greedy skipping
|
||||
if (!parameters_order.empty()) {
|
||||
}
|
||||
// Build parser using parameter ordering (works with or without explicit parameters_order)
|
||||
// Create list of (parser, key) pairs for all fields
|
||||
std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
|
||||
parser_pairs.emplace_back(tool_name_, effective_name_key);
|
||||
parser_pairs.emplace_back(tool_args_, effective_args_key);
|
||||
if (!call_id_key.empty()) {
|
||||
parser_pairs.emplace_back(optional(id_parser), call_id_key);
|
||||
}
|
||||
if (!gen_call_id_key.empty()) {
|
||||
parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
|
||||
}
|
||||
|
||||
// Sort by position in parameters_order (or at end if not present)
|
||||
std::sort(parser_pairs.begin(), parser_pairs.end(),
|
||||
[&parameters_order](const auto & a, const auto & b) {
|
||||
auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
|
||||
auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
|
||||
size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
|
||||
size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
|
||||
return idx_a < idx_b;
|
||||
});
|
||||
|
||||
// Build ordered parser
|
||||
auto ordered_body = tool_open(literal("{")) + space();
|
||||
for (size_t i = 0; i < parser_pairs.size(); i++) {
|
||||
ordered_body = ordered_body + parser_pairs[i].first;
|
||||
if (i < parser_pairs.size() - 1) {
|
||||
ordered_body = ordered_body + space() + literal(",") + space();
|
||||
}
|
||||
}
|
||||
ordered_body = ordered_body + space() + tool_close(literal("}"));
|
||||
tool_parser = tool(ordered_body);
|
||||
|
||||
tool_choices |= rule("tool-" + name, tool_parser);
|
||||
tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
|
||||
} else {
|
||||
auto name_spec = parse_key_spec(effective_name_key);
|
||||
auto args_spec = parse_key_spec(effective_args_key);
|
||||
if (!name_spec.first.empty() || !args_spec.first.empty()) {
|
||||
tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
|
||||
} else {
|
||||
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -770,7 +728,6 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
|
|||
tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
|
||||
}
|
||||
|
||||
// Optionally wrap in array brackets
|
||||
if (array_wrapped) {
|
||||
tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,6 +108,27 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder {
|
|||
const nlohmann::json & tools,
|
||||
bool parallel_tool_calls,
|
||||
bool force_tool_calls);
|
||||
|
||||
private:
|
||||
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
|
||||
common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools,
|
||||
const std::string & args_key,
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key);
|
||||
|
||||
common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools,
|
||||
const std::string & effective_name_key,
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key);
|
||||
|
||||
common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools,
|
||||
const std::string & effective_name_key,
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key,
|
||||
const std::vector<std::string> & parameters_order);
|
||||
};
|
||||
|
||||
inline common_peg_arena build_chat_peg_unified_parser(
|
||||
|
|
@ -119,11 +140,14 @@ inline common_peg_arena build_chat_peg_unified_parser(
|
|||
|
||||
class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
|
||||
std::optional<common_chat_tool_call> pending_tool_call; // Tool call waiting for name
|
||||
common_chat_tool_call * current_tool = nullptr;
|
||||
int arg_count = 0;
|
||||
bool needs_closing_quote = false;
|
||||
common_chat_tool_call * current_tool = nullptr;
|
||||
int arg_count = 0;
|
||||
bool closing_quote_pending = false;
|
||||
std::string args_buffer; // Buffer to delay arguments until tool name is known
|
||||
bool buffer_needs_closing_quote = false; // Track quote state for buffered args
|
||||
|
||||
// Returns a reference to the active argument destination string.
|
||||
// Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
|
||||
std::string & args_target();
|
||||
|
||||
public:
|
||||
common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
||||
|
|
|
|||
|
|
@ -369,6 +369,28 @@ static common_chat_tool amount_tool{
|
|||
})",
|
||||
};
|
||||
|
||||
static common_chat_tool imaginary_number_tool{
|
||||
/* .name = */ "imaginary_number",
|
||||
/* .description = */ "Imaginary number converter",
|
||||
/* .parameters = */ R"({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"number": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"real": {
|
||||
"type": "number"
|
||||
},
|
||||
"imaginary": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["real", "imaginary"]
|
||||
}
|
||||
},
|
||||
"required": ["number"]
|
||||
})",
|
||||
};
|
||||
|
||||
static common_chat_tool string_param_tool{
|
||||
/* .name = */ "string_param",
|
||||
|
|
@ -394,7 +416,7 @@ static common_chat_tool quoted_unquoted_tool{
|
|||
"quoted": {
|
||||
"type": "string",
|
||||
"description": "Quoted value"
|
||||
},
|
||||
},
|
||||
"unquoted": {
|
||||
"type": "string",
|
||||
"description": "Unquoted value"
|
||||
|
|
@ -2323,6 +2345,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
|||
})
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"Test imaginary number\n"
|
||||
"</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=imaginary_number>\n"
|
||||
"<parameter=number>\n"
|
||||
"{ \"real\": 3.14, \"imaginary\": 2.71 }\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(true)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
|
||||
.tools({ imaginary_number_tool })
|
||||
.expect_reasoning("Test imaginary number")
|
||||
.expect_tool_calls({
|
||||
{ "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} }
|
||||
})
|
||||
.run();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue