Merge e384c6fefe into 0ccbfdef3e
commit a4230e169e
@@ -48,10 +48,11 @@ add_library(${TARGET} STATIC
    arg.cpp
    arg.h
    base64.hpp
    chat-parser.cpp
    chat-parser.h
    chat-parser-xml-toolcall.h
    chat-parser-xml-toolcall.cpp
    chat-auto-parser-generator.cpp
    chat-auto-parser-helpers.cpp
    chat-auto-parser.h
    chat-diff-analyzer.cpp
    chat-diff-analyzer.h
    chat-peg-parser.cpp
    chat-peg-parser.h
    chat.cpp
@@ -0,0 +1,388 @@
#include "chat-auto-parser.h"
#include "chat-diff-analyzer.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "json-schema-to-grammar.h"
#include "nlohmann/json.hpp"
#include <string>

using json = nlohmann::ordered_json;

// Helper to iterate over tools/functions
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
    for (const auto & tool : tools) {
        if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
            continue;
        }
        fn(tool);
    }
}

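// Illustrative note: foreach_function assumes the OpenAI-style tools array that is
// passed through `inputs.tools`; entries that are not `"type": "function"` are skipped.
// A minimal sketch of the assumed shape (tool name and parameters are hypothetical):
//
//   [
//     {
//       "type": "function",
//       "function": {
//         "name": "get_weather",
//         "parameters": { "type": "object", "properties": { "location": { "type": "string" } } }
//       }
//     }
//   ]
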
namespace autoparser {

parser_build_context::parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs)
    : p(p), inputs(inputs), reasoning_parser(p.eps()) {}

common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl,
                                                            const struct templates_params & inputs) {
    // Run differential analysis to extract template structure
    analyze_template analysis(tmpl);
    return generate_parser(tmpl, inputs, analysis);
}

common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl,
                                                            const struct templates_params & inputs,
                                                            const analyze_template & analysis) {
    // Build the parser using the analysis results
    auto parser = analysis.build_parser(inputs);

    // Create the result structure
    common_chat_params data;
    data.prompt = common_chat_template_direct_apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
    data.preserved_tokens = analysis.preserved_tokens;
    data.parser = parser.save();

    // Build grammar if tools are present
    bool has_tools = analysis.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty();
    bool include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;

    if (include_grammar) {
        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                auto schema = function.at("parameters");
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        // Set grammar triggers based on tool section markers (fall back to per-call markers)
        std::string trigger_marker = !analysis.tools.format.section_start.empty()
                                         ? analysis.tools.format.section_start
                                         : analysis.tools.format.per_call_start;
        if (!trigger_marker.empty()) {
            data.grammar_triggers = {
                { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
            };
        }
    }

    return data;
}

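// Hedged usage sketch of the generator above (assumes a loaded `tmpl` and a filled-in
// templates_params; field and member names are taken from chat-auto-parser.h):
//
//   templates_params inputs;
//   inputs.messages = json::array();   // chat messages in template format
//   inputs.tools    = json::array();   // optional OpenAI-style tool definitions
//   inputs.enable_thinking = true;
//
//   common_chat_params params = universal_peg_generator::generate_parser(tmpl, inputs);
//   // params.prompt, params.parser, params.grammar and params.grammar_triggers are
//   // then consumed by the existing chat pipeline.
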
common_peg_arena analyze_template::build_parser(const templates_params & inputs) const {
    return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
        p.set_allow_python_dict_format(true);

        parser_build_context ctx(p, inputs);
        bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
        bool enable_thinking = inputs.enable_thinking;

        ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
        ctx.content = &content;

        // Build reasoning parser
        ctx.reasoning_parser = reasoning.build_parser(ctx);

        bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
        bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();

        if (has_response_format) {
            return ctx.reasoning_parser + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
        }

        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
            return tools.build_parser(ctx);
        }

        return content.build_parser(ctx);
    });
}

common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const {
    auto & p = ctx.p;

    if (!ctx.extracting_reasoning) {
        return p.eps();
    }

    bool thinking_forced_open = (mode == reasoning_mode::FORCED_OPEN);
    bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED);

    if (thinking_forced_open || thinking_forced_closed) {
        // Thinking is forced open OR forced closed with enable_thinking=true
        // In both cases, expect only the closing tag (opening was in template)
        return p.reasoning(p.until(end)) + end;
    }
    if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
        // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
        // Both use the same tag-based pattern if markers are available
        if (!start.empty() && !end.empty()) {
            return p.optional(start + p.reasoning(p.until(end)) + end);
        }
    } else if (mode == reasoning_mode::DELIMITER) {
        return p.optional(p.reasoning(p.until(end)) + end);
    }

    return p.eps();
}

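// Illustrative examples of the text the reasoning parser above accepts, assuming
// start = "<think>" and end = "</think>" (actual markers come from the analysis):
//
//   TAG_BASED / TOOLS_ONLY:      "<think>step by step...</think>Hello!"   (tags optional)
//   FORCED_OPEN / FORCED_CLOSED: "step by step...</think>Hello!"          (the opening tag
//                                was already emitted by the template, only the close is parsed)
//   DELIMITER:                   "step by step...[BEGIN FINAL RESPONSE]Hello!"
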
common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const {
    auto & p = ctx.p;

    if (is_always_wrapped()) {
        if (ctx.extracting_reasoning) {
            return ctx.reasoning_parser + start + p.content(p.until(end)) + end + p.end();
        }
        return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
    }
    return ctx.reasoning_parser + p.content(p.rest()) + p.end();
}

common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & ctx) const {
    auto & p = ctx.p;

    if (is_always_wrapped()) {
        return p.optional(start + p.content(p.until(end)) + end);
    }
    return p.eps();
}

common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const {
    switch (format.mode) {
        case tool_format::JSON_NATIVE:
            return build_tool_parser_json_native(ctx);
        case tool_format::TAG_WITH_JSON:
            return build_tool_parser_tag_json(ctx);
        case tool_format::TAG_WITH_TAGGED:
            return build_tool_parser_tag_tagged(ctx);
        default:
            GGML_ABORT("Unable to create tool parser");
    }
}

common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const {
    auto & p = ctx.p;
    const auto & inputs = ctx.inputs;

    // Build effective field names with dot notation if function_field is set
    std::string name_field = format.name_field;
    std::string args_field = format.args_field;

    if (!format.function_field.empty() &&
        format.function_field != "function" &&
        name_field.find('.') == std::string::npos) {
        name_field = format.function_field + "." + name_field;
        args_field = format.function_field + "." + args_field;
    }

    auto tools_parser = p.standard_json_tools(
        format.section_start,
        format.section_end,
        inputs.tools,
        inputs.parallel_tool_calls,
        inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
        name_field,
        args_field,
        format.tools_array_wrapped,
        format.fun_name_is_key,
        format.id_field,
        format.gen_id_field,
        format.parameter_order
    );

    // Handle content wrappers if present
    if (ctx.content && ctx.content->is_always_wrapped()) {
        auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
        return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
    }

    auto content_before_tools = format.section_start.empty() ? p.eps() : p.until(format.section_start);
    return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
}

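// Illustrative example of the JSON-native tool-call text this parser targets
// (shape per tool_format::JSON_NATIVE in chat-diff-analyzer.h; markers such as
// "<tool_call>" depend on the analyzed template):
//
//   <tool_call>{"name": "get_weather", "arguments": {"location": "Paris"}}</tool_call>
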
common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
    auto & p = ctx.p;
    const auto & inputs = ctx.inputs;

    common_peg_parser tool_choice = p.choice();

    foreach_function(inputs.tools, [&](const json & tool) {
        const auto & func = tool.at("function");
        std::string name = func.at("name");
        const auto & schema = func.at("parameters");

        // Build call_id parser based on position (if supported)
        common_peg_parser call_id_section = p.eps();
        if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
            !call_id.prefix.empty() && !call_id.suffix.empty()) {
            call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
        }

        auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
                           call_id_section +
                           p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));

        if (!function.close.empty()) {
            func_parser = func_parser + function.close;
        }

        tool_choice |= p.rule("tool-" + name, func_parser);
    });

    auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    common_peg_parser tool_calls = p.eps();

    if (!format.per_call_start.empty()) {
        auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end;
        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call",
                wrapped_call + p.zero_or_more(p.space() + wrapped_call));
        } else {
            tool_calls = p.trigger_rule("tool-call", wrapped_call);
        }
        if (!format.section_start.empty()) {
            tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() +
                tool_calls + p.space() + (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
        }
    } else {
        std::string separator = ", "; // Default
        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call",
                format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + format.section_end);
        } else {
            tool_calls = p.trigger_rule("tool-call",
                format.section_start + tool_choice + format.section_end);
        }
    }

    if (!require_calls) {
        tool_calls = p.optional(tool_calls);
    }

    std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
    auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
    return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}

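// Illustrative example of the tag-with-JSON-arguments form handled above
// (shape per tool_format::TAG_WITH_JSON; the exact markers are template-specific):
//
//   <tool_call><function=get_weather>{"location": "Paris"}</function></tool_call>
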
common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
    auto & p = ctx.p;
    const auto & inputs = ctx.inputs;

    common_peg_parser tool_choice = p.choice();

    foreach_function(inputs.tools, [&](const json & tool) {
        const auto & func = tool.at("function");
        std::string name = func.at("name");
        const auto & params = func.at("parameters");

        if (!params.contains("properties") || !params.at("properties").is_object()) {
            return;
        }

        const auto & properties = params.at("properties");
        std::set<std::string> required;
        if (params.contains("required") && params.at("required").is_array()) {
            params.at("required").get_to(required);
        }

        // Build parser for each argument
        std::vector<common_peg_parser> arg_parsers;
        for (const auto & [param_name, param_schema] : properties.items()) {
            bool is_required = required.find(param_name) != required.end();
            auto type = param_schema.value("type", "object");

            auto arg = p.tool_arg(
                p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix +
                (type == "string" ?
                    p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
                        "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
                    p.tool_arg_json_value(p.schema(p.json(),
                        "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
                p.tool_arg_close(p.literal(arguments.value_suffix))
            );

            if (is_required) {
                arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
            } else {
                arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
            }
        }

        // Build arg sequence with space() between consecutive args
        common_peg_parser args_seq = p.eps();
        for (size_t i = 0; i < arg_parsers.size(); i++) {
            if (i > 0) {
                args_seq = args_seq + p.space();
            }
            args_seq = args_seq + arg_parsers[i];
        }

        // Build call_id parser based on position (if supported)
        common_peg_parser call_id_section = p.eps();
        if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
            !call_id.prefix.empty() && !call_id.suffix.empty()) {
            call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
        }

        auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
                           call_id_section +
                           p.space() + args_seq;

        if (!function.close.empty()) {
            func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close));
        } else if (!format.per_call_end.empty()) {
            // When there's no func_close but there is a per_call_end marker, use peek() to ensure
            // we only emit tool_close when we can actually see the closing marker. This prevents
            // premature closing during partial parsing when we've seen e.g. "</" which could be
            // either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
            func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end)));
        } else {
            func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
        }

        tool_choice |= p.rule("tool-" + name, func_parser);
    });

    auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    common_peg_parser tool_calls = p.eps();

    if (!format.per_call_start.empty()) {
        auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end;
        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
        } else {
            tool_calls = p.trigger_rule("tool-call", wrapped_call);
        }
        if (!format.section_start.empty()) {
            tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() +
                tool_calls + p.space() + (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
        }
    } else {
        std::string separator = ", "; // Default

        if (inputs.parallel_tool_calls) {
            tool_calls = p.trigger_rule("tool-call",
                format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + format.section_end);
        } else {
            tool_calls = p.trigger_rule("tool-call",
                format.section_start + p.space() + tool_choice + p.space() + format.section_end);
        }
    }

    if (!require_tools) {
        tool_calls = p.optional(tool_calls);
    }

    std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
    auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
    return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}

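// Illustrative example of the tagged-arguments form handled above
// (shape per tool_format::TAG_WITH_TAGGED; marker spellings vary by template):
//
//   <tool_call><function=get_weather>
//   <arg_key>location</arg_key><arg_value>Paris</arg_value>
//   </function></tool_call>
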
} // namespace autoparser

@@ -0,0 +1,348 @@
|
|||
#include "chat-auto-parser-helpers.h"
|
||||
|
||||
#include "chat-auto-parser.h"
|
||||
#include "chat-diff-analyzer.h"
|
||||
#include "chat.h"
|
||||
#include "log.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
|
||||
#include <cctype>
|
||||
#include <numeric>
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
std::string trim_whitespace(const std::string & str) {
|
||||
size_t start = 0;
|
||||
while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
|
||||
start++;
|
||||
}
|
||||
|
||||
if (start == str.length()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
size_t end = str.length() - 1;
|
||||
while (end > start && std::isspace(static_cast<unsigned char>(str[end]))) {
|
||||
end--;
|
||||
}
|
||||
|
||||
return str.substr(start, end - start + 1);
|
||||
}
|
||||
|
||||
std::string trim_leading_whitespace(const std::string & str) {
|
||||
size_t start = 0;
|
||||
while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
|
||||
start++;
|
||||
}
|
||||
|
||||
return str.substr(start);
|
||||
}
|
||||
|
||||
std::string trim_trailing_whitespace(const std::string & str) {
|
||||
if (str.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
size_t end = str.length() - 1;
|
||||
while (end > 0 && std::isspace(static_cast<unsigned char>(str[end]))) {
|
||||
end--;
|
||||
}
|
||||
|
||||
// If first char is also whitespace, return empty string
|
||||
if (end == 0 && std::isspace(static_cast<unsigned char>(str[0]))) {
|
||||
return "";
|
||||
}
|
||||
|
||||
return str.substr(0, end + 1);
|
||||
}
|
||||
|
||||
std::string trim_trailing_newlines(const std::string & str) {
|
||||
size_t end = str.length();
|
||||
while (end > 0 && str[end - 1] == '\n') {
|
||||
end--;
|
||||
}
|
||||
|
||||
return str.substr(0, end);
|
||||
}
|
||||
|
||||
static size_t common_prefix_len(const std::string & left, const std::string & right) {
|
||||
size_t prefix_len = 0;
|
||||
size_t min_len = std::min(left.length(), right.length());
|
||||
while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) {
|
||||
prefix_len++;
|
||||
}
|
||||
return prefix_len;
|
||||
}
|
||||
|
||||
static size_t common_suffix_len(const std::string & left, const std::string & right) {
|
||||
size_t suffix_len = 0;
|
||||
size_t min_len = std::min(left.length(), right.length());
|
||||
while (suffix_len < min_len && left[left.length() - 1 - suffix_len] == right[right.length() - 1 - suffix_len]) {
|
||||
suffix_len++;
|
||||
}
|
||||
return suffix_len;
|
||||
}
|
||||
|
||||
diff_split calculate_diff_split(const std::string & left, const std::string & right) {
|
||||
diff_split result;
|
||||
|
||||
auto left_seg = segmentize_markers(left);
|
||||
auto right_seg = segmentize_markers(right);
|
||||
|
||||
if (left_seg.empty()) {
|
||||
result.right = right;
|
||||
return result;
|
||||
}
|
||||
if (right_seg.empty()) {
|
||||
result.left = left;
|
||||
return result;
|
||||
}
|
||||
|
||||
auto left_start = left_seg.begin();
|
||||
auto left_end = --left_seg.end();
|
||||
auto right_start = right_seg.begin();
|
||||
auto right_end = --right_seg.end();
|
||||
|
||||
auto test = [&] () {
|
||||
return left_start != left_end && right_start != right_end;
|
||||
};
|
||||
|
||||
bool left_fully_consumed = false;
|
||||
bool right_fully_consumed = false;
|
||||
|
||||
while (test()) {
|
||||
bool advanced = false;
|
||||
if (*left_start == *right_start) {
|
||||
result.prefix.append(left_start->value);
|
||||
left_start++;
|
||||
right_start++;
|
||||
advanced = true;
|
||||
}
|
||||
if (*left_end == *right_end) {
|
||||
result.suffix = left_end->value + result.suffix;
|
||||
if (left_start != left_end) {
|
||||
left_end--;
|
||||
} else {
|
||||
left_fully_consumed = true;
|
||||
}
|
||||
if (right_start != right_end) {
|
||||
right_end--;
|
||||
} else {
|
||||
right_fully_consumed = true;
|
||||
}
|
||||
advanced = true;
|
||||
}
|
||||
if (!advanced) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (left_start == left_end && right_start != right_end) {
|
||||
if (*left_start == *right_end) {
|
||||
result.suffix = right_end->value + result.suffix;
|
||||
right_end--;
|
||||
left_fully_consumed = true;
|
||||
} else if (*left_start == *right_start) {
|
||||
result.prefix.append(right_start->value);
|
||||
right_start++;
|
||||
left_fully_consumed = true;
|
||||
}
|
||||
} else if (right_start == right_end && left_start != left_end) {
|
||||
if (*left_end == *right_start) {
|
||||
result.suffix = left_end->value + result.suffix;
|
||||
left_end--;
|
||||
right_fully_consumed = true;
|
||||
} else if (*left_start == *right_start) {
|
||||
result.prefix.append(left_start->value);
|
||||
left_start++;
|
||||
right_fully_consumed = true;
|
||||
}
|
||||
} else if (left_start == left_end && right_start == right_end && *left_start == *right_start && left_start->type == segment_type::MARKER) {
|
||||
result.prefix.append(right_start->value);
|
||||
left_fully_consumed = true;
|
||||
right_fully_consumed = true;
|
||||
}
|
||||
|
||||
auto eat_segment = [](std::string & str, segment & seg) -> std::string { return str.append(seg.value); };
|
||||
|
||||
bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT;
|
||||
bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT;
|
||||
|
||||
std::string remainder_left = std::accumulate(left_start, left_fully_consumed ? left_end : ++left_end, std::string(), eat_segment);
|
||||
std::string remainder_right = std::accumulate(right_start, right_fully_consumed ? right_end : ++right_end, std::string(), eat_segment);
|
||||
|
||||
size_t suffix_len = can_have_text_suffix ? common_suffix_len(remainder_left, remainder_right) : 0;
|
||||
// avoid overlaps between prefix and suffix
|
||||
size_t prefix_len = can_have_text_prefix ? common_prefix_len(remainder_left.substr(0, remainder_left.size() - suffix_len),
|
||||
remainder_right.substr(0, remainder_right.size() - suffix_len)) : 0;
|
||||
|
||||
result.prefix.append(remainder_left.substr(0, prefix_len));
|
||||
result.suffix = remainder_left.substr(remainder_left.length() - suffix_len, suffix_len) + result.suffix;
|
||||
result.left = remainder_left.substr(prefix_len, remainder_left.length() - prefix_len - suffix_len);
|
||||
result.right = remainder_right.substr(prefix_len, remainder_right.length() - prefix_len - suffix_len);
|
||||
|
||||
if (result.left == "" && result.right == "") {
|
||||
// degenerate case, no diff
|
||||
result.prefix = left;
|
||||
result.suffix = "";
|
||||
// pick prefix = all as representation
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
|
||||
std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) {
|
||||
// Find the common prefix of left and right
|
||||
size_t common_prefix_len = 0;
|
||||
size_t min_len = std::min(left.length(), right.length());
|
||||
while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) {
|
||||
common_prefix_len++;
|
||||
}
|
||||
|
||||
// If there's no common prefix, return empty string
|
||||
if (common_prefix_len == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Find the common prefix in the full string
|
||||
std::string common_prefix = left.substr(0, common_prefix_len);
|
||||
size_t pos = full.find(common_prefix);
|
||||
|
||||
// If not found, return empty string
|
||||
if (pos == std::string::npos) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Return everything before the common prefix
|
||||
return full.substr(0, pos);
|
||||
}
|
||||
|
||||
// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
|
||||
std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) {
|
||||
// Find the common suffix of left and right (compare from the end)
|
||||
size_t common_suffix_len = 0;
|
||||
size_t min_len = std::min(left.length(), right.length());
|
||||
while (common_suffix_len < min_len &&
|
||||
left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) {
|
||||
common_suffix_len++;
|
||||
}
|
||||
|
||||
// If there's no common suffix, return empty string
|
||||
if (common_suffix_len == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Extract the common suffix
|
||||
std::string common_suffix = left.substr(left.length() - common_suffix_len);
|
||||
|
||||
// Find the last occurrence of the common suffix in the full string
|
||||
size_t pos = full.rfind(common_suffix);
|
||||
|
||||
// If not found, return empty string
|
||||
if (pos == std::string::npos) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Return everything after the common suffix
|
||||
return full.substr(pos + common_suffix_len);
|
||||
}
|
||||
|
||||
// TODO: segmentize will treat a JSON array inside tags as a tag: <calls>[{ "fun": { ... } }]</calls> will be three markers
|
||||
// not too worried about that because it hasn't turned out as a problem anywhere, but noting here in case it will
|
||||
// Might have to put some restrictions on tag contents as well (like "no { }")
|
||||
std::vector<segment> segmentize_markers(const std::string & text) {
|
||||
std::vector<segment> retval;
|
||||
bool in_marker = false;
|
||||
char marker_opener = '\0';
|
||||
|
||||
auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; };
|
||||
auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); };
|
||||
|
||||
size_t last_border = 0;
|
||||
|
||||
for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) {
|
||||
if (!in_marker && is_marker_opener(text[cur_pos])) {
|
||||
if (last_border < cur_pos) {
|
||||
retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border)));
|
||||
}
|
||||
last_border = cur_pos;
|
||||
in_marker = true;
|
||||
marker_opener = text[cur_pos];
|
||||
} else if (in_marker && is_marker_closer(marker_opener, text[cur_pos])) {
|
||||
// no need to check because last_border will always be smaller
|
||||
retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1)));
|
||||
last_border = cur_pos + 1;
|
||||
in_marker = false;
|
||||
marker_opener = '\0';
|
||||
}
|
||||
}
|
||||
if (last_border < text.length()) {
|
||||
retval.push_back(segment(segment_type::TEXT, text.substr(last_border)));
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments) {
|
||||
std::vector<segment> result;
|
||||
for (const auto & seg : segments) {
|
||||
if (!trim_whitespace(seg.value).empty()) {
|
||||
result.push_back(seg);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace autoparser {
|
||||
|
||||
std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
|
||||
templates_params tmpl_params;
|
||||
tmpl_params.messages = params.messages;
|
||||
tmpl_params.tools = params.tools;
|
||||
tmpl_params.add_generation_prompt = params.add_generation_prompt;
|
||||
tmpl_params.enable_thinking = params.enable_thinking;
|
||||
|
||||
if (params.extra_context) {
|
||||
tmpl_params.extra_context = *params.extra_context;
|
||||
}
|
||||
tmpl_params.extra_context["enable_thinking"] = params.enable_thinking;
|
||||
|
||||
try {
|
||||
return common_chat_template_direct_apply(tmpl, tmpl_params);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_DBG("Template application failed: %s\n", e.what());
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<compare_variants_result> compare_variants(
|
||||
const common_chat_template & tmpl,
|
||||
const template_params & params_A,
|
||||
const std::function<void(template_params &)> & params_modifier) {
|
||||
// Create variant B by copying A
|
||||
template_params params_B = params_A;
|
||||
|
||||
// Apply modifier to create variant B
|
||||
if (params_modifier) {
|
||||
params_modifier(params_B);
|
||||
}
|
||||
|
||||
// Apply template to both variants
|
||||
std::string output_A = apply_template(tmpl, params_A);
|
||||
std::string output_B = apply_template(tmpl, params_B);
|
||||
|
||||
// Check for template application failures
|
||||
if (output_A.empty() || output_B.empty()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Calculate diff and return result with both outputs
|
||||
compare_variants_result result;
|
||||
result.diff = calculate_diff_split(output_A, output_B);
|
||||
result.output_A = output_A;
|
||||
result.output_B = output_B;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace autoparser
|
||||
|
||||
|
|
@@ -0,0 +1,73 @@
#pragma once

#include "chat-diff-analyzer.h"
#include <functional>
#include <optional>
#include <string>

std::string trim_whitespace(const std::string & str);
std::string trim_leading_whitespace(const std::string & str);
std::string trim_trailing_whitespace(const std::string & str);
std::string trim_trailing_newlines(const std::string & str);

// calculate a diff split (longest common prefix, longest common suffix excluding prefix,
// mismatched part on the left, mismatched part on the right) between two strings
// account for markers - align prefix and suffix endings so that they end on markers
// * eg.:
//   calculate_diff_split("<html><body><div></div></body></html>", "<html><body><p>Something</p></body><html>") ->
//     { "prefix": "<html><body>" (not: "<html><body><"), "suffix": "</body></html>", "left": "<div></div>", "right": "<p>Something</p>" }
//   calculate_diff_split("<html><body>Something</body></html>", "<html><body></body><html>") ->
//     { "prefix": "<html><body>", "suffix": "</body></html>", "left": "Something", "right": "" }
diff_split calculate_diff_split(const std::string & left, const std::string & right);

// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
// Returns empty string if there's no common prefix
// * eg.:
//   until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a "
//   until_common_prefix("<tool_call>", "<something>", "<something_else>") -> ""
//   until_common_prefix("some text", "1234", "abcd") -> ""
//   until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one "
std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right);

// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
// Returns empty string if there's no common suffix
// Mirror function of `until_common_prefix`
// * eg.:
//   after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call"
//   after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four"
std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right);

// Segmentize text into markers and non-marker fragments
// * eg.:
//   segmentize_markers("<html><head><title>The site title</title><body><div>Here's some <b>content</b></div></body></html>") ->
//     [ (MARKER, "<html>"), (MARKER, "<head>"), (MARKER, "<title>"), (TEXT, "The site title"), (MARKER, "</title>"),
//       (MARKER, "<body>"), (MARKER, "<div>"), (TEXT, "Here's some "), (MARKER, "<b>"), (TEXT, "content"), (MARKER, "</b>"),
//       (MARKER, "</div>"), (MARKER, "</body>"), (MARKER, "</html>")
//     ]
//   segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") ->
//     [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ]
std::vector<segment> segmentize_markers(const std::string & text);

// Prune whitespace-only segments from a vector of segments
// * eg.:
//   segmentize_markers("<tool_call>\n<function=foo>\n<arg=bar>\n \n</arg>\n</function>\n</tool_call>") ->
//     X = [ (MARKER, "<tool_call>"), (TEXT, "\n"), (MARKER, "<function=foo>"), (TEXT, "\n"), (MARKER, "<arg=bar>"), (TEXT, "\n \n"),
//           (MARKER, "</arg>"), (TEXT, "\n"), (MARKER, "</function>"), (TEXT, "\n"), (MARKER, "</tool_call>") ]
//   prune_whitespace_segments(X) -> [ (MARKER, "<tool_call>"), (MARKER, "<function=foo>"), (MARKER, "<arg=bar>"), (MARKER, "</arg>"),
//                                     (MARKER, "</function>"), (MARKER, "</tool_call>") ]
std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);

namespace autoparser {

// Apply a template with the given parameters, returning the rendered string (empty on failure)
std::string apply_template(const common_chat_template & tmpl, const template_params & params);

// Factorized differential comparison function
// Takes base params and a single modifier lambda to create variant B
// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
std::optional<compare_variants_result> compare_variants(
    const common_chat_template & tmpl,
    const template_params & params_A,
    const std::function<void(template_params &)> & params_modifier);

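// Hedged usage sketch (names as declared above; `tmpl` is assumed to be an
// already-loaded common_chat_template):
//
//   template_params base;
//   base.messages = json::array({ { {"role", "user"}, {"content", "hi"} } });
//
//   auto res = compare_variants(tmpl, base, [](template_params & b) {
//       b.enable_thinking = false;   // variant B differs only in this flag
//   });
//   if (res) {
//       // res->diff.prefix / suffix hold the shared text, while res->diff.left /
//       // right hold the parts that differ between the two renderings.
//   }
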
} // namespace autoparser
@@ -0,0 +1,45 @@
#pragma once

#include "chat-diff-analyzer.h"
#include "chat.h"
#include "chat-peg-parser.h"
#include "common.h"

#include <chrono>
#include <string>

using json = nlohmann::ordered_json;

namespace autoparser {

struct templates_params {
    json messages;
    json tools;
    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
    json json_schema;
    bool parallel_tool_calls = true;
    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO;
    bool stream = true;
    std::string grammar;
    bool add_generation_prompt = false;
    bool enable_thinking = true;
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
    json extra_context;
    bool add_bos = false;
    bool add_eos = false;
    bool is_inference = true;
    bool add_inference = false;
    bool mark_input = true; // whether to mark input strings in the jinja context
};

class universal_peg_generator {
  public:
    static common_chat_params generate_parser(const common_chat_template & tmpl,
                                              const struct templates_params & inputs);

    static common_chat_params generate_parser(const common_chat_template & tmpl,
                                              const struct templates_params & inputs,
                                              const analyze_template & analysis);
};

} // namespace autoparser
File diff suppressed because it is too large
@@ -0,0 +1,434 @@
|
|||
#pragma once
|
||||
|
||||
#include "chat.h"
|
||||
#include "jinja/caps.h"
|
||||
#include "peg-parser.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class common_chat_peg_unified_builder;
|
||||
|
||||
// ============================================================================
|
||||
// Parameters for template application
|
||||
// ============================================================================
|
||||
struct template_params {
|
||||
json messages;
|
||||
json tools;
|
||||
bool add_generation_prompt = false;
|
||||
bool enable_thinking = true;
|
||||
std::optional<json> extra_context = std::nullopt;
|
||||
};
|
||||
|
||||
struct diff_split {
|
||||
std::string prefix;
|
||||
std::string suffix;
|
||||
std::string left;
|
||||
std::string right;
|
||||
|
||||
bool operator==(struct diff_split & other) const {
|
||||
return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
|
||||
}
|
||||
};
|
||||
|
||||
// Result of compare_variants containing diff and original outputs
|
||||
struct compare_variants_result {
|
||||
diff_split diff;
|
||||
std::string output_A;
|
||||
std::string output_B;
|
||||
};
|
||||
|
||||
namespace autoparser {
|
||||
|
||||
struct templates_params;
|
||||
|
||||
// ============================================================================
|
||||
// Marker Registry: All markers extracted via differential analysis
|
||||
// ============================================================================
|
||||
|
||||
// Markers extracted from differential analysis of template outputs
|
||||
// Each marker is derived from a specific comparison in the analysis matrix
|
||||
struct marker_registry {
|
||||
// === Reasoning markers (from Phase 1: R1-R3) ===
|
||||
std::string reasoning_start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
|
||||
std::string reasoning_end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
|
||||
|
||||
// === Content markers (from Phase 2: C1-C2) ===
|
||||
std::string content_start; // e.g., "<response>", ">>>all\n", ""
|
||||
std::string content_end; // e.g., "</response>", ""
|
||||
|
||||
// === Tool section markers (from Phase 3: T1-T2) ===
|
||||
std::string tool_section_start; // e.g., "<tool_call>", "[TOOL_CALLS]", ""
|
||||
std::string tool_section_end; // e.g., "</tool_call>", ""
|
||||
std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
|
||||
std::string per_call_end; // e.g., "<|tool_call_end|>", ""
|
||||
std::string call_separator; // e.g., ",", "\n", "" (between multiple calls)
|
||||
|
||||
// === Function markers (from Phase 3: T3-T5) ===
|
||||
std::string func_name_prefix; // e.g., "<function=", "\"name\": \"", "functions."
|
||||
std::string func_name_suffix; // e.g., ">", "\"", ":0"
|
||||
std::string func_close; // e.g., "</function>", "" (for tag-based)
|
||||
std::string args_start; // e.g., "{", "<|tool_call_argument_begin|>"
|
||||
std::string args_end; // e.g., "}", ""
|
||||
|
||||
// === Argument markers (from Phase 4: A1-A3, for tagged args format) ===
|
||||
std::string arg_name_prefix; // e.g., "<param=", "<arg_key>", "\""
|
||||
std::string arg_name_suffix; // e.g., ">", "</arg_key>", "\":"
|
||||
std::string arg_value_prefix; // e.g., "", "<arg_value>", ""
|
||||
std::string arg_value_suffix; // e.g., "</param>", "</arg_value>", ""
|
||||
std::string arg_separator; // e.g., "", "\n", ","
|
||||
|
||||
// === Call ID markers (for non-JSON formats with tool call IDs) ===
|
||||
std::string call_id_prefix; // e.g., "[CALL_ID]" (marker before call ID value)
|
||||
std::string call_id_suffix; // e.g., "" (marker after call ID value, before next section)
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Analysis Result Enums
|
||||
// ============================================================================
|
||||
|
||||
// Reasoning handling mode (derived from R1-R3 comparisons)
|
||||
enum class reasoning_mode {
|
||||
NONE, // No reasoning markers detected
|
||||
TAG_BASED, // Standard tag-based: <think>...</think>
|
||||
DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter)
|
||||
FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end)
|
||||
FORCED_CLOSED, // Template ends with open reasoning tag on enabled thinking but
|
||||
// with both opened and closed tag for disabled thinking
|
||||
TOOLS_ONLY // Only reason on tool calls, not on normal content
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) {
|
||||
switch (mode) {
|
||||
case reasoning_mode::NONE:
|
||||
return os << "NONE";
|
||||
case reasoning_mode::TAG_BASED:
|
||||
return os << "TAG_BASED";
|
||||
case reasoning_mode::DELIMITER:
|
||||
return os << "DELIMITER";
|
||||
case reasoning_mode::FORCED_OPEN:
|
||||
return os << "FORCED_OPEN";
|
||||
case reasoning_mode::FORCED_CLOSED:
|
||||
return os << "FORCED_CLOSED";
|
||||
case reasoning_mode::TOOLS_ONLY:
|
||||
return os << "TOOLS_ONLY";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// Content wrapping mode (derived from C1 comparison)
|
||||
enum class content_mode {
|
||||
PLAIN, // No content markers
|
||||
ALWAYS_WRAPPED, // Content always wrapped with markers
|
||||
WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
|
||||
switch (mode) {
|
||||
case content_mode::PLAIN:
|
||||
return os << "PLAIN";
|
||||
case content_mode::ALWAYS_WRAPPED:
|
||||
return os << "ALWAYS_WRAPPED";
|
||||
case content_mode::WRAPPED_WITH_REASONING:
|
||||
return os << "WRAPPED_WITH_REASONING";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// Call ID position in tool calls (for non-JSON formats)
|
||||
enum class call_id_position {
|
||||
NONE, // No call ID support detected
|
||||
PRE_FUNC_NAME, // Call ID before function name: [CALL_ID]id[FUNC]name{args}
|
||||
BETWEEN_FUNC_AND_ARGS, // Call ID between function and args: [FUNC]name[CALL_ID]id{args}
|
||||
POST_ARGS, // Call ID after arguments: [FUNC]name{args}[CALL_ID]id
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) {
|
||||
switch (pos) {
|
||||
case call_id_position::NONE:
|
||||
return os << "NONE";
|
||||
case call_id_position::PRE_FUNC_NAME:
|
||||
return os << "PRE_FUNC_NAME";
|
||||
case call_id_position::BETWEEN_FUNC_AND_ARGS:
|
||||
return os << "BETWEEN_FUNC_AND_ARGS";
|
||||
case call_id_position::POST_ARGS:
|
||||
return os << "POST_ARGS";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// Tool call format classification (derived from T1-T5, A1-A3 comparisons)
|
||||
enum class tool_format {
|
||||
NONE, // No tool support detected
|
||||
JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}}
|
||||
TAG_WITH_JSON, // Tag-based with JSON args: <function=X>{...}</function>
|
||||
TAG_WITH_TAGGED, // Tag-based with tagged args: <param=key>value</param>
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
|
||||
switch (format) {
|
||||
case tool_format::NONE:
|
||||
return os << "NONE";
|
||||
case tool_format::JSON_NATIVE:
|
||||
return os << "JSON_NATIVE";
|
||||
case tool_format::TAG_WITH_JSON:
|
||||
return os << "TAG_WITH_JSON";
|
||||
case tool_format::TAG_WITH_TAGGED:
|
||||
return os << "TAG_WITH_TAGGED";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Sub-structs for tool analysis
|
||||
// ============================================================================
|
||||
|
||||
struct tool_format_analysis {
|
||||
tool_format mode = tool_format::NONE;
|
||||
|
||||
std::string section_start; // e.g., "<tool_call>", "[TOOL_CALLS]", ""
|
||||
std::string section_end; // e.g., "</tool_call>", ""
|
||||
std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
|
||||
std::string per_call_end; // e.g., "<|tool_call_end|>", ""
|
||||
|
||||
bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
|
||||
bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
|
||||
|
||||
std::string function_field = "function";
|
||||
std::string name_field = "name";
|
||||
std::string args_field = "arguments";
|
||||
std::string id_field;
|
||||
std::string gen_id_field;
|
||||
std::vector<std::string> parameter_order;
|
||||
};
|
||||
|
||||
struct tool_function_analysis {
|
||||
std::string name_prefix; // e.g., "<function=", "\"name\": \"", "functions."
|
||||
std::string name_suffix; // e.g., ">", "\"", ":0"
|
||||
std::string close; // e.g., "</function>", "" (for tag-based)
|
||||
};
|
||||
|
||||
struct tool_arguments_analysis {
|
||||
std::string start; // e.g., "<|tool_call_argument_begin|>", "<args>"
|
||||
std::string end; // e.g., "<|tool_call_argument_end|>", "</args>"
|
||||
std::string name_prefix; // e.g., "<param=", "<arg_key>", "\""
|
||||
std::string name_suffix; // e.g., ">", "</arg_key>", "\":"
|
||||
std::string value_prefix; // e.g., "", "<arg_value>", ""
|
||||
std::string value_suffix; // e.g., "</param>", "</arg_value>", ""
|
||||
std::string separator; // e.g., "", "\n", ","
|
||||
};
|
||||
|
||||
struct tool_id_analysis {
|
||||
call_id_position pos = call_id_position::NONE;
|
||||
|
||||
std::string prefix; // e.g., "[CALL_ID]" (marker before call ID value)
|
||||
std::string suffix; // e.g., "" (marker after call ID value, before next section)
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Parser build context (shared interface for build_parser methods)
|
||||
// ============================================================================
|
||||
|
||||
struct analyze_content;
|
||||
|
||||
struct parser_build_context {
|
||||
common_chat_peg_unified_builder & p;
|
||||
const templates_params & inputs;
|
||||
common_peg_parser reasoning_parser;
|
||||
bool extracting_reasoning = false;
|
||||
const analyze_content * content = nullptr;
|
||||
|
||||
parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs);
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Base class for analyzers with parser building
|
||||
// ============================================================================
|
||||
|
||||
struct analyze_base {
|
||||
virtual ~analyze_base() = default;
|
||||
virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0;
|
||||
|
||||
protected:
|
||||
const common_chat_template * tmpl = nullptr;
|
||||
|
||||
analyze_base() = default;
|
||||
explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {}
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Reasoning analyzer
|
||||
// ============================================================================
|
||||
|
||||
struct analyze_reasoning : analyze_base {
|
||||
reasoning_mode mode = reasoning_mode::NONE;
|
||||
|
||||
std::string start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
|
||||
std::string end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
|
||||
|
||||
analyze_reasoning() = default;
|
||||
analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
|
||||
|
||||
common_peg_parser build_parser(parser_build_context & ctx) const override;
|
||||
|
||||
private:
|
||||
// Look for reasoning markers in rendered content
|
||||
void compare_reasoning_presence();
|
||||
|
||||
// Compare generation prompt with enable_thinking=true vs false
|
||||
void compare_thinking_enabled();
|
||||
|
||||
// Check if reasoning is always possible or only in tool calls
|
||||
void compare_reasoning_scope();
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Content analyzer
|
||||
// ============================================================================
|
||||
|
||||
struct analyze_content : analyze_base {
|
||||
content_mode mode = content_mode::PLAIN;
|
||||
|
||||
std::string start; // e.g., "<response>", ">>>all\n", ""
|
||||
std::string end; // e.g., "</response>", ""
|
||||
|
||||
bool requires_nonnull_content = false;
|
||||
|
||||
analyze_content() = default;
|
||||
analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning);
|
||||
|
||||
common_peg_parser build_parser(parser_build_context & ctx) const override;
|
||||
|
||||
bool is_always_wrapped() const;
|
||||
common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Tool analyzer
|
||||
// ============================================================================
|
||||
|
||||
struct analyze_tools : analyze_base {
|
||||
tool_format_analysis format;
|
||||
tool_function_analysis function;
|
||||
tool_arguments_analysis arguments;
|
||||
tool_id_analysis call_id;
|
||||
|
||||
analyze_tools() = default;
|
||||
analyze_tools(const common_chat_template & tmpl,
|
||||
const jinja::caps & caps,
|
||||
const analyze_reasoning & reasoning);
|
||||
|
||||
common_peg_parser build_parser(parser_build_context & ctx) const override;
|
||||
|
||||
private:
|
||||
// Extract tool calling 'haystack' for further analysis and delegate further analysis based on format
|
||||
void analyze_tool_calls(const analyze_reasoning & reasoning);
|
||||
|
||||
// Analyze format based on position of function and argument name in needle
|
||||
void analyze_tool_call_format(const std::string & haystack,
|
||||
const std::string & fun_name_needle,
|
||||
const std::string & arg_name_needle,
|
||||
const analyze_reasoning & reasoning);
|
||||
|
||||
// Analyze specifics of JSON native format (entire tool call is a JSON object)
|
||||
void analyze_tool_call_format_json_native(const std::string & clean_haystack,
|
||||
const std::string & fun_name_needle,
|
||||
const std::string & arg_name_needle);
|
||||
|
||||
// Analyze specifics of non-JSON native format (tags for function name or for function name and arguments)
|
||||
void analyze_tool_call_format_non_json(const std::string & clean_haystack,
|
||||
const std::string & fun_name_needle);
|
||||
|
||||
// Check for and extract specific per-call markers for non-native-JSON templates with parallel call support
|
||||
void check_per_call_markers();
|
||||
|
||||
// Extract function name markers
|
||||
void extract_function_markers();
|
||||
|
||||
// Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis
|
||||
void analyze_arguments();
|
||||
|
||||
// Extract argument name markers
|
||||
void extract_argument_name_markers();
|
||||
|
||||
// Extract argument value markers
|
||||
void extract_argument_value_markers();
|
||||
|
||||
// Extract argument separator, if specified (eg. <arg=foo>...</arg><sep><arg=bar>...</arg>)
|
||||
void extract_argument_separator();
|
||||
|
||||
// Extract argument wrapper markers, if present (eg. '<args><arg=foo>...</arg><arg=bar>...</arg></args>')
|
||||
void extract_args_markers();
|
||||
|
||||
// Extract call ID markers, if present
|
||||
void extract_call_id_markers();
|
||||
|
||||
// Per-format tool parser builders
|
||||
common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const;
|
||||
common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const;
|
||||
common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Top-level template analyzer (merges differential_analyzer + diff_analysis_result)
|
||||
// ============================================================================
|
||||
|
||||
struct analyze_template {
|
||||
jinja::caps jinja_caps;
|
||||
analyze_reasoning reasoning;
|
||||
analyze_content content;
|
||||
analyze_tools tools;
|
||||
|
||||
// Preserved tokens for tokenizer (union of all non-empty markers)
|
||||
std::vector<std::string> preserved_tokens;
|
||||
|
||||
// Constructor: runs full differential analysis on a template
|
||||
explicit analyze_template(const common_chat_template & tmpl);
|
||||
|
||||
// Build the unified PEG parser for this template
|
||||
common_peg_arena build_parser(const templates_params & inputs) const;
|
||||
|
||||
private:
|
||||
// Collect tokens from entire analysis to preserve
|
||||
void collect_preserved_tokens();
|
||||
};
|
||||
|
||||
} // namespace autoparser
|
||||
|
||||
enum segment_type { TEXT, MARKER };
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {
|
||||
switch (type) {
|
||||
case segment_type::TEXT:
|
||||
return os << "TEXT";
|
||||
case segment_type::MARKER:
|
||||
return os << "MARKER";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
struct segment {
|
||||
segment_type type;
|
||||
std::string value;
|
||||
|
||||
segment(segment_type type, std::string value) : type(type), value(std::move(value)) {}
|
||||
|
||||
bool operator==(const segment & other) const {
|
||||
return type == other.type && value == other.value;
|
||||
}
|
||||
|
||||
bool operator!=(const segment & other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
|
|
@@ -1,879 +0,0 @@
|
|||
#include "chat.h"
|
||||
#include "chat-parser.h"
|
||||
#include "common.h"
|
||||
#include "json-partial.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "log.h"
|
||||
#include "regex-partial.h"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class xml_toolcall_syntax_exception : public std::runtime_error {
|
||||
public:
|
||||
xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
inline void sort_uniq(std::vector<T> &vec) {
|
||||
std::sort(vec.begin(), vec.end());
|
||||
vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool all_space(const T &str) {
|
||||
return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); });
|
||||
}
|
||||
|
||||
static size_t utf8_truncate_safe(const std::string_view s) {
|
||||
size_t len = s.size();
|
||||
if (len == 0) return 0;
|
||||
size_t i = len;
|
||||
for (size_t back = 0; back < 4 && i > 0; ++back) {
|
||||
--i;
|
||||
unsigned char c = s[i];
|
||||
if ((c & 0x80) == 0) {
|
||||
return len;
|
||||
} else if ((c & 0xC0) == 0xC0) {
|
||||
size_t expected_len = 0;
|
||||
if ((c & 0xE0) == 0xC0) expected_len = 2;
|
||||
else if ((c & 0xF0) == 0xE0) expected_len = 3;
|
||||
else if ((c & 0xF8) == 0xF0) expected_len = 4;
|
||||
else return i;
|
||||
if (len - i >= expected_len) {
|
||||
return len;
|
||||
} else {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return len - std::min(len, size_t(3));
|
||||
}
|
||||
|
||||
inline void utf8_truncate_safe_resize(std::string &s) {
|
||||
s.resize(utf8_truncate_safe(s));
|
||||
}
|
||||
|
||||
inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
|
||||
return s.substr(0, utf8_truncate_safe(s));
|
||||
}
|
||||
|
||||
static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
|
||||
if (literal1.size() == 0) return builder.try_find_literal(literal2);
|
||||
const auto saved_pos = builder.pos();
|
||||
while (auto res = builder.try_find_literal(literal1)) {
|
||||
builder.consume_spaces();
|
||||
const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
|
||||
if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
|
||||
if (res->prelude.size() != res->groups[0].begin - saved_pos) {
|
||||
res->prelude = builder.str({saved_pos, res->groups[0].begin});
|
||||
}
|
||||
builder.move_to(builder.pos() + match_len);
|
||||
res->groups[0].end = builder.pos();
|
||||
GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
|
||||
return res;
|
||||
}
|
||||
builder.move_to(res->groups[0].begin + 1);
|
||||
}
|
||||
builder.move_to(saved_pos);
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a GBNF rule that accepts any string except those containing any of the forbidden substrings.
|
||||
*/
|
||||
std::string make_gbnf_excluding(std::vector<std::string> forbids) {
|
||||
constexpr auto charclass_escape = [](unsigned char c) -> std::string {
|
||||
if (c == '\\' || c == ']' || c == '^' || c == '-') {
|
||||
std::string s = "\\";
|
||||
s.push_back((char)c);
|
||||
return s;
|
||||
}
|
||||
if (isprint(c)) {
|
||||
return std::string(1, (char)c);
|
||||
}
|
||||
char buf[16];
|
||||
snprintf(buf, 15, "\\x%02X", c);
|
||||
return std::string(buf);
|
||||
};
|
||||
constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
|
||||
std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
|
||||
int i = l;
|
||||
while (i < r) {
|
||||
const std::string &s = forbids[i];
|
||||
if ((int)s.size() == depth) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
unsigned char c = (unsigned char)s[depth];
|
||||
int j = i;
|
||||
while (j < r && (int)forbids[j].size() > depth &&
|
||||
(unsigned char)forbids[j][depth] == c) {
|
||||
++j;
|
||||
}
|
||||
children.push_back({c, {i, j}});
|
||||
i = j;
|
||||
}
|
||||
std::vector<std::string> alts;
|
||||
if (!children.empty()) {
|
||||
std::string cls;
|
||||
for (auto &ch : children) cls += charclass_escape(ch.first);
|
||||
alts.push_back(std::string("[^") + cls + "]");
|
||||
}
|
||||
for (auto &ch : children) {
|
||||
std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
|
||||
if (!childExpr.empty()) {
|
||||
std::string quoted_ch = "\"";
|
||||
if (ch.first == '\\') quoted_ch += "\\\\";
|
||||
else if (ch.first == '"') quoted_ch += "\\\"";
|
||||
else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
|
||||
else {
|
||||
char buf[16];
|
||||
snprintf(buf, 15, "\\x%02X", ch.first);
|
||||
quoted_ch += buf;
|
||||
}
|
||||
quoted_ch += "\"";
|
||||
std::string branch = quoted_ch + std::string(" ") + childExpr;
|
||||
alts.push_back(branch);
|
||||
}
|
||||
}
|
||||
if (alts.empty()) return "";
|
||||
std::ostringstream oss;
|
||||
oss << "( ";
|
||||
for (size_t k = 0; k < alts.size(); ++k) {
|
||||
if (k) oss << " | ";
|
||||
oss << alts[k];
|
||||
}
|
||||
oss << " )";
|
||||
return oss.str();
|
||||
};
|
||||
if (forbids.empty()) return "( . )*";
|
||||
sort(forbids.begin(), forbids.end());
|
||||
std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
|
||||
if (expr.empty()) {
|
||||
std::string cls;
|
||||
for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
|
||||
expr = std::string("( [^") + cls + "] )";
|
||||
}
|
||||
if (forbids.size() == 1)
|
||||
return expr + "*";
|
||||
else
|
||||
return std::string("( ") + expr + " )*";
|
||||
}
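For intuition, a sketch of how this helper might be used when building the argument-value rules below (illustrative only; the exact GBNF text it returns is an implementation detail):

    // Accept any text that stops before the closing tag of a raw string value.
    std::string no_val_end = make_gbnf_excluding({"</arg_value>"});
    // Several forbidden markers: the result matches anything that avoids all of them.
    std::string no_closers = make_gbnf_excluding({"</parameter>", "</invoke>"});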
|
||||
|
||||
/**
|
||||
* Build grammar for xml-style tool call
|
||||
* form.scope_start and form.scope_end can be empty.
|
||||
* Requires data.format for model-specific hacks.
|
||||
*/
|
||||
void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
|
||||
GGML_ASSERT(!form.tool_start.empty());
|
||||
GGML_ASSERT(!form.tool_sep.empty());
|
||||
GGML_ASSERT(!form.key_start.empty());
|
||||
GGML_ASSERT(!form.val_end.empty());
|
||||
GGML_ASSERT(!form.tool_end.empty());
|
||||
|
||||
std::string key_val_sep = form.key_val_sep;
|
||||
if (form.key_val_sep2) {
|
||||
key_val_sep += "\n";
|
||||
key_val_sep += *form.key_val_sep2;
|
||||
}
|
||||
GGML_ASSERT(!key_val_sep.empty());
|
||||
|
||||
if (tools.is_array() && !tools.empty()) {
|
||||
data.grammar = build_grammar([&](const common_grammar_builder &builder) {
|
||||
auto string_arg_val = form.last_val_end ?
|
||||
builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
|
||||
builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
|
||||
|
||||
std::vector<std::string> tool_rules;
|
||||
for (const auto & tool : tools) {
|
||||
if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
|
||||
LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
|
||||
continue;
|
||||
}
|
||||
const auto & function = tool.at("function");
|
||||
if (!function.contains("name") || !function.at("name").is_string()) {
|
||||
LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
|
||||
continue;
|
||||
}
|
||||
if (!function.contains("parameters") || !function.at("parameters").is_object()) {
|
||||
LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
|
||||
continue;
|
||||
}
|
||||
std::string name = function.at("name");
|
||||
auto parameters = function.at("parameters");
|
||||
builder.resolve_refs(parameters);
|
||||
|
||||
struct parameter_rule {
|
||||
std::string symbol_name;
|
||||
bool is_required;
|
||||
};
|
||||
std::vector<parameter_rule> arg_rules;
|
||||
if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
|
||||
LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
|
||||
continue;
|
||||
} else {
|
||||
std::vector<std::string> requiredParameters;
|
||||
if (parameters.contains("required")) {
|
||||
try { parameters.at("required").get_to(requiredParameters); }
|
||||
catch (const std::runtime_error&) {
|
||||
LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
|
||||
}
|
||||
}
|
||||
sort_uniq(requiredParameters);
|
||||
for (const auto & [key, value] : parameters.at("properties").items()) {
|
||||
std::string quoted_key = key;
|
||||
bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
|
||||
if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
|
||||
quoted_key = gbnf_format_literal(key);
|
||||
quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
|
||||
}
|
||||
arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
|
||||
gbnf_format_literal(form.key_start) + " " +
|
||||
gbnf_format_literal(quoted_key) + " " +
|
||||
gbnf_format_literal(key_val_sep) + " " +
|
||||
((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
|
||||
(form.raw_argval ?
|
||||
string_arg_val :
|
||||
"( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
|
||||
) :
|
||||
builder.add_schema(name + "-arg-" + key, value)
|
||||
)
|
||||
), required});
|
||||
}
|
||||
}
|
||||
|
||||
auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
|
||||
decltype(next_arg_with_sep) next_arg = "\"\"";
|
||||
for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
|
||||
std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
|
||||
next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
|
||||
include_this_arg : "( " + include_this_arg + " ) | " + next_arg
|
||||
);
|
||||
include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
|
||||
next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
|
||||
include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
|
||||
);
|
||||
}
|
||||
|
||||
std::string quoted_name = name;
|
||||
if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
|
||||
quoted_name = gbnf_format_literal(name);
|
||||
quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
|
||||
}
|
||||
quoted_name = gbnf_format_literal(quoted_name);
|
||||
// Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
|
||||
if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
|
||||
quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
|
||||
}
|
||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||
gbnf_format_literal(form.tool_start) + " " +
|
||||
quoted_name + " " +
|
||||
gbnf_format_literal(form.tool_sep) + " " +
|
||||
next_arg
|
||||
));
|
||||
}
|
||||
|
||||
auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
|
||||
auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
|
||||
auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
|
||||
auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
|
||||
builder.add_rule("root",
|
||||
(form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
|
||||
tool_call_multiple_with_end + "?" +
|
||||
(form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
|
||||
);
|
||||
});
|
||||
|
||||
// grammar trigger for tool call
|
||||
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse an XML-style tool call for the given xml_tool_call_format. Returns false on invalid syntax and leaves the position untouched.
|
||||
* Throws xml_toolcall_syntax_exception when the syntax is invalid and the original state of common_chat_msg_parser cannot be restored.
|
||||
* form.scope_start, form.tool_sep and form.scope_end can be empty.
|
||||
*/
|
||||
inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
|
||||
GGML_ASSERT(!form.tool_start.empty());
|
||||
GGML_ASSERT(!form.key_start.empty());
|
||||
GGML_ASSERT(!form.key_val_sep.empty());
|
||||
GGML_ASSERT(!form.val_end.empty());
|
||||
GGML_ASSERT(!form.tool_end.empty());
|
||||
|
||||
// Helper that either returns false or throws, depending on whether recovery is possible
|
||||
constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
|
||||
LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
|
||||
if (recovery) {
|
||||
builder.move_to(start_pos);
|
||||
return false;
|
||||
} else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
|
||||
};
|
||||
// Drop the substring from the needle to the end of a dumped JSON string
|
||||
constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
|
||||
auto pos = json_str.rfind(needle);
|
||||
if (pos == std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
|
||||
unsigned char ch = static_cast<unsigned char>(json_str[i]);
|
||||
if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (pos != 0 && json_str[pos - 1] == '"') {
|
||||
--pos;
|
||||
}
|
||||
json_str.resize(pos);
|
||||
return true;
|
||||
};
|
||||
// Helper to generate a partial argument JSON
|
||||
constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
|
||||
auto rest = builder.consume_rest();
|
||||
utf8_truncate_safe_resize(rest);
|
||||
set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
|
||||
auto tool_str = arguments.dump();
|
||||
if (partial_json(tool_str)) {
|
||||
if (builder.add_tool_call(function_name, "", tool_str)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
|
||||
};
|
||||
// Helper to find a closing marker (there may also be form.last_val_end or form.last_tool_end)
|
||||
constexpr auto try_find_close = [](
|
||||
common_chat_msg_parser & builder,
|
||||
const std::string & end,
|
||||
const std::optional<std::string> & alt_end,
|
||||
const std::string & end_next,
|
||||
const std::optional<std::string> & alt_end_next
|
||||
) {
|
||||
auto saved_pos = builder.pos();
|
||||
auto tc = builder.try_find_literal(end);
|
||||
auto val_end_size = end.size();
|
||||
if (alt_end) {
|
||||
auto pos_1 = builder.pos();
|
||||
builder.move_to(saved_pos);
|
||||
auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
|
||||
if (alt_end_next) {
|
||||
builder.move_to(saved_pos);
|
||||
auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
|
||||
if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
|
||||
tc2 = tc3;
|
||||
}
|
||||
}
|
||||
if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
|
||||
tc = tc2;
|
||||
tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
|
||||
builder.move_to(tc->groups[0].end);
|
||||
val_end_size = alt_end->size();
|
||||
} else {
|
||||
builder.move_to(pos_1);
|
||||
}
|
||||
}
|
||||
return std::make_pair(val_end_size, tc);
|
||||
};
|
||||
// Helper to find a val_end or last_val_end, returns matched pattern size
|
||||
const auto try_find_val_end = [try_find_close, &builder, &form]() {
|
||||
return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
|
||||
};
|
||||
// Helper to find a tool_end or last_tool_end, returns matched pattern size
|
||||
const auto try_find_tool_end = [try_find_close, &builder, &form]() {
|
||||
return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
|
||||
};
|
||||
|
||||
bool recovery = true;
|
||||
const auto start_pos = builder.pos();
|
||||
if (!all_space(form.scope_start)) {
|
||||
if (auto tc = builder.try_find_literal(form.scope_start)) {
|
||||
if (all_space(tc->prelude)) {
|
||||
if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
|
||||
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
|
||||
} else {
|
||||
builder.move_to(start_pos);
|
||||
return false;
|
||||
}
|
||||
} else return false;
|
||||
}
|
||||
while (auto tc = builder.try_find_literal(form.tool_start)) {
|
||||
if (!all_space(tc->prelude)) {
|
||||
LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
|
||||
gbnf_format_literal(form.tool_start).c_str(),
|
||||
gbnf_format_literal(tc->prelude).c_str()
|
||||
);
|
||||
builder.move_to(tc->groups[0].begin - tc->prelude.size());
|
||||
break;
|
||||
}
|
||||
|
||||
// Find tool name
|
||||
auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
|
||||
if (!func_name) {
|
||||
auto [sz, tc] = try_find_tool_end();
|
||||
func_name = tc;
|
||||
}
|
||||
if (!func_name) {
|
||||
// Partial tool name not supported
|
||||
throw common_chat_msg_partial_exception("incomplete tool_call");
|
||||
}
|
||||
// If the model generates multiple tool calls and the first tool call has no arguments
|
||||
if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
|
||||
builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
|
||||
auto [sz, tc] = try_find_tool_end();
|
||||
func_name = tc;
|
||||
}
|
||||
|
||||
// Parse tool name
|
||||
builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
|
||||
std::string function_name = string_strip(func_name->prelude);
|
||||
// Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
|
||||
if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
|
||||
if (string_starts_with(function_name, "functions.")) {
|
||||
static const std::regex re(":\\d+$");
|
||||
if (std::regex_search(function_name, re)) {
|
||||
function_name = function_name.substr(10, function_name.rfind(":") - 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Argument JSON
|
||||
json arguments = json::object();
|
||||
|
||||
// Helper to generate a partial argument JSON
|
||||
const auto gen_partial_args = [&](auto set_partial_arg) {
|
||||
gen_partial_json(set_partial_arg, arguments, builder, function_name);
|
||||
};
|
||||
|
||||
// Parse all arg_key/arg_value pairs
|
||||
while (auto tc = builder.try_find_literal(form.key_start)) {
|
||||
if (!all_space(tc->prelude)) {
|
||||
LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
|
||||
gbnf_format_literal(form.key_start).c_str(),
|
||||
gbnf_format_literal(tc->prelude).c_str()
|
||||
);
|
||||
builder.move_to(tc->groups[0].begin - tc->prelude.size());
|
||||
break;
|
||||
}
|
||||
if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
|
||||
auto tool_call_arg = arguments.dump();
|
||||
if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
|
||||
tool_call_arg.resize(tool_call_arg.size() - 1);
|
||||
}
|
||||
builder.add_tool_call(function_name, "", tool_call_arg);
|
||||
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
|
||||
}
|
||||
|
||||
// Parse arg_key
|
||||
auto key_res = builder.try_find_literal(form.key_val_sep);
|
||||
if (!key_res) {
|
||||
gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
|
||||
throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
|
||||
}
|
||||
if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
|
||||
gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
|
||||
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
|
||||
}
|
||||
auto &key = key_res->prelude;
|
||||
recovery = false;
|
||||
|
||||
// Parse arg_value
|
||||
if (form.key_val_sep2) {
|
||||
if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
|
||||
if (!all_space(tc->prelude)) {
|
||||
LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
|
||||
gbnf_format_literal(tc->prelude).c_str(),
|
||||
gbnf_format_literal(form.key_val_sep).c_str(),
|
||||
gbnf_format_literal(*form.key_val_sep2).c_str()
|
||||
);
|
||||
return return_error(builder, start_pos, false);
|
||||
}
|
||||
if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
|
||||
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
||||
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
|
||||
}
|
||||
} else {
|
||||
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
||||
throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
|
||||
}
|
||||
}
|
||||
auto val_start = builder.pos();
|
||||
|
||||
// Test if arg_val is a partial JSON
|
||||
std::optional<common_json> value_json = std::nullopt;
|
||||
if (!form.raw_argval || !*form.raw_argval) {
|
||||
try { value_json = builder.try_consume_json(); }
|
||||
catch (const std::runtime_error&) { builder.move_to(val_start); }
|
||||
// TODO: Delete this when json_partial adds top-level support for null/true/false
|
||||
if (builder.pos() == val_start) {
|
||||
const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
|
||||
builder.consume_spaces();
|
||||
std::string_view sv = utf8_truncate_safe_view(builder.input());
|
||||
sv.remove_prefix(builder.pos());
|
||||
std::string rest = "a";
|
||||
if (sv.size() < 6) rest = sv;
|
||||
if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
|
||||
value_json = {123, {"123", "123"}};
|
||||
builder.consume_rest();
|
||||
} else {
|
||||
builder.move_to(val_start);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If it is JSON followed by </arg_value>, parse it as JSON
|
||||
// streaming cannot be supported here because the value may be plain text that merely starts with JSON
|
||||
if (value_json) {
|
||||
auto json_end = builder.pos();
|
||||
builder.consume_spaces();
|
||||
if (builder.pos() == builder.input().size()) {
|
||||
if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
|
||||
arguments[key] = value_json->json;
|
||||
auto json_str = arguments.dump();
|
||||
if (!value_json->healing_marker.json_dump_marker.empty()) {
|
||||
GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
|
||||
json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
|
||||
} else {
|
||||
GGML_ASSERT(json_str.back() == '}');
|
||||
json_str.resize(json_str.size() - 1);
|
||||
}
|
||||
builder.add_tool_call(function_name, "", json_str);
|
||||
} else {
|
||||
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
||||
}
|
||||
LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
|
||||
throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
|
||||
}
|
||||
builder.move_to(json_end);
|
||||
auto [val_end_size, tc] = try_find_val_end();
|
||||
if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
|
||||
if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
|
||||
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
||||
LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
|
||||
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
|
||||
} else arguments[key] = value_json->json;
|
||||
} else builder.move_to(val_start);
|
||||
}
|
||||
|
||||
// If not, parse as plain text
|
||||
if (val_start == builder.pos()) {
|
||||
if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
|
||||
auto &value_str = value_plain->prelude;
|
||||
if (form.trim_raw_argval) value_str = string_strip(value_str);
|
||||
if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
|
||||
gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
|
||||
throw common_chat_msg_partial_exception(
|
||||
"Expected " + gbnf_format_literal(form.val_end) +
|
||||
" after " + gbnf_format_literal(form.key_val_sep) +
|
||||
(form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
|
||||
);
|
||||
}
|
||||
arguments[key] = value_str;
|
||||
} else {
|
||||
if (form.trim_raw_argval) {
|
||||
gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
|
||||
} else {
|
||||
gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
|
||||
}
|
||||
throw common_chat_msg_partial_exception(
|
||||
"Expected " + gbnf_format_literal(form.val_end) +
|
||||
" after " + gbnf_format_literal(form.key_val_sep) +
|
||||
(form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Consume closing tag
|
||||
if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
|
||||
if (!all_space(tc->prelude)) {
|
||||
LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
|
||||
gbnf_format_literal(form.tool_end).c_str(),
|
||||
gbnf_format_literal(tc->prelude).c_str()
|
||||
);
|
||||
return return_error(builder, start_pos, recovery);
|
||||
}
|
||||
if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
|
||||
// Add the parsed tool call
|
||||
if (!builder.add_tool_call(function_name, "", arguments.dump())) {
|
||||
throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
|
||||
}
|
||||
recovery = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
auto tool_call_arg = arguments.dump();
|
||||
if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
|
||||
tool_call_arg.resize(tool_call_arg.size() - 1);
|
||||
}
|
||||
builder.add_tool_call(function_name, "", tool_call_arg);
|
||||
throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
|
||||
}
|
||||
if (auto tc = builder.try_find_literal(form.scope_end)) {
|
||||
if (!all_space(tc->prelude)) {
|
||||
LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
|
||||
gbnf_format_literal(form.scope_end).c_str(),
|
||||
gbnf_format_literal(tc->prelude).c_str()
|
||||
);
|
||||
return return_error(builder, start_pos, recovery);
|
||||
}
|
||||
} else {
|
||||
if (all_space(form.scope_end)) return true;
|
||||
builder.consume_spaces();
|
||||
if (builder.pos() == builder.input().size())
|
||||
throw common_chat_msg_partial_exception("incomplete tool calls");
|
||||
LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
|
||||
gbnf_format_literal(form.scope_end).c_str(),
|
||||
gbnf_format_literal(builder.consume_rest()).c_str()
|
||||
);
|
||||
return return_error(builder, start_pos, recovery);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse an XML-style tool call for the given xml_tool_call_format. Returns false on invalid syntax and leaves the position untouched.
|
||||
* May throw std::runtime_error on invalid syntax, because a partially valid tool call may already have been sent to the client.
|
||||
* form.scope_start, form.tool_sep and form.scope_end can be empty.
|
||||
*/
|
||||
bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
|
||||
auto pos = pos_;
|
||||
auto tsize = result_.tool_calls.size();
|
||||
try { return parse_xml_tool_calls(*this, form); }
|
||||
catch (const xml_toolcall_syntax_exception&) {}
|
||||
move_to(pos);
|
||||
result_.tool_calls.resize(tsize);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse content using reasoning and XML-style tool calls
|
||||
* TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
|
||||
*/
|
||||
inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
|
||||
constexpr auto rstrip = [](std::string &s) {
|
||||
s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
|
||||
};
|
||||
// Erase substring from l to r, along with additional spaces nearby
|
||||
constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
|
||||
while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
|
||||
++l;
|
||||
while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
|
||||
if (l < r) str[l] = '\n';
|
||||
if (l + 1 < r) str[l + 1] = '\n';
|
||||
if (l != 0) l += 2;
|
||||
str.erase(l, r - l);
|
||||
return l;
|
||||
};
|
||||
constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
|
||||
auto best_match = content.size();
|
||||
for (auto pattern: list) {
|
||||
if (pattern.size() == 0) continue;
|
||||
for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
|
||||
auto match_len = content.size() - match_idx;
|
||||
if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
|
||||
best_match = match_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (content.size() > best_match) {
|
||||
content.erase(best_match);
|
||||
}
|
||||
};
|
||||
const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
|
||||
return trim_suffix(content, {
|
||||
start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
|
||||
form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
|
||||
form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
|
||||
form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
|
||||
form.scope_end
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
// Trim leading spaces without affecting keyword matching
|
||||
static const common_regex spaces_regex("\\s*");
|
||||
{
|
||||
auto tc = builder.consume_regex(spaces_regex);
|
||||
auto spaces = builder.str(tc.groups[0]);
|
||||
auto s1 = spaces.size();
|
||||
trim_potential_partial_word(spaces);
|
||||
auto s2 = spaces.size();
|
||||
builder.move_to(builder.pos() - (s1 - s2));
|
||||
}
|
||||
|
||||
// Parse content
|
||||
bool reasoning_unclosed = builder.syntax().thinking_forced_open;
|
||||
std::string unclosed_reasoning_content("");
|
||||
for (;;) {
|
||||
auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
|
||||
std::string content;
|
||||
std::string tool_call_start;
|
||||
|
||||
if (tc) {
|
||||
content = std::move(tc->prelude);
|
||||
tool_call_start = builder.str(tc->groups[0]);
|
||||
LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
|
||||
} else {
|
||||
content = builder.consume_rest();
|
||||
utf8_truncate_safe_resize(content);
|
||||
}
|
||||
|
||||
// Handle unclosed think block
|
||||
if (reasoning_unclosed) {
|
||||
if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
|
||||
unclosed_reasoning_content += content;
|
||||
if (!(form.allow_toolcall_in_think && tc)) {
|
||||
unclosed_reasoning_content += tool_call_start;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
reasoning_unclosed = false;
|
||||
std::string reasoning_content;
|
||||
if (pos == std::string::npos) {
|
||||
reasoning_content = std::move(content);
|
||||
} else {
|
||||
reasoning_content = content.substr(0, pos);
|
||||
content.erase(0, pos + end_think.size());
|
||||
}
|
||||
if (builder.pos() == builder.input().size() && all_space(content)) {
|
||||
rstrip(reasoning_content);
|
||||
trim_potential_partial_word(reasoning_content);
|
||||
rstrip(reasoning_content);
|
||||
if (reasoning_content.empty()) {
|
||||
rstrip(unclosed_reasoning_content);
|
||||
trim_potential_partial_word(unclosed_reasoning_content);
|
||||
rstrip(unclosed_reasoning_content);
|
||||
if (unclosed_reasoning_content.empty()) continue;
|
||||
}
|
||||
}
|
||||
if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
|
||||
builder.add_content(start_think);
|
||||
builder.add_content(unclosed_reasoning_content);
|
||||
builder.add_content(reasoning_content);
|
||||
if (builder.pos() != builder.input().size() || !all_space(content))
|
||||
builder.add_content(end_think);
|
||||
} else {
|
||||
builder.add_reasoning_content(unclosed_reasoning_content);
|
||||
builder.add_reasoning_content(reasoning_content);
|
||||
}
|
||||
unclosed_reasoning_content.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Handle multiple think blocks
|
||||
bool toolcall_in_think = false;
|
||||
for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
|
||||
if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
|
||||
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
|
||||
auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
|
||||
builder.add_reasoning_content(reasoning_content);
|
||||
think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
|
||||
} else {
|
||||
think_start = think_end + end_think.size() - 1;
|
||||
}
|
||||
} else {
|
||||
// This <tool_call> start is inside a thinking block; skip the tool call unless form.allow_toolcall_in_think is set
|
||||
|
||||
if (form.allow_toolcall_in_think) {
|
||||
unclosed_reasoning_content = content.substr(think_start + start_think.size());
|
||||
} else {
|
||||
unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
|
||||
}
|
||||
reasoning_unclosed = true;
|
||||
content.resize(think_start);
|
||||
toolcall_in_think = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
|
||||
rstrip(content);
|
||||
// Handle unclosed </think> tokens in content: delete all </think> tokens
|
||||
if (auto pos = content.rfind(end_think); pos != std::string::npos) {
|
||||
while (pos != std::string::npos) {
|
||||
pos = erase_spaces(content, pos, pos + end_think.size() - 1);
|
||||
pos = content.rfind(end_think, pos);
|
||||
}
|
||||
}
|
||||
// Strip if needed
|
||||
if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
|
||||
content = string_strip(content);
|
||||
}
|
||||
}
|
||||
|
||||
// remove potential partial suffix
|
||||
if (builder.pos() == builder.input().size()) {
|
||||
if (unclosed_reasoning_content.empty()) {
|
||||
rstrip(content);
|
||||
trim_potential_partial_word(content);
|
||||
rstrip(content);
|
||||
} else {
|
||||
rstrip(unclosed_reasoning_content);
|
||||
trim_potential_partial_word(unclosed_reasoning_content);
|
||||
rstrip(unclosed_reasoning_content);
|
||||
}
|
||||
}
|
||||
|
||||
// consume unclosed_reasoning_content if allow_toolcall_in_think is set
|
||||
if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
|
||||
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
|
||||
builder.add_reasoning_content(unclosed_reasoning_content);
|
||||
} else {
|
||||
if (content.empty()) {
|
||||
content = start_think + unclosed_reasoning_content;
|
||||
} else {
|
||||
content += "\n\n" + start_think;
|
||||
content += unclosed_reasoning_content;
|
||||
}
|
||||
}
|
||||
unclosed_reasoning_content.clear();
|
||||
}
|
||||
|
||||
// Add content
|
||||
if (!content.empty()) {
|
||||
// If there are multiple content blocks
|
||||
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
|
||||
builder.add_content("\n\n");
|
||||
}
|
||||
builder.add_content(content);
|
||||
}
|
||||
|
||||
// This <tool_call> start is inside a thinking block and allow_toolcall_in_think is not set, so skip this tool call
|
||||
if (toolcall_in_think && !form.allow_toolcall_in_think) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// There is no tool call and all content is parsed
|
||||
if (!tc) {
|
||||
GGML_ASSERT(builder.pos() == builder.input().size());
|
||||
GGML_ASSERT(unclosed_reasoning_content.empty());
|
||||
if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
|
||||
break;
|
||||
}
|
||||
|
||||
builder.move_to(tc->groups[0].begin);
|
||||
if (builder.try_consume_xml_tool_calls(form)) {
|
||||
auto end_of_tool = builder.pos();
|
||||
builder.consume_spaces();
|
||||
if (builder.pos() != builder.input().size()) {
|
||||
builder.move_to(end_of_tool);
|
||||
if (!builder.result().content.empty()) {
|
||||
builder.add_content("\n\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
static const common_regex next_char_regex(".");
|
||||
auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
|
||||
rstrip(c);
|
||||
builder.add_content(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse content using reasoning and XML-style tool calls
|
||||
*/
|
||||
void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
|
||||
parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "chat.h"
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
// Sample config:
|
||||
// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
|
||||
// GLM 4.5 (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
|
||||
struct xml_tool_call_format {
|
||||
std::string scope_start; // <minimax:tool_call>\n // \n // can be empty
|
||||
std::string tool_start; // <invoke name=\" // <tool_call>
|
||||
std::string tool_sep; // \">\n // \n // can be empty only for parse_xml_tool_calls
|
||||
std::string key_start; // <parameter name=\" // <arg_key>
|
||||
std::string key_val_sep; // \"> // </arg_key>\n<arg_value>
|
||||
std::string val_end; // </parameter>\n // </arg_value>\n
|
||||
std::string tool_end; // </invoke>\n // </tool_call>\n
|
||||
std::string scope_end; // </minimax:tool_call> // // can be empty
|
||||
// Set this if there can be dynamic spaces inside key_val_sep.
|
||||
// e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
|
||||
std::optional<std::string> key_val_sep2 = std::nullopt;
|
||||
// Set true if argval should only be raw string. e.g. Hello "world" hi
|
||||
// Set false if argval should only be json string. e.g. "Hello \"world\" hi"
|
||||
// Defaults to std::nullopt, both will be allowed.
|
||||
std::optional<bool> raw_argval = std::nullopt;
|
||||
std::optional<std::string> last_val_end = std::nullopt;
|
||||
std::optional<std::string> last_tool_end = std::nullopt;
|
||||
bool trim_raw_argval = false;
|
||||
bool allow_toolcall_in_think = false;
|
||||
};
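As an illustration, the GLM 4.5 layout described in the per-field comments above could be filled in roughly like this (a hypothetical sketch; the real presets are defined where the chat formats are registered and may differ in whitespace details):

    xml_tool_call_format glm_form;
    glm_form.scope_start  = "\n";
    glm_form.tool_start   = "<tool_call>";
    glm_form.tool_sep     = "\n";
    glm_form.key_start    = "<arg_key>";
    glm_form.key_val_sep  = "</arg_key>";
    glm_form.key_val_sep2 = "<arg_value>";   // dynamic spaces may appear between the two
    glm_form.val_end      = "</arg_value>\n";
    glm_form.tool_end     = "</tool_call>\n";
    glm_form.scope_end    = "";              // no outer scope marker for this style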
|
||||
|
||||
// Make a GBNF rule that accepts any string except those containing any of the forbidden substrings.
|
||||
std::string make_gbnf_excluding(std::vector<std::string> forbids);
|
||||
|
||||
/**
|
||||
* Build grammar for xml-style tool call
|
||||
* form.scope_start and form.scope_end can be empty.
|
||||
* Requires data.format for model-specific hacks.
|
||||
*/
|
||||
void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
|
||||
File diff suppressed because it is too large
|
|
@ -1,133 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "chat.h"
|
||||
#include "chat-parser-xml-toolcall.h"
|
||||
#include "json-partial.h"
|
||||
#include "regex-partial.h"
|
||||
|
||||
#include <nlohmann/json_fwd.hpp>
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class common_chat_msg_partial_exception : public std::runtime_error {
|
||||
public:
|
||||
common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
|
||||
};
|
||||
|
||||
class common_chat_msg_parser {
|
||||
std::string input_;
|
||||
bool is_partial_;
|
||||
common_chat_parser_params syntax_; // TODO: rename to params
|
||||
std::string healing_marker_;
|
||||
|
||||
size_t pos_ = 0;
|
||||
common_chat_msg result_;
|
||||
|
||||
public:
|
||||
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
|
||||
const std::string & input() const { return input_; }
|
||||
size_t pos() const { return pos_; }
|
||||
const std::string & healing_marker() const { return healing_marker_; }
|
||||
const bool & is_partial() const { return is_partial_; }
|
||||
const common_chat_msg & result() const { return result_; }
|
||||
const common_chat_parser_params & syntax() const { return syntax_; }
|
||||
|
||||
void move_to(size_t pos) {
|
||||
if (pos > input_.size()) {
|
||||
throw std::runtime_error("Invalid position!");
|
||||
}
|
||||
pos_ = pos;
|
||||
}
|
||||
void move_back(size_t n) {
|
||||
if (pos_ < n) {
|
||||
throw std::runtime_error("Can't move back that far!");
|
||||
}
|
||||
pos_ -= n;
|
||||
}
|
||||
|
||||
// Get the substring of the input at the given range
|
||||
std::string str(const common_string_range & rng) const;
|
||||
|
||||
// Appends to the result.content field
|
||||
void add_content(const std::string & content);
|
||||
|
||||
// Appends to the result.reasoning_content field
|
||||
void add_reasoning_content(const std::string & reasoning_content);
|
||||
|
||||
// Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
|
||||
bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
|
||||
|
||||
// Adds a tool call using the "name", "id" and "arguments" fields of the json object
|
||||
bool add_tool_call(const nlohmann::ordered_json & tool_call);
|
||||
|
||||
// Adds an array of tool calls using their "name", "id" and "arguments" fields.
|
||||
bool add_tool_calls(const nlohmann::ordered_json & arr);
|
||||
|
||||
// Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
|
||||
bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
|
||||
|
||||
void finish();
|
||||
|
||||
bool consume_spaces();
|
||||
|
||||
void consume_literal(const std::string & literal);
|
||||
|
||||
bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
|
||||
|
||||
std::string consume_rest();
|
||||
|
||||
struct find_regex_result {
|
||||
std::string prelude;
|
||||
std::vector<common_string_range> groups;
|
||||
};
|
||||
|
||||
std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
|
||||
|
||||
bool try_consume_literal(const std::string & literal);
|
||||
|
||||
std::optional<find_regex_result> try_find_literal(const std::string & literal);
|
||||
|
||||
find_regex_result consume_regex(const common_regex & regex);
|
||||
|
||||
std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
|
||||
|
||||
std::optional<common_json> try_consume_json();
|
||||
common_json consume_json();
|
||||
|
||||
struct consume_json_result {
|
||||
nlohmann::ordered_json value;
|
||||
bool is_partial;
|
||||
};
|
||||
|
||||
/*
|
||||
Consumes (possibly partial) JSON and converts specific subtrees to (possibly truncated) JSON strings.
|
||||
|
||||
By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
|
||||
e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
|
||||
|
||||
But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
|
||||
- with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
|
||||
- with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
|
||||
*/
|
||||
consume_json_result consume_json_with_dumped_args(
|
||||
const std::vector<std::vector<std::string>> & args_paths = {},
|
||||
const std::vector<std::vector<std::string>> & content_paths = {}
|
||||
);
|
||||
std::optional<consume_json_result> try_consume_json_with_dumped_args(
|
||||
const std::vector<std::vector<std::string>> & args_paths = {},
|
||||
const std::vector<std::vector<std::string>> & content_paths = {}
|
||||
);
|
||||
|
||||
/**
|
||||
* Parse an XML-style tool call for the given xml_tool_call_format. Returns false on invalid syntax and leaves the position untouched.
|
||||
* form.scope_start, form.tool_sep and form.scope_end can be empty.
|
||||
*/
|
||||
bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
|
||||
|
||||
// Parse content using reasoning and XML-style tool calls
|
||||
void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
|
||||
|
||||
void clear_tools();
|
||||
};
|
||||
|
|
@ -1,13 +1,16 @@
|
|||
#include "chat-peg-parser.h"
|
||||
|
||||
#include "chat-auto-parser.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
|
||||
int count = 0;
|
||||
while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
|
||||
if (max != -1 && count <= max) {
|
||||
if (max != -1 && count >= max) {
|
||||
break;
|
||||
}
|
||||
sv.remove_suffix(1);
|
||||
|
|
@ -16,109 +19,746 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
|
|||
return sv;
|
||||
}
|
||||
|
||||
static std::string_view trim_leading_space(std::string_view sv, int max = -1) {
|
||||
int count = 0;
|
||||
while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front()))) {
|
||||
if (max != -1 && count >= max) {
|
||||
break;
|
||||
}
|
||||
sv.remove_prefix(1);
|
||||
count++;
|
||||
}
|
||||
return sv;
|
||||
}
|
||||
|
||||
static std::string_view trim(std::string_view sv) {
|
||||
return trim_trailing_space(trim_leading_space(sv, 1));
|
||||
}
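Expected behaviour of the three helpers above, using the corrected comparison (a sketch; the values follow directly from the loops):

    std::string_view a = trim_trailing_space("ab  ");    // "ab"   (max = -1 removes all trailing spaces)
    std::string_view b = trim_leading_space("  ab", 1);  // " ab"  (at most one leading space removed)
    std::string_view c = trim("  ab  ");                 // " ab"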
|
||||
|
||||
// Count the number of unclosed '{' braces in a JSON-like string,
|
||||
// properly skipping braces inside quoted strings.
|
||||
static int json_brace_depth(const std::string & s) {
|
||||
int depth = 0;
|
||||
bool in_string = false;
|
||||
bool escaped = false;
|
||||
for (char c : s) {
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if (c == '\\' && in_string) {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
if (c == '"') {
|
||||
in_string = !in_string;
|
||||
continue;
|
||||
}
|
||||
if (!in_string) {
|
||||
if (c == '{') {
|
||||
depth++;
|
||||
} else if (c == '}') {
|
||||
depth--;
|
||||
}
|
||||
}
|
||||
}
|
||||
return depth;
|
||||
}
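The comment above mentions skipping braces inside quoted strings; a couple of illustrative values (these follow directly from the loop):

    int d1 = json_brace_depth("{\"a\": {\"b\"");       // 2: two unclosed braces
    int d2 = json_brace_depth("{\"s\": \"}{\"}");      // 0: braces inside the string are ignored
    int d3 = json_brace_depth("{\"p\": \"a\\\"}\"}");  // 0: the escaped quote does not end the string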
|
||||
|
||||
// JSON-escape a string and return the inner content (without surrounding quotes).
|
||||
static std::string escape_json_string_inner(const std::string & s) {
|
||||
std::string escaped = json(s).dump();
|
||||
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
|
||||
return escaped.substr(1, escaped.size() - 2);
|
||||
}
|
||||
return escaped;
|
||||
}
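For example (illustrative only), the inner content is exactly what nlohmann::json would emit between the surrounding quotes:

    // Input:  He said "hi" followed by a newline
    // Result: He said \"hi\"\n   (no surrounding quotes, ready to splice into a JSON string)
    std::string inner = escape_json_string_inner("He said \"hi\"\n");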
|
||||
|
||||
// Convert Python-style single-quoted strings to JSON double-quoted strings
|
||||
// Only converts outer string delimiters, properly handling escape sequences:
|
||||
// - {'key': 'value'} -> {"key": "value"}
|
||||
// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"}
|
||||
// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""}
|
||||
static std::string normalize_quotes_to_json(const std::string & input) {
|
||||
std::string result;
|
||||
result.reserve(input.size() + 16); // May need extra space for escaping
|
||||
|
||||
bool in_single_quoted = false;
|
||||
bool in_double_quoted = false;
|
||||
|
||||
for (size_t i = 0; i < input.size(); ++i) {
|
||||
char c = input[i];
|
||||
|
||||
// Handle escape sequences
|
||||
if (c == '\\' && i + 1 < input.size()) {
|
||||
char next = input[i + 1];
|
||||
|
||||
if (in_single_quoted) {
|
||||
// Inside a single-quoted string being converted to double quotes
|
||||
if (next == '\'') {
|
||||
// \' -> ' (escaped single quote becomes unescaped in double-quoted string)
|
||||
result += '\'';
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
if (next == '"') {
|
||||
// \" stays as \" (already escaped, works in double-quoted string)
|
||||
result += "\\\"";
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
// Other escapes (\n, \\, etc.): pass through both characters
|
||||
result += c;
|
||||
result += next;
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (in_double_quoted) {
|
||||
// Inside a double-quoted string - pass through escape sequences as-is
|
||||
result += c;
|
||||
result += next;
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Outside any string - just pass through the backslash
|
||||
result += c;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle quote characters
|
||||
if (c == '"') {
|
||||
if (in_single_quoted) {
|
||||
// Unescaped double quote inside single-quoted string -> must escape for JSON
|
||||
result += "\\\"";
|
||||
} else {
|
||||
// Double quote as string delimiter or outside strings
|
||||
in_double_quoted = !in_double_quoted;
|
||||
result += c;
|
||||
}
|
||||
} else if (c == '\'') {
|
||||
if (in_double_quoted) {
|
||||
// Single quote inside double-quoted string -> pass through
|
||||
result += c;
|
||||
} else if (in_single_quoted) {
|
||||
// Closing single quote -> convert to double quote
|
||||
in_single_quoted = false;
|
||||
result += '"';
|
||||
} else {
|
||||
// Opening single quote -> convert to double quote
|
||||
in_single_quoted = true;
|
||||
result += '"';
|
||||
}
|
||||
} else {
|
||||
result += c;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
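The three conversions listed in the comment above can be checked directly (a small sketch; assumes <cassert>):

    assert(normalize_quotes_to_json("{'key': 'value'}") == "{\"key\": \"value\"}");
    assert(normalize_quotes_to_json("{'code': 'print(\\'hello\\')'}")
           == "{\"code\": \"print('hello')\"}");
    assert(normalize_quotes_to_json("{'msg': 'He said \"hi\"'}")
           == "{\"msg\": \"He said \\\"hi\\\"\"}");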
|
||||
|
||||
void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
|
||||
arena.visit(result, [this](const common_peg_ast_node & node) {
|
||||
map(node);
|
||||
});
|
||||
arena.visit(result, [this](const common_peg_ast_node & node) { map(node); });
|
||||
}
|
||||
|
||||
void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
||||
bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
|
||||
bool is_content = node.tag == common_chat_peg_builder::CONTENT;
|
||||
bool is_content = node.tag == common_chat_peg_builder::CONTENT;
|
||||
|
||||
if (is_reasoning) {
|
||||
result.reasoning_content = std::string(trim_trailing_space(node.text));
|
||||
if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here
|
||||
result.reasoning_content += std::string(node.text);
|
||||
}
|
||||
|
||||
if (is_content) {
|
||||
result.content = std::string(trim_trailing_space(node.text));
|
||||
// Concatenate content from multiple content nodes (e.g., when reasoning markers
|
||||
// are preserved before content markers in reasoning_format=NONE mode)
|
||||
result.content += std::string(node.text);
|
||||
}
|
||||
}
|
||||
|
||||
void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
|
||||
void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
|
||||
arena.visit(result, [this](const common_peg_ast_node & node) {
|
||||
if (!node.tag.empty()) {
|
||||
tags[node.tag] = std::string(node.text);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const {
|
||||
common_peg_parse_context ctx(input, is_partial);
|
||||
auto parse_result = arena.parse(ctx);
|
||||
|
||||
tag_based_peg_mapper mapper;
|
||||
mapper.from_ast(ctx.ast, parse_result);
|
||||
|
||||
return { std::move(parse_result), std::move(mapper.tags) };
|
||||
}
|
||||
|
||||
tagged_peg_parser build_tagged_peg_parser(
|
||||
const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn) {
|
||||
common_peg_parser_builder builder;
|
||||
builder.set_root(fn(builder));
|
||||
return { builder.build() };
|
||||
}
|
||||
|
||||
common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string & tag_name,
|
||||
const std::string & marker,
|
||||
const common_peg_parser & p) {
|
||||
if (marker.empty()) {
|
||||
return zero_or_more(choice({ p, rule(tag_name, content(any())) }));
|
||||
}
|
||||
auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker)));
|
||||
return zero_or_more(choice({ p, content_chunk }));
|
||||
}
|
||||
|
||||
std::string & common_chat_peg_unified_mapper::args_target() {
|
||||
return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer;
|
||||
}
|
||||
|
||||
void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena,
|
||||
const common_peg_parse_result & parse_result_arg) {
|
||||
// Call base class to visit all nodes
|
||||
common_chat_peg_mapper::from_ast(arena, parse_result_arg);
|
||||
|
||||
// Flush any tool call that is still pending (and already has a name) once parsing ends
|
||||
// This can happen during partial parsing when the tool call is incomplete
|
||||
if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
|
||||
if (!args_buffer.empty()) {
|
||||
pending_tool_call->arguments = args_buffer;
|
||||
}
|
||||
if (closing_quote_pending && !pending_tool_call->arguments.empty()) {
|
||||
pending_tool_call->arguments += "\"";
|
||||
}
|
||||
result.tool_calls.push_back(pending_tool_call.value());
|
||||
pending_tool_call.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
|
||||
// First call base class for reasoning/content handling
|
||||
common_chat_peg_mapper::map(node);
|
||||
|
||||
bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
|
||||
bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
|
||||
bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
|
||||
bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
|
||||
// Handle tool-related tags (unified version supporting both JSON and tagged formats)
|
||||
bool is_tool_open = node.tag == common_chat_peg_unified_builder::TOOL_OPEN;
|
||||
bool is_tool_close = node.tag == common_chat_peg_unified_builder::TOOL_CLOSE;
|
||||
bool is_tool_name = node.tag == common_chat_peg_unified_builder::TOOL_NAME;
|
||||
bool is_tool_id = node.tag == common_chat_peg_unified_builder::TOOL_ID;
|
||||
bool is_tool_args = node.tag == common_chat_peg_unified_builder::TOOL_ARGS;
|
||||
bool is_arg_open = node.tag == common_chat_peg_unified_builder::TOOL_ARG_OPEN;
|
||||
bool is_arg_close = node.tag == common_chat_peg_unified_builder::TOOL_ARG_CLOSE;
|
||||
bool is_arg_name = node.tag == common_chat_peg_unified_builder::TOOL_ARG_NAME;
|
||||
bool is_arg_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_VALUE;
|
||||
bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE;
|
||||
|
||||
if (is_tool_open) {
|
||||
result.tool_calls.emplace_back();
|
||||
current_tool = &result.tool_calls.back();
|
||||
pending_tool_call = common_chat_tool_call();
|
||||
current_tool = &pending_tool_call.value();
|
||||
arg_count = 0;
|
||||
args_buffer.clear();
|
||||
closing_quote_pending = false;
|
||||
}
|
||||
|
||||
if (is_tool_id && current_tool) {
|
||||
current_tool->id = std::string(trim_trailing_space(node.text));
|
||||
auto text = trim_trailing_space(node.text);
|
||||
if (text.size() >= 2 && text.front() == '"' && text.back() == '"') {
|
||||
text = text.substr(1, text.size() - 2);
|
||||
}
|
||||
current_tool->id = std::string(text);
|
||||
}
|
||||
|
||||
if (is_tool_name && current_tool) {
|
||||
current_tool->name = std::string(trim_trailing_space(node.text));
|
||||
// Now that we have the name, populate the arguments from the buffer
|
||||
if (!args_buffer.empty()) {
|
||||
current_tool->arguments = args_buffer;
|
||||
args_buffer.clear();
|
||||
} else if (current_tool->arguments.empty()) {
|
||||
current_tool->arguments = "{";
|
||||
}
|
||||
// Add the tool call to results so streaming can see it
|
||||
if (pending_tool_call.has_value()) {
|
||||
result.tool_calls.push_back(pending_tool_call.value());
|
||||
pending_tool_call.reset();
|
||||
current_tool = &result.tool_calls.back();
|
||||
}
|
||||
}
|
||||
|
||||
if (is_tool_args && current_tool) {
|
||||
current_tool->arguments = std::string(trim_trailing_space(node.text));
|
||||
}
|
||||
}
|
||||
|
||||
void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
common_chat_peg_mapper::map(node);

bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;

if (is_tool_open) {
result.tool_calls.emplace_back();
current_tool = &result.tool_calls.back();
arg_count = 0;
}

if (is_tool_name) {
current_tool->name = std::string(node.text);
current_tool->arguments = "{";
// For JSON format: arguments come as a complete JSON object
// For tagged format: built up from individual arg_name/arg_value nodes
auto text = trim_trailing_space(node.text);
if (!text.empty() && text.front() == '{') {
args_target() = std::string(text);
}
}

if (is_arg_open) {
needs_closing_quote = false;
closing_quote_pending = false;
}

if (is_arg_name && current_tool) {
std::string arg_entry;
if (arg_count > 0) {
current_tool->arguments += ",";
arg_entry = ",";
}
current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
arg_entry += json(trim(node.text)).dump() + ":";
++arg_count;

auto & target = args_target();
if (target.empty()) {
target = "{";
}
target += arg_entry;
}

if (is_arg_string && current_tool) {
// Serialize to JSON, but exclude the end quote
std::string dumped = json(trim_trailing_space(node.text)).dump();
current_tool->arguments += dumped.substr(0, dumped.size() - 1);
needs_closing_quote = true;
if ((is_arg_value || is_arg_string_value) && current_tool) {
std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));

std::string value_to_add;
if (value_content.empty() && is_arg_string_value) {
// Empty string value - arg_close will add the closing quote
value_to_add = "\"";
closing_quote_pending = true;
} else if (!value_content.empty() && is_arg_string_value) {
// Schema declares this as string type - always treat as literal string value
if (!closing_quote_pending) {
value_to_add = "\"";
closing_quote_pending = true;
}
value_to_add += escape_json_string_inner(value_content);
} else if (!value_content.empty()) {
// For potential containers, normalize Python-style single quotes to JSON double quotes
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
if (is_potential_container) {
value_content = normalize_quotes_to_json(value_content);
}

// Try to parse as JSON value (number, bool, null, object, array)
try {
json parsed = json::parse(value_content);
if (parsed.is_string()) {
// Don't add closing quote yet (added by arg_close) for monotonic streaming
std::string escaped = parsed.dump();
if (!escaped.empty() && escaped.back() == '"') {
escaped.pop_back();
}
value_to_add = escaped;
closing_quote_pending = true;
} else {
// Non-string values: use raw content to preserve whitespace for monotonicity
value_to_add = value_content;
}
} catch (...) {
if (node.is_partial && is_potential_container) {
// Partial container: pass through the already-normalized content
value_to_add = value_content;
} else {
// Not valid JSON - treat as string value
if (!closing_quote_pending) {
value_to_add = "\"";
closing_quote_pending = true;
}
value_to_add += escape_json_string_inner(value_content);
}
}
}

args_target() += value_to_add;
}

if (is_arg_close && current_tool) {
if (needs_closing_quote) {
current_tool->arguments += "\"";
needs_closing_quote = false;
if (closing_quote_pending) {
args_target() += "\"";
closing_quote_pending = false;
}
}

if (is_arg_json && current_tool) {
current_tool->arguments += std::string(trim_trailing_space(node.text));
}

if (is_tool_close && current_tool) {
if (needs_closing_quote) {
current_tool->arguments += "\"";
needs_closing_quote = false;
// Flush buffer to arguments if tool name was never seen
if (current_tool->name.empty() && !args_buffer.empty()) {
current_tool->arguments = args_buffer;
args_buffer.clear();
}
// Close any pending string quote
if (closing_quote_pending) {
current_tool->arguments += "\"";
closing_quote_pending = false;
}
// Close any unclosed braces (accounts for nested objects)
for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
current_tool->arguments += "}";
}
// Add tool call to results if named; otherwise discard
if (pending_tool_call.has_value()) {
if (!current_tool->name.empty()) {
result.tool_calls.push_back(pending_tool_call.value());
}
pending_tool_call.reset();
}
current_tool->arguments += "}";
}
}

common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools(
const std::map<std::string, std::string> & markers,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls) {
if (!tools.is_array() || tools.empty()) {
return eps();
}

// Extract markers with defaults
auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string {
auto it = markers.find(key);
return it != markers.end() ? it->second : default_val;
};

std::string section_start = get_marker("tool_call_start_marker", "<tool_call>");
std::string section_end = get_marker("tool_call_end_marker", "</tool_call>");
std::string func_opener = get_marker("function_opener", "<function=");
std::string func_name_suffix = get_marker("function_name_suffix", ">");
std::string func_closer = get_marker("function_closer", "</function>");
std::string param_key_prefix = get_marker("parameter_key_prefix", "<param=");
std::string param_key_suffix = get_marker("parameter_key_suffix", ">");
std::string param_closer = get_marker("parameter_closer", "</param>");

// Build tool choices for tagged format
auto tool_choices = choice();

for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();

// Build argument parsers
auto args = eps();
if (params.contains("properties") && !params["properties"].empty()) {
auto arg_choice = choice();
for (const auto & el : params["properties"].items()) {
const std::string & prop_name = el.key();

auto arg_name_parser =
choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });

auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) +
literal(param_key_suffix) + tool_arg_value(until(param_closer)) +
tool_arg_close(literal(param_closer)));
arg_choice |= arg_rule;
}
args = zero_or_more(arg_choice + space());
}

// Build function parser: <function=name>args</function>
auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) +
space() + tool_args(args) + space() + tool_close(literal(func_closer)));

tool_choices |= rule("tool-" + name, tool_parser);
}

// Build the section with markers
auto section =
parallel_tool_calls ?
trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) +
literal(section_end)) :
trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end));

return force_tool_calls ? section : optional(section);
}
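
A minimal usage sketch for this helper (the marker values below mirror the defaults above; the "tools" array is an assumed OpenAI-style {"type":"function","function":{...}} list and is not part of this change):

// Hypothetical usage sketch, not taken from the diff:
auto arena = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & b) {
    std::map<std::string, std::string> markers = {
        { "tool_call_start_marker", "<tool_call>" },
        { "tool_call_end_marker",   "</tool_call>" },
        { "function_opener",        "<function=" },
        { "function_name_suffix",   ">" },
        { "function_closer",        "</function>" },
        { "parameter_key_prefix",   "<param=" },
        { "parameter_key_suffix",   ">" },
        { "parameter_closer",       "</param>" },
    };
    return b.standard_constructed_tools(markers, tools, /* parallel_tool_calls */ true, /* force_tool_calls */ false);
});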

// Helper: Parse dot notation key into prefix and field name
static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
auto dot_pos = key.find('.');
if (dot_pos == std::string::npos) {
return {"", key}; // Top-level field
}
return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
}
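
To make the dot-notation handling concrete, an illustrative example:

// parse_key_spec("function.name") -> { "function", "name" }   // nested: prefix + field
// parse_key_spec("name")          -> { "",         "name" }   // top-level field, empty prefix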

// Mode 1: function_is_key — parse {"function_name": {...}}
common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key(
const nlohmann::json & tools,
const std::string & args_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key) {

auto tool_choices = choice();

for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();

// Build inner object fields
std::vector<common_peg_parser> inner_fields;

if (!call_id_key.empty()) {
auto id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
literal("\"") + tool_id(json_string_content()) + literal("\"")
);
inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
}

if (!gen_call_id_key.empty()) {
auto gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
}

// Arguments — either wrapped in args_key or parsed directly
common_peg_parser args_parser = eps();
if (args_key.empty()) {
args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
} else {
args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
}
inner_fields.push_back(args_parser);

// Build inner object parser
common_peg_parser inner_object = eps();
if (args_key.empty() && inner_fields.size() == 1) {
inner_object = inner_fields[0];
} else {
inner_object = literal("{") + space();
for (size_t i = 0; i < inner_fields.size(); i++) {
inner_object = inner_object + inner_fields[i];
if (i < inner_fields.size() - 1) {
inner_object = inner_object + space();
}
}
inner_object = inner_object + space() + literal("}");
}

auto tool_parser = tool(
tool_open(literal("{")) + space() +
literal("\"") + tool_name(literal(name)) + literal("\"") +
space() + literal(":") + space() +
inner_object +
space() + tool_close(literal("}"))
);

tool_choices |= rule("tool-" + name, tool_parser);
}

return tool_choices;
}
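
To make the layout concrete, an illustrative payload this mode accepts (the tool name, argument values and the "id"/"arguments" key names are assumptions for the example):

// args_key = "arguments", call_id_key = "id":
//   {"get_weather": {"id": "call_1", "arguments": {"city": "Paris"}}}
// args_key empty (the inner object is the argument object itself):
//   {"get_weather": {"city": "Paris"}}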

// Mode 2: Nested keys (dot notation like "function.name")
common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys(
const nlohmann::json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key) {

auto tool_choices = choice();

auto name_spec = parse_key_spec(effective_name_key);
auto args_spec = parse_key_spec(effective_args_key);

std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;

for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();

auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
literal("\"") + tool_name(literal(name)) + literal("\"");
auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));

auto nested_object = literal("{") + space() +
nested_name + space() + literal(",") + space() +
nested_args +
space() + literal("}");

// Format: { id?, "function": {...} }
auto tool_parser_body = tool_open(literal("{")) + space();

if (!call_id_key.empty()) {
auto id_spec = parse_key_spec(call_id_key);
if (id_spec.first.empty()) {
auto id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
literal("\"") + tool_id(json_string_content()) + literal("\"")
);
tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
}
}

if (!gen_call_id_key.empty()) {
auto gen_id_spec = parse_key_spec(gen_call_id_key);
if (gen_id_spec.first.empty()) {
auto gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
}
}

auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));

tool_choices |= rule("tool-" + name, tool(tool_parser_body));
}

return tool_choices;
}
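
Illustrative payload for this nested layout, assuming name_key = "function.name", args_key = "function.arguments" and call_id_key = "id" (example key names and values only):

//   {"id": "call_1", "function": {"name": "get_weather", "arguments": {"city": "Paris"}}}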

// Mode 3: Flat keys with optional ID fields and parameter ordering
common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys(
const nlohmann::json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order) {

auto tool_choices = choice();
auto name_key_parser = literal("\"" + effective_name_key + "\"");
auto args_key_parser = literal("\"" + effective_args_key + "\"");

for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();

auto tool_name_ = name_key_parser + space() + literal(":") + space() +
literal("\"") + tool_name(literal(name)) + literal("\"");
auto tool_args_ = args_key_parser + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));

// Build ID parsers if keys are provided
common_peg_parser id_parser = eps();
if (!call_id_key.empty()) {
id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
}

common_peg_parser gen_id_parser = eps();
if (!gen_call_id_key.empty()) {
gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
}

// Create (parser, key) pairs for all fields, then sort by parameters_order
std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
parser_pairs.emplace_back(tool_name_, effective_name_key);
parser_pairs.emplace_back(tool_args_, effective_args_key);
if (!call_id_key.empty()) {
parser_pairs.emplace_back(optional(id_parser), call_id_key);
}
if (!gen_call_id_key.empty()) {
parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
}

std::sort(parser_pairs.begin(), parser_pairs.end(),
[&parameters_order](const auto & a, const auto & b) {
auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
return idx_a < idx_b;
});

auto ordered_body = tool_open(literal("{")) + space();
for (size_t i = 0; i < parser_pairs.size(); i++) {
ordered_body = ordered_body + parser_pairs[i].first;
if (i < parser_pairs.size() - 1) {
ordered_body = ordered_body + space() + literal(",") + space();
}
}
ordered_body = ordered_body + space() + tool_close(literal("}"));

tool_choices |= rule("tool-" + name, tool(ordered_body));
}

return tool_choices;
}
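
Illustrative payload for the flat layout, assuming name_key = "name", args_key = "arguments", call_id_key = "id" and parameters_order = {"id", "name", "arguments"} (example values only); fields not listed in parameters_order sort last:

//   {"id": "call_1", "name": "get_weather", "arguments": {"city": "Paris"}}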

common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
const std::string & section_start,
const std::string & section_end,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls,
const std::string & name_key,
const std::string & args_key,
bool array_wrapped,
bool function_is_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order) {
if (!tools.is_array() || tools.empty()) {
return eps();
}

std::string effective_name_key = name_key.empty() ? "name" : name_key;
std::string effective_args_key = args_key.empty() ? "arguments" : args_key;

// Dispatch to the appropriate builder based on the JSON layout mode
common_peg_parser tool_choices = eps();
if (function_is_key) {
tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
} else {
auto name_spec = parse_key_spec(effective_name_key);
auto args_spec = parse_key_spec(effective_args_key);
if (!name_spec.first.empty() || !args_spec.first.empty()) {
tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
} else {
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
}
}

// Build the section with markers
auto tool_calls = tool_choices;
if (parallel_tool_calls) {
tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
}

if (array_wrapped) {
tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
}

auto section =
trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));

return force_tool_calls ? section : optional(section);
}
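
A minimal sketch of how this dispatcher might be driven end to end (the section markers and the tools JSON are assumptions for illustration, not part of this change):

// Hypothetical Hermes-style wrapping: <tool_call>{...}</tool_call>
auto arena = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & b) {
    return b.standard_json_tools("<tool_call>", "</tool_call>", tools,
                                 /* parallel_tool_calls */ false,
                                 /* force_tool_calls    */ false);
});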

@@ -3,18 +3,29 @@
#include "chat.h"
#include "peg-parser.h"

#include <map>
#include <optional>
#include <vector>

class common_chat_peg_builder : public common_peg_parser_builder {
public:
static constexpr const char * REASONING_BLOCK = "reasoning-block";
static constexpr const char * REASONING = "reasoning";
static constexpr const char * CONTENT = "content";

common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }

common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }

common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }

common_peg_parser tag_with_safe_content(const std::string & tag_name,
const std::string & marker,
const common_peg_parser & p);
};

inline common_peg_arena build_chat_peg_parser(
const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
common_chat_peg_builder builder;
builder.set_root(fn(builder));
return builder.build();

@@ -26,80 +37,142 @@ class common_chat_peg_mapper {

common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}

virtual ~common_chat_peg_mapper() = default;

virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
virtual void map(const common_peg_ast_node & node);
};

class common_chat_peg_native_builder : public common_chat_peg_builder {
public:
static constexpr const char * TOOL = "tool";
static constexpr const char * TOOL_OPEN = "tool-open";
static constexpr const char * TOOL_CLOSE = "tool-close";
static constexpr const char * TOOL_ID = "tool-id";
static constexpr const char * TOOL_NAME = "tool-name";
static constexpr const char * TOOL_ARGS = "tool-args";
struct content_structure;
struct tool_call_structure;

class common_chat_peg_unified_builder : public common_chat_peg_builder {
public:
// Tag constants
static constexpr const char * TOOL = "tool";
static constexpr const char * TOOL_OPEN = "tool-open";
static constexpr const char * TOOL_CLOSE = "tool-close";
static constexpr const char * TOOL_ID = "tool-id";
static constexpr const char * TOOL_NAME = "tool-name";
static constexpr const char * TOOL_ARGS = "tool-args";
static constexpr const char * TOOL_ARG = "tool-arg";
static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
static constexpr const char * TOOL_ARG_VALUE = "tool-arg-value";
static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value"; // For schema-declared string types

// Low-level tag methods
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
};

class common_chat_peg_native_mapper : public common_chat_peg_mapper {
common_chat_tool_call * current_tool;

public:
common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}

void map(const common_peg_ast_node & node) override;
};

inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
common_chat_peg_native_builder builder;
builder.set_root(fn(builder));
return builder.build();
}

class common_chat_peg_constructed_builder : public common_chat_peg_builder {
public:
static constexpr const char * TOOL = "tool";
static constexpr const char * TOOL_OPEN = "tool-open";
static constexpr const char * TOOL_CLOSE = "tool-close";
static constexpr const char * TOOL_NAME = "tool-name";
static constexpr const char * TOOL_ARG = "tool-arg";
static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";

common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }

// Use for schema-declared string types - won't be treated as potential JSON container
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); }

// Legacy-compatible helper for building standard JSON tool calls
// Used by tests and manual parsers
// name_key/args_key: JSON key names for function name and arguments
// Empty or "name"/"arguments" will accept both common variations
// Supports dot notation for nested objects (e.g., "function.name")
// array_wrapped: if true, tool calls are wrapped in JSON array [...]
// function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
// call_id_key: JSON key for string call ID (e.g., "id")
// gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id")
// parameters_order: order in which JSON fields should be parsed
common_peg_parser standard_json_tools(const std::string & section_start,
const std::string & section_end,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls,
const std::string & name_key = "",
const std::string & args_key = "",
bool array_wrapped = false,
bool function_is_key = false,
const std::string & call_id_key = "",
const std::string & gen_call_id_key = "",
const std::vector<std::string> & parameters_order = {});

// Legacy-compatible helper for building XML/tagged style tool calls
// Used by tests and manual parsers
common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls);

private:
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools,
const std::string & args_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key);

common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key);

common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order);
};

class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
common_chat_tool_call * current_tool;
int arg_count = 0;
bool needs_closing_quote = false;

public:
common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}

void map(const common_peg_ast_node & node) override;
};

inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
common_chat_peg_constructed_builder builder;
inline common_peg_arena build_chat_peg_unified_parser(
const std::function<common_peg_parser(common_chat_peg_unified_builder & builder)> & fn) {
common_chat_peg_unified_builder builder;
builder.set_root(fn(builder));
return builder.build();
}

class tag_based_peg_mapper {
public:
std::map<std::string, std::string> tags;

void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
};

struct tagged_parse_result {
common_peg_parse_result result;
std::map<std::string, std::string> tags;
};

struct tagged_peg_parser {
common_peg_arena arena;

tagged_parse_result parse_and_extract(const std::string & input, bool is_partial = false) const;
};

tagged_peg_parser build_tagged_peg_parser(
const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);

class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
std::optional<common_chat_tool_call> pending_tool_call; // Tool call waiting for name
common_chat_tool_call * current_tool = nullptr;
int arg_count = 0;
bool closing_quote_pending = false;
std::string args_buffer; // Buffer to delay arguments until tool name is known

// Returns a reference to the active argument destination string.
// Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
std::string & args_target();

public:
common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}

void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) override;
void map(const common_peg_ast_node & node) override;
};

common/chat.cpp: 2931 lines changed (diff suppressed because it is too large)

common/chat.h: 249 lines changed

@@ -3,17 +3,30 @@
#pragma once

#include "common.h"
#include "jinja/parser.h"
#include "nlohmann/json_fwd.hpp"
#include "peg-parser.h"
#include <functional>
#include "jinja/runtime.h"
#include "jinja/caps.h"
#include "nlohmann/json.hpp"

#include <chrono>
#include <functional>
#include <map>
#include <string>
#include <vector>
#include <map>

using chat_template_caps = jinja::caps;
using json = nlohmann::ordered_json;

#include <nlohmann/json_fwd.hpp>

struct common_chat_templates;

namespace autoparser {
struct templates_params;
} // namespace autoparser

struct common_chat_tool_call {
std::string name;
std::string arguments;

@@ -38,21 +51,85 @@ struct common_chat_msg_content_part {
}
};

struct common_chat_template {
jinja::program prog;
std::string bos_tok;
std::string eos_tok;
std::string src;
chat_template_caps caps;

common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
jinja::lexer lexer;
auto lexer_res = lexer.tokenize(src);
this->prog = jinja::parse_from_tokens(lexer_res);

this->src = lexer_res.source;
this->bos_tok = bos_token;
this->eos_tok = eos_token;

this->caps = jinja::caps_get(prog);
// LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
}

const std::string & source() const { return src; }
const std::string & bos_token() const { return bos_tok; }
const std::string & eos_token() const { return eos_tok; }

// TODO: this is ugly, refactor it somehow
json add_system(const json & messages, const std::string & system_prompt) const {
GGML_ASSERT(messages.is_array());
auto msgs_copy = messages;
if (!caps.supports_system_role) {
if (msgs_copy.empty()) {
msgs_copy.insert(msgs_copy.begin(), json{
{"role", "user"},
{"content", system_prompt}
});
} else {
auto & first_msg = msgs_copy[0];
if (!first_msg.contains("content")) {
first_msg["content"] = "";
}
first_msg["content"] = system_prompt + "\n\n"
+ first_msg["content"].get<std::string>();
}
} else {
if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
msgs_copy.insert(msgs_copy.begin(), json{
{"role", "system"},
{"content", system_prompt}
});
} else if (msgs_copy[0].at("role") == "system") {
msgs_copy[0]["content"] = system_prompt;
}
}
return msgs_copy;
}
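// Behaviour sketch (illustrative messages only):
//   caps.supports_system_role == true:
//     add_system([{"role":"user","content":"hi"}], "You are helpful")
//       -> [{"role":"system","content":"You are helpful"}, {"role":"user","content":"hi"}]
//   caps.supports_system_role == false:
//     the prompt is prepended to the first message's content instead:
//       -> [{"role":"user","content":"You are helpful\n\nhi"}]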

chat_template_caps original_caps() const {
return caps;
}

};

struct common_chat_msg {
std::string role;
std::string content;
std::vector<common_chat_msg_content_part> content_parts;
std::vector<common_chat_tool_call> tool_calls;
std::string reasoning_content;
std::string tool_name;
std::string tool_call_id;

nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;

bool empty() const {
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
tool_name.empty() && tool_call_id.empty();
}

void set_tool_call_ids(std::vector<std::string> & ids_cache,
const std::function<std::string()> & gen_tool_call_id) {
for (auto i = 0u; i < tool_calls.size(); i++) {
if (ids_cache.size() <= i) {
auto id = tool_calls[i].id;

@@ -64,32 +141,28 @@ struct common_chat_msg {
tool_calls[i].id = ids_cache[i];
}
}

bool operator==(const common_chat_msg & other) const {
return role == other.role && content == other.content && content_parts == other.content_parts &&
tool_calls == other.tool_calls && reasoning_content == other.reasoning_content &&
tool_name == other.tool_name && tool_call_id == other.tool_call_id;
}

bool operator!=(const common_chat_msg & other) const { return !(*this == other); }
};

struct common_chat_msg_diff {
std::string reasoning_content_delta;
std::string content_delta;
size_t tool_call_index = std::string::npos;
common_chat_tool_call tool_call_delta;

static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv,
const common_chat_msg & msg_new);

bool operator==(const common_chat_msg_diff & other) const {
return content_delta == other.content_delta && tool_call_index == other.tool_call_index &&
tool_call_delta == other.tool_call_delta;
}
};

@@ -107,64 +180,37 @@ enum common_chat_tool_choice {

enum common_chat_format {
COMMON_CHAT_FORMAT_CONTENT_ONLY,
COMMON_CHAT_FORMAT_GENERIC,
COMMON_CHAT_FORMAT_MISTRAL_NEMO,
COMMON_CHAT_FORMAT_MAGISTRAL,
COMMON_CHAT_FORMAT_LLAMA_3_X,
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
COMMON_CHAT_FORMAT_HERMES_2_PRO,
COMMON_CHAT_FORMAT_COMMAND_R7B,
COMMON_CHAT_FORMAT_GRANITE,
COMMON_CHAT_FORMAT_GPT_OSS,
COMMON_CHAT_FORMAT_SEED_OSS,
COMMON_CHAT_FORMAT_NEMOTRON_V2,
COMMON_CHAT_FORMAT_APERTUS,
COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
COMMON_CHAT_FORMAT_GLM_4_5,
COMMON_CHAT_FORMAT_MINIMAX_M2,
COMMON_CHAT_FORMAT_KIMI_K2,
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
COMMON_CHAT_FORMAT_APRIEL_1_5,
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
COMMON_CHAT_FORMAT_SOLAR_OPEN,
COMMON_CHAT_FORMAT_EXAONE_MOE,

// These are intended to be parsed by the PEG parser
COMMON_CHAT_FORMAT_PEG_SIMPLE,
COMMON_CHAT_FORMAT_PEG_NATIVE,
COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,

COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};

struct common_chat_templates_inputs {
std::vector<common_chat_msg> messages;
std::string grammar;
std::string json_schema;
bool add_generation_prompt = true;
bool use_jinja = true;
// Parameters below only supported when use_jinja is true
std::vector<common_chat_tool> tools;
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
bool parallel_tool_calls = false;
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
bool enable_thinking = true;
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
std::map<std::string, std::string> chat_template_kwargs;
bool add_bos = false;
bool add_eos = false;
};

struct common_chat_params {
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
std::string prompt;
std::string grammar;
bool grammar_lazy = false;
bool thinking_forced_open = false;
std::vector<common_grammar_trigger> grammar_triggers;
std::vector<std::string> preserved_tokens;

@@ -175,13 +221,14 @@ struct common_chat_params {
// per-message parsing syntax
// should be derived from common_chat_params
struct common_chat_parser_params {
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
bool reasoning_in_content = false;
bool thinking_forced_open = false;
bool parse_tool_calls = true;
bool debug = false; // Enable debug output for PEG parser
common_peg_arena parser = {};
common_chat_parser_params() = default;
common_chat_parser_params(const common_chat_params & chat_params) {
format = chat_params.format;

@@ -194,45 +241,42 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);

void common_chat_templates_free(struct common_chat_templates * tmpls);

struct common_chat_templates_deleter {
void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); }
};

typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;

common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
const std::string & chat_template_override,
const std::string & bos_token_override = "",
const std::string & eos_token_override = "");

bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");

struct common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs);

// Format single message, while taking into account the position of that message in chat history
std::string common_chat_format_single(const struct common_chat_templates * tmpls,
const std::vector<common_chat_msg> & past_msg,
const common_chat_msg & new_msg,
bool add_ass,
bool use_jinja);

// Returns an example of formatted chat
std::string common_chat_format_example(const struct common_chat_templates * tmpls,
bool use_jinja,
const std::map<std::string, std::string> & chat_template_kwargs);

const char* common_chat_format_name(common_chat_format format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
const char * common_chat_format_name(common_chat_format format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params);
common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params);

// used by arg and server
const char * common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format common_reasoning_format_from_name(const std::string & format);

common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

@@ -251,3 +295,10 @@ nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_

// get template caps, useful for reporting to server /props endpoint
std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);

std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
const autoparser::templates_params & inputs,
const std::optional<json> & messages_override = std::nullopt,
const std::optional<json> & tools_override = std::nullopt,
const std::optional<json> & additional_context = std::nullopt);

@@ -1,3 +1,4 @@
#include "log.h"
#include "value.h"
#include "runtime.h"
#include "caps.h"

@@ -16,7 +17,7 @@ using json = nlohmann::ordered_json;
namespace jinja {

using caps_json_fn = std::function<json()>;
using caps_analyze_fn = std::function<void(bool, value &, value &)>;
using caps_analyze_fn = std::function<void(bool, value &, value &, const std::string &)>;

static void caps_try_execute(jinja::program & prog,
const caps_json_fn & messages_fn,

@@ -36,16 +37,20 @@ static void caps_try_execute(jinja::program & prog,
auto tools = ctx.get_val("tools");

bool success = false;
std::string result;
try {
jinja::runtime runtime(ctx);
runtime.execute(prog);
auto results = runtime.execute(prog);
auto parts = jinja::runtime::gather_string_parts(results);
result = parts->as_string().str();
success = true;
} catch (const std::exception & e) {
JJ_DEBUG("Exception during execution: %s", e.what());
result = "";
// ignore exceptions during capability analysis
}

analyze_fn(success, messages, tools);
analyze_fn(success, messages, tools, result);
}

// for debugging only

@@ -90,6 +95,8 @@ caps caps_get(jinja::program & prog) {
return v->stats.ops.find(op_name) != v->stats.ops.end();
};

JJ_DEBUG("%s\n", ">>> Running capability check: typed content");

// case: typed content support
caps_try_execute(
prog,

@@ -106,7 +113,7 @@
// tools
return json{nullptr};
},
[&](bool success, value & messages, value &) {
[&](bool success, value & messages, value &, const std::string &) {
auto & content = messages->at(0)->at("content");
caps_print_stats(content, "messages[0].content");
if (has_op(content, "selectattr") || has_op(content, "array_access")) {

@@ -120,6 +127,7 @@ caps caps_get(jinja::program & prog) {
}
);

JJ_DEBUG("%s\n", ">>> Running capability check: system prompt");

// case: system prompt support
caps_try_execute(

@@ -141,7 +149,7 @@ caps caps_get(jinja::program & prog) {
// tools
return json::array();
},
[&](bool, value & messages, value &) {
[&](bool, value & messages, value &, const std::string &) {
auto & content = messages->at(0)->at("content");
caps_print_stats(content, "messages[0].content");
if (!content->stats.used) {

@@ -150,6 +158,8 @@ caps caps_get(jinja::program & prog) {
}
);

JJ_DEBUG("%s\n", ">>> Running capability check: tool support");

// case: tools support
caps_try_execute(
prog,

@@ -162,10 +172,10 @@ caps caps_get(jinja::program & prog) {
},
{
{"role", "assistant"},
{"content", "Assistant message"},
{"content", ""}, // Some templates expect content to be empty with tool calls
{"tool_calls", json::array({
{
{"id", "call1"},
{"id", "call00001"},
{"type", "function"},
{"function", {
{"name", "tool1"},

@@ -175,10 +185,10 @@ caps caps_get(jinja::program & prog) {
}}
},
{
{"id", "call2"},
{"id", "call00002"},
{"type", "function"},
{"function", {
{"name", "tool2"},
{"name", "tool1"},
{"arguments", {
{"arg", "value"}
}}

@@ -186,6 +196,15 @@ caps caps_get(jinja::program & prog) {
}
})}
},
{
{"role", "tool"},
{"content", "Tool response"},
{"tool_call_id", "call00001"}
},
{
{"role", "assistant"},
{"content", "The tool response was 'tool response'"}
},
{
{"role", "user"},
{"content", "User message"},

@@ -199,7 +218,7 @@ caps caps_get(jinja::program & prog) {
{"name", "tool"},
{"type", "function"},
{"function", {
{"name", "tool"},
{"name", "tool1"},
{"description", "Tool description"},
{"parameters", {
{"type", "object"},

@@ -215,7 +234,7 @@ caps caps_get(jinja::program & prog) {
},
});
},
[&](bool success, value & messages, value & tools) {
[&](bool success, value & messages, value & tools, const std::string & res) {
if (!success) {
result.supports_tool_calls = false;
result.supports_tools = false;

@@ -224,8 +243,11 @@ caps caps_get(jinja::program & prog) {

auto & tool_name = tools->at(0)->at("function")->at("name");
caps_print_stats(tool_name, "tools[0].function.name");
caps_print_stats(tools, "tools");
if (!tool_name->stats.used) {
result.supports_tools = false;
if (!tools->stats.used && res.find(tool_name->as_string().str()) == std::string::npos) {
result.supports_tools = false;
}
}

auto & tool_calls = messages->at(1)->at("tool_calls");;

@@ -243,6 +265,8 @@ caps caps_get(jinja::program & prog) {
}
);

JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning");

// case: preserve reasoning content in chat history
caps_try_execute(
prog,

@@ -268,7 +292,7 @@ caps caps_get(jinja::program & prog) {
// tools
return json::array();
},
[&](bool, value & messages, value &) {
[&](bool, value & messages, value &, const std::string &) {
auto & content = messages->at(1)->at("reasoning_content");
caps_print_stats(content, "messages[1].reasoning_content");
if (content->stats.used) {

@@ -114,8 +114,10 @@ value binary_expression::execute_impl(context & ctx) {

// Logical operators
if (op.value == "and") {
JJ_DEBUG("Executing logical test: %s AND %s", left->type().c_str(), right->type().c_str());
return left_val->as_bool() ? right->execute(ctx) : std::move(left_val);
} else if (op.value == "or") {
JJ_DEBUG("Executing logical test: %s OR %s", left->type().c_str(), right->type().c_str());
return left_val->as_bool() ? std::move(left_val) : right->execute(ctx);
}

@@ -835,7 +837,7 @@ value call_expression::execute_impl(context & ctx) {
for (auto & arg_stmt : this->args) {
auto arg_val = arg_stmt->execute(ctx);
JJ_DEBUG("  Argument type: %s", arg_val->type().c_str());
args.push_back(std::move(arg_val));
args.push_back(arg_val);
}
// execute callee
value callee_val = callee->execute(ctx);

@@ -715,8 +715,26 @@ const func_builtins & value_string_t::get_builtins() const {
return args.get_pos(0);
}},
{"tojson", tojson},
{"indent", [](const func_args &) -> value {
throw not_implemented_exception("String indent builtin not implemented");
{"indent", [](const func_args &args) -> value {
// no support for "first" as that would require us to somehow access generation context
args.ensure_count(2, 4);
args.ensure_vals<value_string, value_int, value_bool, value_bool>(true, true, false, false);

auto input = args.get_pos(0);
auto arg0 = args.get_pos(1);

int count = arg0->as_int();
if (count <= 0) {
throw raised_exception("indent must be a positive number");
}
std::string indented;
for (int i = 0; i < count; i++) {
indented.append(" ");
}
indented.append(input->as_string().str());
auto res = mk_val<value_string>(indented);
res->val_str.mark_input_based_on(input->as_string());
return res;
}},
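// Usage sketch (illustrative): in a template, {{ text | indent(4) }} prepends four spaces to
// the whole string; the optional boolean arguments are accepted, but Jinja's full per-line
// "indent"/"first"/"blank" semantics are not reproduced by this simplified builtin.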
{"join", [](const func_args &) -> value {
throw not_implemented_exception("String join builtin not implemented");

@@ -12,8 +12,8 @@
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include <unordered_map>

namespace jinja {

@ -27,11 +27,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items
|
|||
if (separator_rule.empty()) {
|
||||
if (min_items == 1 && !has_max) {
|
||||
return item_rule + "+";
|
||||
} else if (min_items == 0 && !has_max) {
|
||||
return item_rule + "*";
|
||||
} else {
|
||||
return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
|
||||
}
|
||||
if (min_items == 0 && !has_max) {
|
||||
return item_rule + "*";
|
||||
}
|
||||
return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
|
||||
}
|
||||
|
||||
auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
|
||||
|
|
@ -41,7 +41,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
|
|||
return result;
|
||||
}
|
||||
|
||||
static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
||||
static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
||||
auto has_min = min_value != std::numeric_limits<int64_t>::min();
|
||||
auto has_max = max_value != std::numeric_limits<int64_t>::max();
|
||||
|
||||
|
|
@ -128,14 +128,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
|
|||
if (has_min && has_max) {
|
||||
if (min_value < 0 && max_value < 0) {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
|
||||
build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
|
||||
out << ")";
|
||||
return;
|
||||
}
|
||||
|
||||
if (min_value < 0) {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
|
||||
build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
|
||||
out << ") | ";
|
||||
min_value = 0;
|
||||
}
|
||||
|
|
@ -159,7 +159,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
|
|||
if (has_min) {
|
||||
if (min_value < 0) {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
||||
build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
||||
out << ") | [0] | [1-9] ";
|
||||
more_digits(0, decimals_left - 1);
|
||||
} else if (min_value == 0) {
|
||||
|
|
@ -194,7 +194,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
|
|||
}
|
||||
digit_range(c, c);
|
||||
out << " (";
|
||||
_build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
|
||||
build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
|
||||
out << ")";
|
||||
if (c < '9') {
|
||||
out << " | ";
|
||||
|
|
@ -213,10 +213,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
|
|||
more_digits(0, less_decimals);
|
||||
out << " | ";
|
||||
}
|
||||
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
||||
build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
||||
} else {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
|
||||
build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
|
||||
out << ")";
|
||||
}
|
||||
return;
|
||||
|
|
@ -232,7 +232,7 @@ struct BuiltinRule {
|
|||
std::vector<std::string> deps;
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
||||
static std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
||||
{"boolean", {"(\"true\" | \"false\") space", {}}},
|
||||
{"decimal-part", {"[0-9]{1,16}", {}}},
|
||||
{"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
|
||||
|
|
@ -247,7 +247,7 @@ std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
|||
{"null", {"\"null\" space", {}}},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
||||
static std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
||||
{"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
||||
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
||||
{"date-time", {"date \"T\" time", {"date", "time"}}},
|
||||
|
|
@ -260,22 +260,26 @@ static bool is_reserved_name(const std::string & name) {
|
|||
static const std::unordered_set<std::string> RESERVED_NAMES = [] {
|
||||
std::unordered_set<std::string> s;
|
||||
s.insert("root");
|
||||
for (const auto & p : PRIMITIVE_RULES) s.insert(p.first);
|
||||
for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first);
|
||||
for (const auto & p : PRIMITIVE_RULES) {
|
||||
s.insert(p.first);
|
||||
}
|
||||
for (const auto & p : STRING_FORMAT_RULES) {
|
||||
s.insert(p.first);
|
||||
}
|
||||
return s;
|
||||
}();
|
||||
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
||||
}
|
||||
|
||||
std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
|
||||
std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
|
||||
std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
|
||||
std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
|
||||
static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
|
||||
static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
|
||||
static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
|
||||
static std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
|
||||
{'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
|
||||
};
|
||||
|
||||
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
||||
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
||||
static std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
||||
static std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
||||
|
||||
static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
|
||||
std::smatch match;
|
||||
|
|
@ -322,19 +326,19 @@ private:
|
|||
if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
|
||||
_rules[esc_name] = rule;
|
||||
return esc_name;
|
||||
} else {
|
||||
int i = 0;
|
||||
while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
|
||||
i++;
|
||||
}
|
||||
std::string key = esc_name + std::to_string(i);
|
||||
_rules[key] = rule;
|
||||
return key;
|
||||
}
|
||||
int i = 0;
|
||||
while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
|
||||
i++;
|
||||
}
|
||||
std::string key = esc_name + std::to_string(i);
|
||||
_rules[key] = rule;
|
||||
return key;
|
||||
}
|
||||
|
||||
std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
|
||||
std::vector<std::string> rules;
|
||||
rules.reserve(alt_schemas.size());
|
||||
for (size_t i = 0; i < alt_schemas.size(); i++) {
|
||||
rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
|
||||
}
|
||||
|
|
@ -398,6 +402,7 @@ private:
|
|||
flush_literal();
|
||||
|
||||
std::vector<std::string> results;
|
||||
results.reserve(ret.size());
|
||||
for (const auto & item : ret) {
|
||||
results.push_back(to_rule(item));
|
||||
}
|
||||
|
|
@ -551,7 +556,7 @@ private:
|
|||
TrieNode() : is_end_of_string(false) {}
|
||||
|
||||
void insert(const std::string & string) {
|
||||
auto node = this;
|
||||
auto *node = this;
|
||||
for (char c : string) {
|
||||
node = &node->children[c];
|
||||
}
|
||||
|
|
@ -676,7 +681,7 @@ private:
|
|||
if (ks.empty()) {
|
||||
return res;
|
||||
}
|
||||
std::string k = ks[0];
|
||||
const std::string& k = ks[0];
|
||||
std::string kv_rule_name = prop_kv_rule_names[k];
|
||||
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
|
||||
if (first_is_optional) {
|
||||
|
|
@ -779,7 +784,7 @@ public:
|
|||
std::string pointer = ref.substr(ref.find('#') + 1);
|
||||
std::vector<std::string> tokens = string_split(pointer, "/");
|
||||
for (size_t i = 1; i < tokens.size(); ++i) {
|
||||
std::string sel = tokens[i];
|
||||
const std::string& sel = tokens[i];
|
||||
if (target.is_object() && target.contains(sel)) {
|
||||
target = target[sel];
|
||||
} else if (target.is_array()) {
|
||||
|
|
@ -802,7 +807,7 @@ public:
|
|||
_refs[ref] = target;
|
||||
}
|
||||
} else {
|
||||
for (auto & kv : n.items()) {
|
||||
for (const auto & kv : n.items()) {
|
||||
visit_refs(kv.value());
|
||||
}
|
||||
}
|
||||
|
|
@ -812,7 +817,7 @@ public:
|
|||
visit_refs(schema);
|
||||
}
|
||||
|
||||
std::string _generate_constant_rule(const json & value) {
|
||||
static std::string _generate_constant_rule(const json & value) {
|
||||
return format_literal(value.dump());
|
||||
}
|
||||
|
||||
|
|
@ -823,10 +828,12 @@ public:
|
|||
|
||||
if (schema.contains("$ref")) {
|
||||
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
|
||||
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
|
||||
}
|
||||
if (schema.contains("oneOf") || schema.contains("anyOf")) {
|
||||
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
|
||||
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
|
||||
} else if (schema_type.is_array()) {
|
||||
}
|
||||
if (schema_type.is_array()) {
|
||||
std::vector<json> schema_types;
|
||||
for (const auto & t : schema_type) {
|
||||
json schema_copy(schema);
|
||||
|
|
@ -834,15 +841,18 @@ public:
|
|||
schema_types.push_back(schema_copy);
|
||||
}
|
||||
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
|
||||
} else if (schema.contains("const")) {
|
||||
}
|
||||
if (schema.contains("const")) {
|
||||
return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
|
||||
} else if (schema.contains("enum")) {
|
||||
}
|
||||
if (schema.contains("enum")) {
|
||||
std::vector<std::string> enum_values;
|
||||
for (const auto & v : schema["enum"]) {
|
||||
enum_values.push_back(_generate_constant_rule(v));
|
||||
}
|
||||
return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
|
||||
} else if ((schema_type.is_null() || schema_type == "object")
|
||||
}
|
||||
if ((schema_type.is_null() || schema_type == "object")
|
||||
&& (schema.contains("properties") ||
|
||||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
||||
std::unordered_set<std::string> required;
|
||||
|
|
@ -863,11 +873,12 @@ public:
|
|||
_build_object_rule(
|
||||
properties, required, name,
|
||||
schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
|
||||
} else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
|
||||
}
|
||||
if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
|
||||
std::unordered_set<std::string> required;
|
||||
std::vector<std::pair<std::string, json>> properties;
|
||||
std::map<std::string, size_t> enum_values;
|
||||
std::string hybrid_name = name;
|
||||
const std::string& hybrid_name = name;
|
||||
std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
|
||||
if (comp_schema.contains("$ref")) {
|
||||
add_component(_refs[comp_schema["$ref"]], is_required);
|
||||
|
|
@ -890,9 +901,9 @@ public:
|
|||
// todo warning
|
||||
}
|
||||
};
|
||||
for (auto & t : schema["allOf"]) {
|
||||
for (const auto & t : schema["allOf"]) {
|
||||
if (t.contains("anyOf")) {
|
||||
for (auto & tt : t["anyOf"]) {
|
||||
for (const auto & tt : t["anyOf"]) {
|
||||
add_component(tt, false);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -911,7 +922,8 @@ public:
|
|||
}
|
||||
}
|
||||
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
|
||||
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
|
||||
}
|
||||
if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
|
||||
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
|
||||
if (items.is_array()) {
|
||||
std::string rule = "\"[\" space ";
|
||||
|
|
@ -923,27 +935,31 @@ public:
|
|||
}
|
||||
rule += " \"]\" space";
|
||||
return _add_rule(rule_name, rule);
|
||||
} else {
|
||||
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
|
||||
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
|
||||
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
|
||||
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
|
||||
|
||||
return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
|
||||
}
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
|
||||
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
|
||||
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
|
||||
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
|
||||
|
||||
return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
|
||||
}
|
||||
if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||
return _visit_pattern(schema["pattern"], rule_name);
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
||||
}
|
||||
if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
||||
return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
|
||||
}
|
||||
if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
|
||||
auto prim_name = schema_format + "-string";
|
||||
return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
|
||||
} else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
|
||||
}
|
||||
if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
|
||||
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
|
||||
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
|
||||
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
||||
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
||||
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
||||
}
|
||||
if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
||||
int64_t min_value = std::numeric_limits<int64_t>::min();
|
||||
int64_t max_value = std::numeric_limits<int64_t>::max();
|
||||
if (schema.contains("minimum")) {
|
||||
|
|
@ -958,19 +974,19 @@ public:
|
|||
}
|
||||
std::stringstream out;
|
||||
out << "(";
|
||||
_build_min_max_int(min_value, max_value, out);
|
||||
build_min_max_int(min_value, max_value, out);
|
||||
out << ") space";
|
||||
return _add_rule(rule_name, out.str());
|
||||
} else if (schema.empty() || schema_type == "object") {
|
||||
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||
} else {
|
||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||
return "";
|
||||
}
|
||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||
}
|
||||
if (schema.empty() || schema_type == "object") {
|
||||
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||
}
|
||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||
return "";
|
||||
}
|
||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||
}
|
||||
|
||||
void check_errors() {
|
||||
|
|
@ -985,7 +1001,7 @@ public:
|
|||
std::string format_grammar() {
|
||||
std::stringstream ss;
|
||||
for (const auto & kv : _rules) {
|
||||
ss << kv.first << " ::= " << kv.second << std::endl;
|
||||
ss << kv.first << " ::= " << kv.second << '\n';
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <functional>
|
||||
|
|
@ -111,6 +112,8 @@ class common_peg_ast_arena {
|
|||
|
||||
void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const;
|
||||
void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const;
|
||||
|
||||
std::string dump();
|
||||
};
|
||||
|
||||
struct common_peg_parse_result {
|
||||
|
|
@ -139,6 +142,7 @@ struct common_peg_parse_result {
|
|||
struct common_peg_parse_context {
|
||||
std::string input;
|
||||
bool is_partial;
|
||||
bool debug = false; // Enable debug output for parser tracing
|
||||
common_peg_ast_arena ast;
|
||||
|
||||
int parse_depth;
|
||||
|
|
@ -207,6 +211,7 @@ struct common_peg_chars_parser {
|
|||
};
|
||||
|
||||
struct common_peg_json_string_parser {};
|
||||
struct common_peg_python_dict_string_parser {};
|
||||
|
||||
struct common_peg_until_parser {
|
||||
std::vector<std::string> delimiters;
|
||||
|
|
@ -255,6 +260,7 @@ using common_peg_parser_variant = std::variant<
|
|||
common_peg_space_parser,
|
||||
common_peg_chars_parser,
|
||||
common_peg_json_string_parser,
|
||||
common_peg_python_dict_string_parser,
|
||||
common_peg_until_parser,
|
||||
common_peg_schema_parser,
|
||||
common_peg_rule_parser,
|
||||
|
|
@ -299,6 +305,8 @@ class common_peg_arena {
|
|||
friend class common_peg_parser_builder;
|
||||
|
||||
private:
|
||||
std::string dump_impl(common_peg_parser_id id, std::unordered_set<common_peg_parser_id> & visited) const;
|
||||
|
||||
common_peg_parser_id add_parser(common_peg_parser_variant parser);
|
||||
void add_rule(const std::string & name, common_peg_parser_id id);
|
||||
|
||||
|
|
@ -311,9 +319,16 @@ class common_peg_parser_builder {
|
|||
common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); }
|
||||
common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); }
|
||||
|
||||
bool allow_python_dict_format_ = false;
|
||||
|
||||
public:
|
||||
common_peg_parser_builder();
|
||||
|
||||
// Enable/disable Python dict format support (single-quoted strings).
|
||||
// When enabled, JSON parsers will also accept Python dict-style single-quoted strings.
|
||||
void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; }
|
||||
bool get_allow_python_dict_format() const { return allow_python_dict_format_; }
|
||||
|
||||
// Match nothing, always succeed.
|
||||
// S -> ε
|
||||
common_peg_parser eps() { return add(common_peg_epsilon_parser{}); }
|
||||
|
|
@ -418,10 +433,32 @@ class common_peg_parser_builder {
|
|||
// Useful for extracting content within a JSON string.
|
||||
common_peg_parser json_string_content();
|
||||
|
||||
// Matches a string that accepts both JSON double-quoted and Python dict single-quoted styles.
|
||||
// This is useful when you explicitly want to accept both formats regardless of the allow_python_dict_format flag.
|
||||
common_peg_parser flexible_string();
|
||||
|
||||
// Matches a Python dict-style single-quoted string content without the surrounding quotes.
|
||||
// Useful for extracting content within a Python dict string.
|
||||
common_peg_parser python_dict_string_content();
|
||||
|
||||
// Matches a JSON object member with a key and associated parser as the
|
||||
// value.
|
||||
common_peg_parser json_member(const std::string & key, const common_peg_parser & p);
|
||||
|
||||
// Creates a complete Python dict format parser supporting objects, arrays, single-quoted strings,
|
||||
// numbers, booleans, and null. Similar to JSON but uses single quotes for strings.
|
||||
// value -> object | array | string | number | true | false | null
|
||||
common_peg_parser python_dict();
|
||||
common_peg_parser python_dict_object();
|
||||
common_peg_parser python_dict_string();
|
||||
common_peg_parser python_dict_array();
|
||||
common_peg_parser python_dict_number();
|
||||
common_peg_parser python_dict_bool();
|
||||
common_peg_parser python_dict_null();
|
||||
|
||||
// A marker, i.e. text delimited by a pair of <> or []
|
||||
common_peg_parser marker();
|
||||
|
||||
// Wraps a parser with JSON schema metadata for grammar generation.
|
||||
// Used internally to convert JSON schemas to GBNF grammar rules.
|
||||
common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,609 @@
|
|||
# Unified Auto-Parser Architecture
|
||||
|
||||
The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls.
|
||||
|
||||
## Overview
|
||||
|
||||
The unified auto-parser uses a **pure differential, compositional approach** to analyze chat templates:
|
||||
|
||||
**Core Philosophy**:
|
||||
|
||||
- **Zero Hardcoded Patterns**: All markers extracted through template comparison (the **only heuristic** is JSON detection)
|
||||
- **Compositional Architecture**: Separate parsers for reasoning, content, and tools that compose cleanly
|
||||
|
||||
**Four-Phase Analysis**:
|
||||
|
||||
1. **Phase 1: Reasoning Analysis** (R1-R3) - Detects reasoning markers and mode
|
||||
2. **Phase 2: Content Analysis** (C1) - Detects content wrapping markers
|
||||
3. **Phase 3: Tool Call Analysis** (T1-T7) - Extracts tool section, function, and call ID markers
|
||||
4. **Phase 4: Argument Analysis** (A1-A2) - Extracts argument name/value markers (TAG_WITH_TAGGED only)
|
||||
|
||||
## Data Structures
|
||||
|
||||
### diff_analysis_result
|
||||
|
||||
The result of differential analysis contains all extracted markers and format classifications:
|
||||
|
||||
```cpp
|
||||
struct diff_analysis_result {
|
||||
// Classification results
|
||||
reasoning_mode reasoning = reasoning_mode::NONE;
|
||||
content_mode content = content_mode::PLAIN;
|
||||
tool_format tools = tool_format::NONE;
|
||||
|
||||
// All extracted markers (see marker_registry below)
|
||||
marker_registry markers;
|
||||
|
||||
// JSON field names (for JSON_NATIVE format)
|
||||
bool fun_name_is_key = false; // Function name is the JSON key: {"func_name": {...}}
|
||||
std::string function_field = "function"; // Outer object key (e.g., "function" in "function.name")
|
||||
std::string name_field = "name";
|
||||
std::string args_field = "arguments";
|
||||
std::string id_field; // String call ID field (e.g., "id")
|
||||
std::string gen_id_field; // Generated integer call ID field (e.g., "tool_call_id")
|
||||
std::vector<std::string> parameter_order; // Order of JSON fields for parsing
|
||||
|
||||
// Call ID position (for non-JSON formats)
|
||||
call_id_position call_id_pos = call_id_position::NONE;
|
||||
|
||||
// Flags
|
||||
bool supports_tools = false;
|
||||
bool supports_parallel_calls = false;
|
||||
bool requires_nonnull_content = false;
|
||||
bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
|
||||
|
||||
// Preserved tokens for tokenizer (union of all non-empty markers)
|
||||
std::vector<std::string> preserved_tokens;
|
||||
};
|
||||
```
|
||||
|
||||
### Enums
|
||||
|
||||
**`reasoning_mode`**: How the template handles reasoning/thinking blocks.
|
||||
|
||||
| Value | Description |
|
||||
|----------------------|-------------------------------------------------------------------------------|
|
||||
| `NONE` | No reasoning markers detected |
|
||||
| `TAG_BASED` | Standard tag-based: `<think>...</think>` |
|
||||
| `DELIMITER` | Delimiter-based: reasoning ends at delimiter (e.g., `[BEGIN FINAL RESPONSE]`) |
|
||||
| `FORCED_OPEN` | Template ends with open reasoning tag (empty start, non-empty end) |
|
||||
| `FORCED_CLOSED`      | Template emits both tags when thinking is disabled; only the start tag when enabled |
|
||||
| `TOOLS_ONLY` | Reasoning only appears when tool calls are present |
|
||||
|
||||
**`content_mode`**: How the template wraps content.
|
||||
|
||||
| Value | Description |
|
||||
|----------------------------|------------------------------------------------------|
|
||||
| `PLAIN` | No content markers |
|
||||
| `ALWAYS_WRAPPED` | Content always wrapped: `<response>...</response>` |
|
||||
| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present |
|
||||
|
||||
**`tool_format`**: Classification of tool call structure.
|
||||
|
||||
| Value | Description |
|
||||
|--------------------|------------------------------------------------------------------|
|
||||
| `NONE` | No tool support detected |
|
||||
| `JSON_NATIVE` | Pure JSON: `{"name": "X", "arguments": {...}}` |
|
||||
| `TAG_WITH_JSON` | Tag-based with JSON args: `<function=X>{...}</function>` |
|
||||
| `TAG_WITH_TAGGED` | Tag-based with tagged args: `<param=key>value</param>` |
|
||||
|
||||
**`call_id_position`**: Where call IDs appear relative to function name and arguments (for non-JSON formats).
|
||||
|
||||
| Value | Description |
|
||||
|----------------------------|------------------------------------------|
|
||||
| `NONE` | No call ID support detected |
|
||||
| `PRE_FUNC_NAME` | Before function name |
|
||||
| `BETWEEN_FUNC_AND_ARGS` | Between function name and arguments |
|
||||
| `POST_ARGS` | After arguments |
|
||||
|
||||
### marker_registry
|
||||
|
||||
All markers are extracted via differential analysis without hardcoded patterns:
|
||||
|
||||
```cpp
|
||||
struct marker_registry {
|
||||
// === Reasoning markers (from R1-R3) ===
|
||||
std::string reasoning_start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
|
||||
std::string reasoning_end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
|
||||
|
||||
// === Content markers (from C1) ===
|
||||
std::string content_start; // e.g., "<response>", ""
|
||||
std::string content_end; // e.g., "</response>", ""
|
||||
|
||||
// === Tool section markers (from T1-T2) ===
|
||||
std::string tool_section_start; // e.g., "<tool_call>", "[TOOL_CALLS]"
|
||||
std::string tool_section_end; // e.g., "</tool_call>", ""
|
||||
std::string per_call_start; // e.g., "<|tool_call_begin|>" (for multi-call templates)
|
||||
std::string per_call_end; // e.g., "<|tool_call_end|>"
|
||||
std::string call_separator; // e.g., ",", "\n"
|
||||
|
||||
// === Function markers (from T3-T6) ===
|
||||
std::string func_name_prefix; // e.g., "<function=", "functions."
|
||||
std::string func_name_suffix; // e.g., ">", ":0"
|
||||
std::string func_close; // e.g., "</function>"
|
||||
std::string args_start; // e.g., "{"
|
||||
std::string args_end; // e.g., "}"
|
||||
|
||||
// === Argument markers (from A1-A2, for TAG_WITH_TAGGED) ===
|
||||
std::string arg_name_prefix; // e.g., "<param=", "<arg_key>"
|
||||
std::string arg_name_suffix; // e.g., ">", "</arg_key>"
|
||||
std::string arg_value_prefix; // e.g., "", "<arg_value>"
|
||||
std::string arg_value_suffix; // e.g., "</param>", "</arg_value>"
|
||||
std::string arg_separator; // e.g., "", "\n"
|
||||
|
||||
// === Call ID markers (from T7) ===
|
||||
std::string call_id_prefix; // e.g., "[CALL_ID]"
|
||||
std::string call_id_suffix; // e.g., "[ARGS]"
|
||||
|
||||
// === Special markers ===
|
||||
std::string code_block_marker; // e.g., "Action:" (for markdown code block format)
|
||||
std::string code_block_language; // e.g., "json"
|
||||
std::string function_namespace; // e.g., "functions." (for prefixed-indexed format)
|
||||
};
|
||||
```
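
As an illustrative example (hand-written values, not analyzer output), a template that uses `<think>...</think>` reasoning tags and wraps JSON tool calls in `<tool_call>...</tool_call>` would roughly populate the registry like this:

```cpp
// Hand-written example values for a <think> + <tool_call> style template
// (illustrative only; real values are produced by the differential analysis).
marker_registry markers;
markers.reasoning_start    = "<think>";
markers.reasoning_end      = "</think>";
markers.tool_section_start = "<tool_call>";
markers.tool_section_end   = "</tool_call>";
// Content, per-call, argument and call ID markers remain empty for this format.
```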
|
||||
|
||||
## Tool Calling Formats
|
||||
|
||||
The auto-parser recognizes three tool calling formats.
|
||||
|
||||
### JSON_NATIVE
|
||||
|
||||
**Structure**: The entire tool call (function name, arguments, and values) is in JSON format. There may be enclosing tags around the tool calling section.
|
||||
|
||||
**Characteristics**:
|
||||
|
||||
- Function name is a JSON field: `"name": "function_name"`
|
||||
- Arguments are a JSON object: `"arguments": {"key": "value"}`
|
||||
- May be wrapped in section markers like `<tool_call>...</tool_call>` or `[TOOL_CALLS]...]`
|
||||
|
||||
**Examples**:
|
||||
|
||||
Standard OpenAI-style:
|
||||
|
||||
```json
|
||||
<tool_call>
|
||||
{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}
|
||||
</tool_call>
|
||||
```
|
||||
|
||||
Mistral Nemo with array wrapper:
|
||||
|
||||
```json
|
||||
[TOOL_CALLS]
|
||||
[{"name": "calculate", "arguments": {"expr": "2+2"}}]
|
||||
```
|
||||
|
||||
**Detection**: Function name found inside a JSON structure (determined by JSON parse attempt).
|
||||
|
||||
---
|
||||
|
||||
### TAG_WITH_JSON
|
||||
|
||||
**Structure**: The function name is outside the JSON structure, typically within quasi-XML markers. Arguments are still provided as a JSON object.
|
||||
|
||||
**Characteristics**:
|
||||
|
||||
- Function name appears in tag attributes: `<function=function_name>` or `<tool_name>function_name</tool_name>`
|
||||
- Arguments are a JSON object following the tag
|
||||
- Has closing tags: `</function>` or `</tool_call>`
|
||||
- Arguments remain valid JSON
|
||||
|
||||
**Examples**:
|
||||
|
||||
Functionary v3.1:
|
||||
|
||||
```xml
|
||||
<function=get_weather>{"location": "Paris", "unit": "celsius"}</function>
|
||||
```
|
||||
|
||||
MiniMax:
|
||||
|
||||
```xml
|
||||
<minimax:tool_call>
|
||||
<tool_name>calculate</tool_name>
|
||||
<arguments>{"expr": "2+2"}</arguments>
|
||||
</minimax:tool_call>
|
||||
```
|
||||
|
||||
**Detection**: Function name not in JSON, but arguments are JSON (args_start is `{`).
|
||||
|
||||
---
|
||||
|
||||
### TAG_WITH_TAGGED
|
||||
|
||||
**Structure**: Both the function name AND argument names are in XML-style tags. Argument values may be JSON or unquoted primitives depending on schema type.
|
||||
|
||||
**Characteristics**:
|
||||
|
||||
- Function name in tag: `<function=name>` or `<invoke=name>`
|
||||
- Each argument has its own tag: `<param=key>value</param>`
|
||||
- String values are **unquoted** (raw text content of the tag)
|
||||
- Non-string values (objects, arrays, numbers, booleans) are still JSON-formatted
|
||||
- Supports streaming: partial arguments can be parsed incrementally
|
||||
|
||||
**Examples**:
|
||||
|
||||
Qwen/Hermes XML format:
|
||||
|
||||
```xml
|
||||
<function=get_weather>
|
||||
<param=location>Paris</param>
|
||||
<param=unit>celsius</param>
|
||||
</function>
|
||||
```
|
||||
|
||||
Note how string values (`Paris`, `celsius`) are unquoted inside the tags.
|
||||
|
||||
Mixed types example:
|
||||
|
||||
```xml
|
||||
<function=calculate>
|
||||
<param=expr>2+2</param>
|
||||
<param=precision>2</param>
|
||||
<param=options>{"round": true}</param>
|
||||
</function>
|
||||
```
|
||||
|
||||
Here:
|
||||
|
||||
- `expr` and `precision` are strings (unquoted)
|
||||
- `options` is an object (JSON-formatted inside the tag)
|
||||
|
||||
**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object.
|
||||
|
||||
---
|
||||
|
||||
## Analysis Flow
|
||||
|
||||
```text
|
||||
Template
|
||||
|
|
||||
v
|
||||
differential_analyzer::analyze(tmpl)
|
||||
|
|
||||
|-- Phase 1: analyze_reasoning(tmpl, result)
|
||||
| |-- R1: compare_reasoning_presence() — with/without reasoning_content field
|
||||
| |-- R2: compare_thinking_enabled() — enable_thinking=false vs true
|
||||
| '-- R3: compare_reasoning_scope() — reasoning with content vs with tools
|
||||
|
|
||||
|-- Phase 2: analyze_content(tmpl, result)
|
||||
| '-- C1: compare_content_values() — content vs tools vs reasoning
|
||||
|
|
||||
|-- Phase 3: analyze_tools(tmpl, result)
|
||||
| |-- T1: analyze_tool_calls() — no tools vs with tools + format classification
|
||||
| |-- T2: check_per_call_markers() — per-section vs per-call markers
|
||||
| |-- T3: extract_call_separator() — separator between multiple calls
|
||||
| |-- T4: extract_function_markers() — func_alpha vs func_beta
|
||||
| |-- T5: extract_argument_separator() — 1 arg vs 2 args
|
||||
| |-- T6: extract_args_markers() — no args vs with args
|
||||
| '-- T7: extract_call_id_markers() — call_id "call00001" vs "call99999"
|
||||
|
|
||||
|-- Phase 4: analyze_arguments(tmpl, result) [TAG_WITH_TAGGED only]
|
||||
| |-- A1: extract_argument_name_markers() — "first" arg vs "second" arg
|
||||
| '-- A2: extract_argument_value_markers() — value "XXXX" vs "YYYY"
|
||||
|
|
||||
'-- collect_preserved_tokens(result)
|
||||
|
|
||||
v
|
||||
diff_analysis_result
|
||||
|
|
||||
v
|
||||
universal_peg_generator::generate_parser(tmpl, inputs, analysis)
|
||||
|-- build_parser(analysis, inputs, ...) — builds PEG parser arena
|
||||
| |-- Reasoning parser (based on reasoning_mode)
|
||||
| |-- Content parser (based on content_mode)
|
||||
| '-- Tool parser (dispatches by tool_format):
|
||||
| |-- build_tool_parser_json_native()
|
||||
| |-- build_tool_parser_tag_json()
|
||||
| '-- build_tool_parser_tag_tagged()
|
||||
|
|
||||
|-- Build GBNF grammar (if tools present)
|
||||
'-- Set grammar triggers from tool markers
|
||||
|
|
||||
v
|
||||
common_chat_params (prompt, parser, grammar, triggers, preserved_tokens)
|
||||
```
|
||||
|
||||
## Entry Point
|
||||
|
||||
The auto-parser is invoked in `common/chat.cpp` in `common_chat_templates_apply_jinja`. A few specialized templates are handled first (Ministral/Magistral Large 3, GPT-OSS, Functionary v3.2), then the auto-parser handles everything else:
|
||||
|
||||
```cpp
|
||||
try {
|
||||
LOG_DBG("Using differential autoparser\n");
|
||||
auto auto_params = universal_peg_generator::generate_parser(tmpl, params);
|
||||
return auto_params;
|
||||
} catch (const std::exception & e) {
|
||||
LOG_WRN("Automatic parser generation failed: %s\n", e.what());
|
||||
}
|
||||
```
|
||||
|
||||
## Algorithm Details
|
||||
|
||||
### Core Mechanism: Differential Comparison
|
||||
|
||||
All analysis phases use the same factorized comparison function:
|
||||
|
||||
```cpp
|
||||
compare_variants(tmpl, params_A, params_modifier)
|
||||
```
|
||||
|
||||
This creates variant B by applying a modifier lambda to a copy of params_A, renders both through the template, and computes a `diff_split`:
|
||||
|
||||
```cpp
|
||||
struct diff_split {
|
||||
std::string prefix; // Common prefix between A and B
|
||||
std::string suffix; // Common suffix between A and B
|
||||
std::string left; // Unique to variant A
|
||||
std::string right; // Unique to variant B
|
||||
};
|
||||
```
|
||||
|
||||
The diff is computed via `calculate_diff_split()`, which uses longest-common-prefix/suffix with iterative tag boundary fixing — it moves incomplete `<...>` or `[...]` markers from prefix/suffix into the left/right parts until stable.
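
As a minimal illustration (assuming nothing beyond the `diff_split` fields shown above), the core prefix/suffix split could be sketched like this; the real helper in `chat-auto-parser-helpers.cpp` additionally performs the iterative tag-boundary fixing described above. For example, comparing a render with and without `reasoning_content` leaves the reasoning block and its surrounding markers in `right`.

```cpp
#include <string>

struct diff_split_sketch {
    std::string prefix, suffix, left, right;
};

// Naive longest-common-prefix/suffix split (sketch only, no tag-boundary fixing).
static diff_split_sketch naive_diff_split(const std::string & a, const std::string & b) {
    size_t p = 0;
    while (p < a.size() && p < b.size() && a[p] == b[p]) {
        p++;
    }
    size_t s = 0;
    while (s < a.size() - p && s < b.size() - p &&
           a[a.size() - 1 - s] == b[b.size() - 1 - s]) {
        s++;
    }
    return {
        /* prefix */ a.substr(0, p),
        /* suffix */ a.substr(a.size() - s),
        /* left   */ a.substr(p, a.size() - p - s),
        /* right  */ b.substr(p, b.size() - p - s),
    };
}
```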
|
||||
|
||||
Text is segmentized into markers and non-marker fragments using `segmentize_markers()`, which splits on `<...>` and `[...]` boundaries.
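
A simplified version of that segmentation (hypothetical helper name, not the real signature) could look like:

```cpp
#include <string>
#include <vector>

// Split text into marker segments (<...> or [...]) and plain-text segments (sketch only).
static std::vector<std::string> segmentize_sketch(const std::string & text) {
    std::vector<std::string> segments;
    std::string current;
    for (size_t i = 0; i < text.size(); i++) {
        const char c = text[i];
        if (c == '<' || c == '[') {
            const char closer = (c == '<') ? '>' : ']';
            const size_t end = text.find(closer, i);
            if (end != std::string::npos) {
                if (!current.empty()) {
                    segments.push_back(current);
                    current.clear();
                }
                segments.push_back(text.substr(i, end - i + 1)); // marker segment
                i = end;
                continue;
            }
        }
        current += c;
    }
    if (!current.empty()) {
        segments.push_back(current); // trailing non-marker fragment
    }
    return segments;
}
```

For example, `"<think>steps</think>answer"` segmentizes into `["<think>", "steps", "</think>", "answer"]`.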
|
||||
|
||||
### Phase 1: Reasoning Analysis
|
||||
|
||||
Three comparisons extract reasoning markers and classify the reasoning mode:
|
||||
|
||||
**R1 — `compare_reasoning_presence()`**: Compares assistant message with vs without a `reasoning_content` field.
|
||||
|
||||
- Segmentizes `diff.right` to find markers around the reasoning content
|
||||
- 3+ segments → `TAG_BASED` (start marker, content, end marker)
|
||||
- 2 segments → `DELIMITER` (content followed by delimiter)
|
||||
- Special case: markers found in prefix/suffix → `FORCED_CLOSED`
|
||||
|
||||
**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true`.
|
||||
|
||||
- Detects `FORCED_OPEN`: template adds opening tag when thinking enabled
|
||||
- Detects `FORCED_CLOSED`: disable mode has both markers, enable mode has only start
|
||||
- Handles reversed patterns (e.g., GLM-4.6, where the disabled mode adds an empty reasoning block)
|
||||
|
||||
**R3 — `compare_reasoning_scope()`**: Compares reasoning with content vs with tool calls.
|
||||
|
||||
- Detects `TOOLS_ONLY`: reasoning appears only when tool calls are present
|
||||
- Extracts reasoning markers from tool call output by segmentizing
|
||||
|
||||
### Phase 2: Content Analysis
|
||||
|
||||
**C1 — `compare_content_values()`**: Compares content-only output vs tools output vs reasoning output.
|
||||
|
||||
- Creates two comparisons: content→tools and content→reasoning
|
||||
- Finds content text position in diff to extract surrounding markers
|
||||
- Classifies:
|
||||
- `ALWAYS_WRAPPED`: content has start/end markers in both comparisons
|
||||
- `WRAPPED_WITH_REASONING`: markers only when reasoning is present
|
||||
- `PLAIN`: no wrapping markers detected
|
||||
|
||||
### Phase 3: Tool Call Analysis
|
||||
|
||||
**T1 — `analyze_tool_calls()`**: Compares no-tools vs with-tools output.
|
||||
|
||||
- Calls `analyze_tool_call_format()` to classify the format using the **only heuristic**: a JSON parse attempt
|
||||
- `in_json_haystack()` checks whether the function name appears inside a JSON structure (see the sketch after this list)
|
||||
- If function name is in JSON → `JSON_NATIVE` → `analyze_tool_call_format_json_native()`:
|
||||
- Parses JSON structure, matches needle values to extract field names
|
||||
- Detects `fun_name_is_key`, `function_field`, `name_field`, `args_field`, `id_field`, `gen_id_field`
|
||||
- Detects `tools_array_wrapped` by checking for `[` before JSON
|
||||
- Builds `parameter_order` by sorting fields by position
|
||||
- Extracts `tool_section_start`/`tool_section_end`
|
||||
- If function name is not in JSON → `analyze_tool_call_format_non_json()`:
|
||||
- Segmentizes the haystack into markers and text
|
||||
- Uses symmetry: counts opening markers, matches with closing markers
|
||||
- Extracts `tool_section_start`, `tool_section_end`, `per_call_start`, `per_call_end`
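
As referenced above, the JSON-detection heuristic can be sketched as follows. This is an illustration of the idea only (hypothetical helper name and simplified extraction), not the actual `in_json_haystack()` implementation:

```cpp
#include "nlohmann/json.hpp"
#include <string>

using json = nlohmann::ordered_json;

// Recursively check whether `needle` appears as a string value in `j`.
static bool json_contains_value(const json & j, const std::string & needle) {
    if (j.is_string()) {
        return j.get<std::string>() == needle;
    }
    if (j.is_object() || j.is_array()) {
        for (const auto & item : j) {
            if (json_contains_value(item, needle)) {
                return true;
            }
        }
    }
    return false;
}

// Heuristic sketch: try to parse the region from the first '{' to the last '}'
// and check whether the needle (e.g. a probe function name) lives inside that JSON.
static bool needle_in_json(const std::string & haystack, const std::string & needle) {
    const auto start = haystack.find('{');
    const auto end   = haystack.rfind('}');
    if (start == std::string::npos || end == std::string::npos || end <= start) {
        return false;
    }
    json j = json::parse(haystack.substr(start, end - start + 1),
                         /* cb = */ nullptr, /* allow_exceptions = */ false);
    return !j.is_discarded() && json_contains_value(j, needle);
}
```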
|
||||
|
||||
**T2 — `check_per_call_markers()`**: Compares 1 call vs 2 calls.
|
||||
|
||||
- If the second call starts with `tool_section_start`, markers are per-call not per-section
|
||||
- Moves tool_section markers to per_call markers, clears section markers
|
||||
|
||||
**T3 — `extract_call_separator()`**: Compares 1 call vs 2 calls.
|
||||
|
||||
- Finds separator between calls using `until_common_prefix(diff.right, ...)` with the two function names as anchors
|
||||
|
||||
**T4 — `extract_function_markers()`**: Compares function name "foofoo" vs "barbar".
|
||||
|
||||
- Finds function name in diff, segmentizes to extract prefix/suffix markers
|
||||
- Extracts `func_name_prefix`, `func_name_suffix`
|
||||
- Searches for closing marker after args to extract `func_close`
|
||||
|
||||
**T5 — `extract_argument_separator()`**: Compares 1 argument vs 2 arguments.
|
||||
|
||||
- Uses `until_common_prefix()` with argument names as anchors to find the separator
|
||||
|
||||
**T6 — `extract_args_markers()`**: Compares 0 arguments vs 1 argument.
|
||||
|
||||
- Uses `until_common_prefix()` and `after_common_suffix()` to find container markers
|
||||
- Extracts `args_start`, `args_end`
|
||||
|
||||
**T7 — `extract_call_id_markers()`**: Compares call IDs "call00001" vs "call99999".
|
||||
|
||||
- Determines position relative to function name and arguments
|
||||
- Classifies as `PRE_FUNC_NAME`, `BETWEEN_FUNC_AND_ARGS`, or `POST_ARGS`
|
||||
- Extracts `call_id_prefix`, `call_id_suffix`
|
||||
|
||||
### Phase 4: Argument Analysis (TAG_WITH_TAGGED only)
|
||||
|
||||
Only runs when Phase 3 detected TAG_WITH_TAGGED or TAG_WITH_JSON format with non-JSON argument structures.
|
||||
|
||||
**A1 — `extract_argument_name_markers()`**: Compares argument name "first" vs "second".
|
||||
|
||||
- Finds common prefix of diff.left/right to extract marker structure
|
||||
- Extracts `arg_name_prefix`, `arg_name_suffix`
|
||||
|
||||
**A2 — `extract_argument_value_markers()`**: Compares value "XXXX" vs "YYYY".
|
||||
|
||||
- Segmentizes prefix/suffix around value to find markers
|
||||
- Extracts `arg_value_prefix`, `arg_value_suffix`
|
||||
|
||||
### Parser Building
|
||||
|
||||
The parser generator (`universal_peg_generator`) takes the analysis result and builds a PEG parser arena. The entry point is `generate_parser(tmpl, inputs)`, which:
|
||||
|
||||
1. Runs `differential_analyzer::analyze(tmpl)` to get the analysis result
|
||||
2. Calls `build_parser(analysis, inputs, ...)` to construct the PEG parser
|
||||
3. Builds a GBNF grammar if tools are present (for constrained decoding)
|
||||
4. Sets grammar triggers from `tool_section_start` or `per_call_start`
|
||||
|
||||
#### Reasoning Parser Construction
|
||||
|
||||
Built inline in `build_parser()` based on `reasoning_mode`:
|
||||
|
||||
| Mode | Parser |
|
||||
|-----------------------------------|---------------------------------------------------------------------------------------------|
|
||||
| `FORCED_OPEN` / `FORCED_CLOSED` | `reasoning(until(end)) + end` — expects reasoning immediately (opening tag was in template) |
|
||||
| `TAG_BASED` / `TOOLS_ONLY` | `optional(start + reasoning(until(end)) + end)` |
|
||||
| `DELIMITER` | `optional(reasoning(until(end)) + end)` — no start marker, reasoning ends at delimiter |
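
As a behavioral approximation only (this is not the builder API, and the helper name is made up), the reasoning parsers above accept output that a plain string split would handle like this, with an empty `start` standing in for the forced-open case:

```cpp
#include <string>
#include <utility>

// Behavioral sketch: split raw model output into (reasoning, remainder)
// according to the reasoning markers. An empty `start` models FORCED_OPEN,
// where the opening tag was already emitted as part of the prompt.
static std::pair<std::string, std::string> split_reasoning(
        const std::string & output,
        const std::string & start,
        const std::string & end) {
    size_t begin = 0;
    if (!start.empty()) {
        if (output.compare(0, start.size(), start) != 0) {
            return { "", output };  // optional reasoning block absent
        }
        begin = start.size();
    }
    const size_t close = output.find(end, begin);
    if (close == std::string::npos) {
        return { output.substr(begin), "" };  // still streaming: everything so far is reasoning
    }
    return { output.substr(begin, close - begin),
             output.substr(close + end.size()) };
}
```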
|
||||
|
||||
#### Content Parser Construction
|
||||
|
||||
| Condition | Parser |
|
||||
|------------------------------------|---------------------------------------------------------------------------|
|
||||
| `json_schema` present | `reasoning + space() + content(schema(json(), ...)) + end()` |
|
||||
| Tools present | Dispatches to tool parser builder |
|
||||
| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` |
|
||||
| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` |
|
||||
| Default | `reasoning + content(rest()) + end()` |
|
||||
|
||||
#### Tool Parser Construction
|
||||
|
||||
`build_tool_parser()` dispatches by `tool_format`:
|
||||
|
||||
**`build_tool_parser_json_native()`**: Uses the `standard_json_tools()` builder helper which has three internal modes:
|
||||
|
||||
- `build_json_tools_function_is_key()` — function name is the JSON key: `{"get_weather": {"location": "Paris"}}`
|
||||
- `build_json_tools_nested_keys()` — nested object: `{"function": {"name": "X", "arguments": {...}}}`
|
||||
- `build_json_tools_flat_keys()` — flat object: `{"name": "X", "arguments": {...}}`
|
||||
|
||||
Handles content wrappers, array wrapping, parallel calls, and section markers.
|
||||
|
||||
**`build_tool_parser_tag_json()`**: For each tool, builds:
|
||||
|
||||
```text
|
||||
tool_open(prefix + tool_name(literal(name)) + suffix) +
|
||||
call_id_section +
|
||||
tool_args(schema(json(), tool_schema))
|
||||
```
|
||||
|
||||
Wraps in per-call or section markers. Handles parallel calls.
|
||||
|
||||
**`build_tool_parser_tag_tagged()`**: For each tool, builds per-argument parsers:
|
||||
|
||||
- String types: `tool_arg_string_value(schema(until(suffix), ...))`
|
||||
- JSON types: `tool_arg_json_value(schema(json(), ...))`
|
||||
- Required vs optional arguments
|
||||
- Arguments joined with `space()` between them
|
||||
|
||||
Handles `func_close`, `peek()` for partial parsing safety, and call_id sections.
|
||||
|
||||
All three return: `reasoning + optional(content(until(trigger))) + tool_calls + end()`
|
||||
|
||||
### Mapper
|
||||
|
||||
The `common_chat_peg_unified_mapper` maps PEG parse results (AST nodes) into `common_chat_msg` structures. Key design:
|
||||
|
||||
- **Buffered arguments**: Before `tool_name` is known, argument text goes to `args_buffer`; once name is set, the buffer is flushed to `current_tool->arguments`
|
||||
- **`args_target()`**: Returns a reference to whichever destination is active, eliminating branching
|
||||
- **`closing_quote_pending`**: Tracks whether a closing `"` needs to be appended when a string argument value is finalized
|
||||
- **Quote normalization**: Python-style quotes (`'key': 'value'`) are converted to JSON (`"key": "value"`)
|
||||
- **Brace auto-closing**: At tool close, unclosed `{` braces are closed automatically (tracked via `json_brace_depth()`)
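
To make the brace auto-closing idea concrete, here is a standalone sketch (the real mapper tracks the depth incrementally via `json_brace_depth()` instead of re-scanning the buffer):

```cpp
#include <string>

// Count unclosed '{' braces in a JSON fragment, ignoring braces inside string
// literals, then append the missing '}' characters (sketch only).
static std::string close_unbalanced_braces(std::string args) {
    int  depth     = 0;
    bool in_string = false;
    bool escaped   = false;
    for (const char c : args) {
        if (escaped)                    { escaped = false; continue; }
        if (c == '\\' && in_string)     { escaped = true;  continue; }
        if (c == '"')                   { in_string = !in_string; continue; }
        if (in_string)                  { continue; }
        if (c == '{')                   { depth++; }
        if (c == '}' && depth > 0)      { depth--; }
    }
    if (in_string) {
        args += '"';  // analogous to the closing_quote_pending idea above
    }
    args.append(depth, '}');
    return args;
}
```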
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
|-------------------------------------------|-------------------------------------------------------------------|
|
||||
| `common/chat-auto-parser.h` | `universal_peg_generator` class and `templates_params` struct |
|
||||
| `common/chat-auto-parser-generator.cpp` | Parser generator implementation |
|
||||
| `common/chat-diff-analyzer.h` | Analysis result types, enums, and `differential_analyzer` class |
|
||||
| `common/chat-diff-analyzer.cpp` | Differential analysis implementation |
|
||||
| `common/chat-auto-parser-helpers.h/cpp` | `calculate_diff_split()`, `segmentize_markers()`, string helpers |
|
||||
| `common/chat-peg-parser.h/cpp` | PEG builder and mapper classes |
|
||||
| `common/chat.cpp` | Entry point: `common_chat_templates_apply_jinja()` |
|
||||
| `tools/parser/debug-template-parser.cpp` | Debug tool for template analysis |
|
||||
| `tools/parser/template-analysis.cpp` | Template analysis tool |
|
||||
|
||||
## Testing & Debugging
|
||||
|
||||
### Debug Tools
|
||||
|
||||
**Template Debugger**: `tools/parser/debug-template-parser.cpp`
|
||||
|
||||
- Usage: `./bin/llama-debug-template-parser path/to/template.jinja`
|
||||
- Shows detected format, markers, generated parser, and GBNF grammar
|
||||
|
||||
**Template Analysis**: `tools/parser/template-analysis.cpp`
|
||||
|
||||
- Usage: `./bin/llama-template-analysis path/to/template.jinja`
|
||||
|
||||
**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2`
|
||||
|
||||
- Shows detailed analysis steps, pattern extraction results, and generated parser structure
|
||||
|
||||
**PEG Test Builder**: Fluent API for creating test cases in `tests/test-chat.cpp`:
|
||||
|
||||
```cpp
|
||||
auto tst = peg_tester("models/templates/Template.jinja");
|
||||
tst.test("input text")
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({tool_json})
|
||||
.parallel_tool_calls(true)
|
||||
.enable_thinking(true)
|
||||
.expect(expected_message)
|
||||
.run();
|
||||
```
|
||||
|
||||
### Tested Templates
|
||||
|
||||
The following templates have active tests in `tests/test-chat.cpp`:
|
||||
|
||||
| Template | Format | Notes |
|
||||
| -------- | ------ | ----- |
|
||||
| Ministral-3-14B-Reasoning | Reasoning | `[THINK]...[/THINK]` tags |
|
||||
| NVIDIA-Nemotron-3-Nano-30B | TAG_WITH_TAGGED | Reasoning + tools |
|
||||
| CohereForAI Command-R7B | JSON_NATIVE | `<\|START_THINKING\|>`/`<\|START_RESPONSE\|>` markers |
|
||||
| Google Gemma 2 2B | Content only | No tool support |
|
||||
| Qwen-QwQ-32B | Reasoning | Forced-open thinking |
|
||||
| NousResearch Hermes 2 Pro | JSON_NATIVE | `<tool_call>` wrapper |
|
||||
| IBM Granite 3.3 | JSON_NATIVE | `<think></think>` + `<response></response>` |
|
||||
| ByteDance Seed-OSS | TAG_WITH_TAGGED | Custom `<seed:think>` and `<seed:tool_call>` tags |
|
||||
| Qwen3-Coder | TAG_WITH_TAGGED | XML-style tool format |
|
||||
| DeepSeek V3.1 | JSON_NATIVE | Forced thinking mode |
|
||||
| GLM-4.6 | TAG_WITH_TAGGED | `<tool_call>name\n<arg_key>...<arg_value>...` format |
|
||||
| GLM-4.7-Flash | TAG_WITH_TAGGED | Updated GLM format |
|
||||
| Kimi-K2-Thinking | JSON_NATIVE | Reasoning + JSON tools |
|
||||
| Apertus-8B-Instruct | JSON_NATIVE | Function name as JSON key |
|
||||
| MiniMax-M2 | TAG_WITH_JSON | XML invoke with JSON args |
|
||||
| NVIDIA-Nemotron-Nano-v2 | JSON_NATIVE | `<TOOLCALL>` wrapper (nested) |
|
||||
| CohereForAI Command-R Plus | JSON_NATIVE | Markdown code block format |
|
||||
| Mistral-Nemo-Instruct-2407 | JSON_NATIVE | `[TOOL_CALLS]` wrapper with ID field |
|
||||
| Functionary v3.1 | TAG_WITH_JSON | `<function=X>` format |
|
||||
| Functionary v3.2 | Specialized | `>>>` recipient delimiter (dedicated handler) |
|
||||
| Fireworks Firefunction v2 | TAG_WITH_JSON | Fireworks tool format |
|
||||
| DeepSeek R1 Distill (Llama/Qwen) | Reasoning | Forced-open thinking |
|
||||
| llama-cpp-deepseek-r1 | Reasoning | Forced-open thinking |
|
||||
| Kimi-K2 / Kimi-K2-Instruct | JSON_NATIVE | JSON tools with special markers |
|
||||
| Llama 3.1/3.2/3.3 | JSON_NATIVE | Standard Llama tool format |
|
||||
| OpenAI GPT-OSS | Specialized | Channel-based (dedicated handler) |
|
||||
| Apriel 1.5 | JSON_NATIVE | `<tool_calls>` wrapper with JSON array |
|
||||
| Apriel 1.6 Thinker | Reasoning | Implicit reasoning start |
|
||||
| Mistral Small 3.2 | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` with call ID |
|
||||
| Devstral | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` without call ID |
|
||||
| StepFun 3.5 Flash | TAG_WITH_TAGGED | `<function=X><parameter=Y>` format |
|
||||
|
||||
## Adding Support for New Templates
|
||||
|
||||
To support a new template format:
|
||||
|
||||
1. **If it follows standard patterns** - The auto-parser should detect it automatically using the three formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED)
|
||||
2. **If differential analysis doesn't extract markers correctly** - Add an entry to the workaround array in `chat-diff-analyzer.cpp`
|
||||
3. **If it needs fundamentally different handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block (as done for GPT-OSS, Functionary v3.2, and Ministral)
|
||||
|
||||
## Edge Cases and Quirks
|
||||
|
||||
1. **Forced Thinking**: If `enable_thinking` is true and the rendered prompt already opens a thought block (e.g., it ends with `<think>`), the parser enters "forced thinking" mode and immediately expects reasoning content.
|
||||
2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start`/`per_call_end`), others wrap the entire tool section (`tool_section_start`/`tool_section_end`). T2 disambiguates by checking if the second call in a two-call output starts with the section marker.
|
||||
3. **Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `<function=`). The analyzer detects this overlap and prevents double-wrapping in the generated parser.
|
||||
4. **Null Content Rendering**: Some templates render `null` content as the Python string "None". The analyzer detects this and patches the content to an empty string.
|
||||
5. **Tag Boundary Fixing**: The `calculate_diff_split()` function iteratively adjusts the prefix/suffix boundary to avoid splitting `<tag>` or `[marker]` tokens, ensuring clean extraction.
|
||||
6. **Workarounds**: A workaround array in `chat-diff-analyzer.cpp` applies post-analysis patches for templates whose differential analysis produces incomplete or incorrect results (e.g., old Qwen thinking, Granite 3.3, Cohere Command-R+, Functionary, DeepSeek-R1-Distill-Qwen).
|
||||
|
|
@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse
|
|||
output from a model that emits arguments as JSON.
|
||||
|
||||
```cpp
|
||||
auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
|
||||
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
||||
// Build a choice of all available tools
|
||||
auto tool_choice = p.choice();
|
||||
for (const auto & tool : tools) {
|
||||
|
|
@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result);
|
|||
|
||||
### Native
|
||||
|
||||
The `common_chat_peg_native_builder` builds a `native` parser suitable for
|
||||
The `common_chat_peg_unified_builder` builds a `native` parser suitable for
|
||||
models that emit tool arguments as a direct JSON object.
|
||||
|
||||
- **`reasoning(p)`** - Tag node for `reasoning_content`
|
||||
|
|
@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object.
|
|||
- **`tool_args(p)`** - Tag the tool arguments
|
||||
|
||||
```cpp
|
||||
build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
|
||||
build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
||||
auto get_weather_tool = p.tool(p.sequence({
|
||||
p.tool_open(p.literal("{")),
|
||||
p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""),
|
||||
|
|
@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
|
|||
|
||||
### Constructed
|
||||
|
||||
The `common_chat_peg_constructed_builder` builds a `constructed` parser
|
||||
The `common_chat_peg_unified_builder` builds a `constructed` parser
|
||||
suitable for models that emit tool arguments as separate entities, such as XML
|
||||
tags.
|
||||
|
||||
|
|
@ -264,7 +264,7 @@ tags.
|
|||
- **`tool_arg_json_value(p)`** - Tag JSON value for the argument
|
||||
|
||||
```cpp
|
||||
build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
|
||||
build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
||||
auto location_arg = p.tool_arg(
|
||||
p.tool_arg_open("<parameter name=\"" + p.tool_arg_name(p.literal("location")) + "\">"),
|
||||
p.tool_arg_string_value(p.until("</parameter>")),
|
||||
|
|
|
|||
|
|
@ -294,7 +294,7 @@
|
|||
{%- for tool_call in message.tool_calls -%}
|
||||
{%- if tool_call.type == 'function' -%}
|
||||
{%- set function = tool_call.function -%}
|
||||
{{- '{"' + function.name + '": ' + function.arguments + '}' }}
|
||||
{{- '{"' + function.name + '": ' + function.arguments|tojson + '}' }}
|
||||
{%- if not loop.last -%}
|
||||
{{- ", " }}
|
||||
{%- endif -%}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,173 @@
|
|||
{# ---------------------------------------------------------------------- #}
|
||||
{# ƛƬ Default setup and flags #}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{# FIX: Use "is defined" check BEFORE accessing the variable #}
|
||||
{%- set messages = messages if (messages is defined and messages) else [] -%}
|
||||
{%- set tools = tools if (tools is defined and tools) else [] -%}
|
||||
{%- set add_generation_prompt = add_generation_prompt if (add_generation_prompt is defined) else false -%}
|
||||
{%- set available_tool_string = '' -%}
|
||||
{%- set add_tool_id = true -%}
|
||||
{%- set add_thoughts = true -%} {# whether to include <thinking> reasoning blocks #}
|
||||
{%- set add_generation_prompt = true -%} {# whether to emit reasoning starter before assistant response #}
|
||||
{# Optional token placeholders (safe defaults) #}
|
||||
{%- set bos_token = bos_token if (bos_token is defined) else '' -%}
|
||||
{%- set eos_token = eos_token if (eos_token is defined) else '' -%}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{# Core reasoning prompt and assistant reasoning prefix #}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{%- set reasoning_prompt -%}
|
||||
You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
|
||||
Analyze each question carefully, present your reasoning step-by-step, then provide the final
|
||||
response after the marker [BEGIN FINAL RESPONSE].
|
||||
{%- endset -%}
|
||||
{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{# Tool list and tool call output format #}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{%- if tools|length > 0 -%}
|
||||
{%- set available_tool_string -%}
|
||||
You are provided with function signatures within <available_tools></available_tools> XML tags.
|
||||
You may call one or more functions to assist with the user query.
|
||||
Don't make assumptions about the arguments. You should infer the argument values from previous
|
||||
user responses and the system message.
|
||||
Here are the available tools:
|
||||
<available_tools>
|
||||
{% for tool in tools %}{{ tool|string }}{% endfor %}
|
||||
|
||||
</available_tools>.
|
||||
|
||||
Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
|
||||
Each JSON object should contain a function name and arguments as follows:
|
||||
<tool_calls>[
|
||||
{"name": <function-name-1>, "arguments": <args-dict-1>},
|
||||
{"name": <function-name-2>, "arguments": <args-dict-2>},
|
||||
...
|
||||
]</tool_calls>
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{# Start system block if first message is not system #}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
|
||||
{%- if tools|length > 0 -%}
|
||||
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
|
||||
{%- else -%}
|
||||
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{# Iterate through messages #}
|
||||
{# ---------------------------------------------------------------------- #}
|
||||
{%- for message in messages -%}
|
||||
|
||||
{# ---------------- USER MESSAGE ---------------- #}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
{{ '<|begin_user|>\n' }}
|
||||
{%- if message['content'] is not string -%}
|
||||
{%- for chunk in message['content'] -%}
|
||||
{%- if chunk['type'] == 'text' -%}
|
||||
{{ chunk['text'] }}
|
||||
{%- elif chunk['type'] in ['image', 'image_url'] -%}
|
||||
{{ '[IMG]' }}
|
||||
{%- else -%}
|
||||
{{ raise_exception('Unrecognized content type!') }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- else -%}
|
||||
{{ message['content'] }}
|
||||
{%- endif -%}
|
||||
|
||||
{# ---------------- SYSTEM MESSAGE ---------------- #}
|
||||
{%- elif message['role'] == 'system' -%}
|
||||
{%- set sys_content = message.get('content', '') -%}
|
||||
{%- if sys_content and sys_content|length > 0 -%}
|
||||
{%- if sys_content is string -%}
|
||||
{%- set system_message = sys_content -%}
|
||||
{%- else -%}
|
||||
{%- set system_message = sys_content[0]['text'] -%}
|
||||
{%- endif -%}
|
||||
{%- else -%}
|
||||
{%- set system_message = '' -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- if tools|length > 0 -%}
|
||||
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
|
||||
{%- else -%}
|
||||
{{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
|
||||
{%- endif -%}
|
||||
|
||||
{# ---------------- ASSISTANT MESSAGE ---------------- #}
|
||||
{%- elif message['role'] == 'assistant' -%}
|
||||
{%- if loop.last -%}
|
||||
{%- set add_tool_id = false -%}
|
||||
{%- endif -%}
|
||||
|
||||
{{ '\n<|begin_assistant|>\n' }}
|
||||
|
||||
{%- if add_thoughts and message.get('reasoning_content') and loop.last -%}
|
||||
{{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set asst_content = message.get('content', '') -%}
|
||||
{%- if asst_content and asst_content|length > 0 -%}
|
||||
{%- if asst_content is not string -%}
|
||||
{%- set asst_text = asst_content[0]['text'] -%}
|
||||
{%- else -%}
|
||||
{%- set asst_text = asst_content -%}
|
||||
{%- endif -%}
|
||||
{# For historical turns (not the last), strip reasoning and keep only final response #}
|
||||
{%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%}
|
||||
{{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}}
|
||||
{%- else -%}
|
||||
{{- asst_text -}}
|
||||
{%- endif -%}
|
||||
{%- elif message.get('chosen') and message['chosen']|length > 0 -%}
|
||||
{{ message['chosen'][0] }}
|
||||
{%- endif -%}
|
||||
|
||||
{# Tool call output #}
|
||||
{%- set tool_calls = message.get('tool_calls', []) -%}
|
||||
{%- if tool_calls and tool_calls|length > 0 -%}
|
||||
{{ '\n<tool_calls>[' }}
|
||||
{%- for tool_call in tool_calls -%}
|
||||
{{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson }}
|
||||
{%- if add_tool_id == true and 'id' in tool_call -%}
|
||||
{{ ', "id": "' + tool_call['id'] + '"' }}
|
||||
{%- endif -%}
|
||||
{{ '}' }}
|
||||
{%- if not loop.last -%}{{ ', ' }}{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{ ']</tool_calls>' }}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set training_prompt = training_prompt if (training_prompt is defined) else false -%}
|
||||
{%- if not loop.last or training_prompt -%}
|
||||
{{ '\n<|end|>\n' }}
|
||||
{%- endif -%}
|
||||
|
||||
{# ---------------- TOOL RESULT MESSAGE ---------------- #}
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
{%- set tool_content = message.get('content', '') -%}
|
||||
{%- if tool_content is string -%}
|
||||
{%- set tool_message = tool_content -%}
|
||||
{%- else -%}
|
||||
{%- set tool_message = tool_content[0]['text'] if tool_content else '' -%}
|
||||
{%- endif -%}
|
||||
{{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}
|
||||
|
||||
{# ---------------- CONTENT MESSAGE ---------------- #}
|
||||
{%- elif message['role'] == 'content' -%}
|
||||
{%- set msg_content = message.get('content', '') -%}
|
||||
{%- if msg_content is not string -%}
|
||||
{{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }}
|
||||
{%- else -%}
|
||||
{{ '<|begin_content|>\n' + msg_content + '\n' }}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
|
||||
{# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
|
||||
{%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
|
||||
{{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
|
||||
{%- endif -%}
|
||||
|
||||
{%- endfor -%}
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
{{ bos_token }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- set system_message = messages[0]['content'] %}
|
||||
{%- set loop_start_index = 1 %}
|
||||
{%- else %}
|
||||
{%- set system_message = "" %}
|
||||
{%- set loop_start_index = 0 %}
|
||||
{%- endif %}
|
||||
|
||||
{%- if system_message or tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if system_message %}
|
||||
{{- system_message }}
|
||||
{%- endif %}
|
||||
{%- if tools %}
|
||||
{{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }}
|
||||
{{- '[' }}
|
||||
{%- for tool in tools %}
|
||||
{{- tool | tojson }}
|
||||
{%- if not loop.last %}
|
||||
{{- ',\n' }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu <tool_call>{"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}</tool_call>. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }}
|
||||
{%- endif %}
|
||||
{%- if enable_thinking %}
|
||||
{{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów <think>...</think>. Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}}
|
||||
{{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }}
|
||||
{{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}}
|
||||
{{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}}
|
||||
{{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
|
||||
{%- for message in messages[loop_start_index:] %}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}
|
||||
{%- elif message['role'] == 'assistant' %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- set content = message.content | default('') %}
|
||||
{%- set reasoning_content = message.reasoning_content | default('') %}
|
||||
{%- if not reasoning_content and '<think>' in content and '</think>' in content %}
|
||||
{%- set reasoning_parts = content.split('</think>') %}
|
||||
{%- set reasoning_content = reasoning_parts[0].split('<think>')[-1] %}
|
||||
{%- set content = reasoning_parts[1:] | join('</think>') %}
|
||||
{%- endif %}
|
||||
{%- if reasoning_content %}
|
||||
{{- '<think>\n' + reasoning_content.strip() + '\n</think>\n' }}
|
||||
{%- endif %}
|
||||
{{- content.lstrip() }}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_call>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message['role'] == 'tool' %}
|
||||
{%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %}
|
||||
{{- '<|im_start|>user\n' }}
|
||||
{%- endif %}
|
||||
{{- '<|function_output|>' + message['content'] }}
|
||||
{%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking %}
|
||||
{{- '<think>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
|
|
@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea
|
|||
{%- elif message.role|lower == 'user' %}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
|
||||
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[
|
||||
{% for tc in message.tool_calls %}
|
||||
{"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
[gMASK]<sop>
|
||||
{%- if tools -%}
|
||||
<|system|>
|
||||
# Tools
|
||||
|
||||
You may call one or more functions to assist with the user query.
|
||||
|
||||
You are provided with function signatures within <tools></tools> XML tags:
|
||||
<tools>
|
||||
{% for tool in tools %}
|
||||
{{ tool | tojson(ensure_ascii=False) }}
|
||||
{% endfor %}
|
||||
</tools>
|
||||
|
||||
For each function call, output the function name and arguments within the following XML format:
|
||||
<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
|
||||
{%- macro visible_text(content) -%}
|
||||
{%- if content is string -%}
|
||||
{{- content }}
|
||||
{%- elif content is iterable and content is not mapping -%}
|
||||
{%- for item in content -%}
|
||||
{%- if item is mapping and item.type == 'text' -%}
|
||||
{{- item.text }}
|
||||
{%- elif item is string -%}
|
||||
{{- item }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- else -%}
|
||||
{{- content }}
|
||||
{%- endif -%}
|
||||
{%- endmacro -%}
|
||||
{%- set ns = namespace(last_user_index=-1) %}
|
||||
{%- for m in messages %}
|
||||
{%- if m.role == 'user' %}
|
||||
{% set ns.last_user_index = loop.index0 -%}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{% for m in messages %}
|
||||
{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
|
||||
{%- elif m.role == 'assistant' -%}
|
||||
<|assistant|>
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- set content = visible_text(m.content) %}
|
||||
{%- if m.reasoning_content is string %}
|
||||
{%- set reasoning_content = m.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
|
||||
{{ '<think>' + reasoning_content.strip() + '</think>'}}
|
||||
{%- else -%}
|
||||
{{ '</think>' }}
|
||||
{%- endif -%}
|
||||
{%- if content.strip() -%}
|
||||
{{ content.strip() }}
|
||||
{%- endif -%}
|
||||
{% if m.tool_calls %}
|
||||
{% for tc in m.tool_calls %}
|
||||
{%- if tc.function %}
|
||||
{%- set tc = tc.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>' + tc.name -}}
|
||||
{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
|
||||
{% endif %}
|
||||
{%- elif m.role == 'tool' -%}
|
||||
{%- if m.content is string -%}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|observation|>' }}
|
||||
{%- endif %}
|
||||
{{- '<tool_response>' }}
|
||||
{{- m.content }}
|
||||
{{- '</tool_response>' }}
|
||||
{%- else -%}
|
||||
<|observation|>{% for tr in m.content %}
|
||||
<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
|
||||
{% endif -%}
|
||||
{%- elif m.role == 'system' -%}
|
||||
<|system|>{{ visible_text(m.content) }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
<|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
|
||||
{%- endif -%}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
{{- bos_token -}}
|
||||
{%- set system_prompt = "" -%}
|
||||
{%- set ns = namespace(system_prompt="") -%}
|
||||
{%- if messages[0]["role"] == "system" -%}
|
||||
{%- set ns.system_prompt = messages[0]["content"] -%}
|
||||
{%- set messages = messages[1:] -%}
|
||||
{%- endif -%}
|
||||
{%- if tools -%}
|
||||
{%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%}
|
||||
{%- for tool in tools -%}
|
||||
{%- if tool is not string -%}
|
||||
{%- set tool = tool | tojson -%}
|
||||
{%- endif -%}
|
||||
{%- set ns.system_prompt = ns.system_prompt + tool -%}
|
||||
{%- if not loop.last -%}
|
||||
{%- set ns.system_prompt = ns.system_prompt + ", " -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
|
||||
{{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' -}}
|
||||
{%- endif -%}
|
||||
{%- if ns.system_prompt -%}
|
||||
{{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
|
||||
{%- endif -%}
|
||||
{%- for message in messages -%}
|
||||
{{- "<|im_start|>" + message["role"] + "\n" -}}
|
||||
{%- set content = message["content"] -%}
|
||||
{%- if content is not string -%}
|
||||
{%- set content = content | tojson -%}
|
||||
{%- endif -%}
|
||||
{%- if message["role"] == "tool" -%}
|
||||
{%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
|
||||
{%- elif message["role"] == "assistant" -%}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- endif -%}
|
||||
{{- content + "<|im_end|>\n" -}}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
{{- "<|im_start|>assistant\n" -}}
|
||||
{%- endif -%}
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if tools is iterable and tools | length > 0 %}
|
||||
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
|
||||
{{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }}
|
||||
{{- "<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{%- if tool.function is defined %}
|
||||
|
|
@ -63,7 +63,7 @@
|
|||
{{- '\n</function>' }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>" }}
|
||||
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
|
||||
{{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nvalue_2\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening <tool_call> tag and end with a closing </tool_call> tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
|
||||
{%- endif %}
|
||||
{%- if system_message is defined %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
|
||||
{{bos_token}}{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- render_content(messages[0].content) + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson(ensure_ascii=False) }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- set content = render_content(message.content) %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
|
||||
{{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = render_content(message.reasoning_content) %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- else %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
|
||||
{%- if tool_call.arguments is defined %}
|
||||
{%- set arguments = tool_call.arguments %}
|
||||
{%- for args_name, args_value in arguments|items %}
|
||||
{{- '<parameter=' + args_name + '>\n' }}
|
||||
{%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
|
||||
{{- args_value }}
|
||||
{{- '\n</parameter>\n' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '</function>\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>tool_response\n' }}
|
||||
{%- endif %}
|
||||
{{- '<tool_response>' }}
|
||||
{{- content }}
|
||||
{{- '</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n<think>\n' }}
|
||||
{%- endif %}
|
||||
|
|
@ -1 +1,44 @@
|
|||
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
|
||||
{% if not add_generation_prompt is defined -%}
|
||||
{%- set add_generation_prompt = false -%}
|
||||
{%- endif -%}
|
||||
{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
{%- set ns.system_prompt = message['content'] -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is none -%}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- for tool in message['tool_calls']-%}
|
||||
{%- if not ns.is_first -%}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- else -%}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
||||
{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- else -%}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
{%- set content = content.split('</think>')[-1] -%}
|
||||
{%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'tool' -%}
|
||||
{%- set ns.is_tool = true -%}
|
||||
{%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- set ns.is_output_first = false -%}
|
||||
{%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|><think>\n'}}
|
||||
{%- endif %}
|
||||
|
|
@ -1 +1,47 @@
|
|||
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
|
||||
{% if not add_generation_prompt is defined -%}
|
||||
{%- set add_generation_prompt = false -%}
|
||||
{%- endif -%}
|
||||
{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
{%- set ns.system_prompt = message['content'] -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['tool_calls'] -%}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- for tool in message['tool_calls']-%}
|
||||
{%- if not ns.is_first -%}
|
||||
{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- else -%}
|
||||
{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['content'] is not none -%}
|
||||
{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- else -%}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
{%- set content = content.split('</think>')[-1] -%}
|
||||
{%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'tool' -%}
|
||||
{%- set ns.is_tool = true -%}
|
||||
{%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- set ns.is_output_first = false -%}
|
||||
{%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|><think>\n'}}
|
||||
{%- endif %}
|
||||
|
|
@ -1,3 +1,71 @@
|
|||
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
|
||||
{% if not add_generation_prompt is defined -%}
|
||||
{%- set add_generation_prompt = false -%}
|
||||
{%- endif -%}
|
||||
{%- if not thinking is defined -%}
|
||||
{%- if enable_thinking is defined -%}
|
||||
{%- set thinking = enable_thinking -%}
|
||||
{%- else -%}
|
||||
{%- set thinking = false -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
{%- if ns.is_first_sp -%}
|
||||
{%- set ns.system_prompt = ns.system_prompt + message['content'] -%}
|
||||
{%- set ns.is_first_sp = false -%}
|
||||
{%- else -%}
|
||||
{%- set ns.system_prompt = ns.system_prompt + '
|
||||
|
||||
' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}} {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
|
||||
' + message['content'] -%}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }}
|
||||
{%- for message in messages -%}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- set ns.is_first = false -%}
|
||||
{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message['tool_calls'] -%}
|
||||
{%- if ns.is_last_user -%}{{'<|Assistant|></think>'}}
|
||||
{%- endif -%}
|
||||
{%- set ns.is_last_user = false -%}
|
||||
{%- set ns.is_first = false -%}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- for tool in message['tool_calls'] -%}
|
||||
{%- if not ns.is_first -%}
|
||||
{%- if not message['content'] -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
|
||||
{%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- set ns.is_first = true -%}
|
||||
{%- else -%}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and not message['tool_calls'] -%}
|
||||
{%- if ns.is_last_user -%}{{'<|Assistant|>'}}
|
||||
{%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
|
||||
{%- else -%}{{'</think>'}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- set ns.is_last_user = false -%}
|
||||
{%- if ns.is_tool -%}{{message['content'] + '<|end▁of▁sentence|>'}}
|
||||
{%- set ns.is_tool = false -%}
|
||||
{%- else -%}
|
||||
{%- set content = message['content'] -%}
|
||||
{%- if '</think>' in content -%}
|
||||
{%- set content = content.split('</think>', 1)[1] -%}
|
||||
{%- endif -%}{{content + '<|end▁of▁sentence|>'}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'tool' -%}
|
||||
{%- set ns.is_last_user = false -%}
|
||||
{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<|Assistant|>'}}
|
||||
{%- if not thinking -%}{{'</think>'}}
|
||||
{%- else -%}{{'<think>'}}
|
||||
{%- endif -%}
|
||||
{%- endif %}
|
||||
|
|
@ -46,7 +46,7 @@ Available functions as JSON spec:
|
|||
{%- if 'tool_calls' in message and message['tool_calls'] -%}
|
||||
{%- set tool = namespace(calls=[]) -%}
|
||||
{%- for call in message['tool_calls'] -%}
|
||||
{%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%}
|
||||
{%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments']|tojson + '}'] -%}
|
||||
{%- endfor -%}
|
||||
{%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%}
|
||||
{%- endif -%}
|
||||
|
|
|
|||
|
|
@ -1,43 +1,43 @@
|
|||
{%- if tools -%}
|
||||
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
|
||||
{%- endif -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if loop.first and messages[0]['role'] != 'system' -%}
|
||||
<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
<|im_system|>system<|im_middle|>
|
||||
{%- elif message['role'] == 'user' -%}
|
||||
<|im_user|>user<|im_middle|>
|
||||
{%- elif message['role'] == 'assistant' -%}
|
||||
<|im_assistant|>assistant<|im_middle|>
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
<|im_system|>tool<|im_middle|>
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
|
||||
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
|
||||
<|tool_calls_section_begin|>
|
||||
{%- for tool_call in message['tool_calls'] -%}
|
||||
{%- set func_name = tool_call['function']['name'] -%}
|
||||
{%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
|
||||
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
|
||||
{%- endfor -%}
|
||||
<|tool_calls_section_end|>
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
|
||||
{%- elif message['content'] is string -%}
|
||||
{{ message['content'] }}
|
||||
{%- elif message['content'] is not none -%}
|
||||
{% for content in message['content'] -%}
|
||||
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
|
||||
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
|
||||
{% else -%}
|
||||
{{ content['text'] }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
<|im_end|>
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
<|im_assistant|>assistant<|im_middle|>
|
||||
{%- endif -%}
|
||||
{%- if tools -%}
|
||||
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
|
||||
{%- endif -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if loop.first and messages[0]['role'] != 'system' -%}
|
||||
<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'system' -%}
|
||||
<|im_system|>system<|im_middle|>
|
||||
{%- elif message['role'] == 'user' -%}
|
||||
<|im_user|>user<|im_middle|>
|
||||
{%- elif message['role'] == 'assistant' -%}
|
||||
<|im_assistant|>assistant<|im_middle|>
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
<|im_system|>tool<|im_middle|>
|
||||
{%- endif -%}
|
||||
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
|
||||
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
|
||||
<|tool_calls_section_begin|>
|
||||
{%- for tool_call in message['tool_calls'] -%}
|
||||
{%- set func_name = tool_call['function']['name'] -%}
|
||||
{%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
|
||||
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
|
||||
{%- endfor -%}
|
||||
<|tool_calls_section_end|>
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
|
||||
{%- elif message['content'] is string -%}
|
||||
{{ message['content'] }}
|
||||
{%- elif message['content'] is not none -%}
|
||||
{% for content in message['content'] -%}
|
||||
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
|
||||
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
|
||||
{% else -%}
|
||||
{{ content['text'] }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
<|im_end|>
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
<|im_assistant|>assistant<|im_middle|>
|
||||
{%- endif -%}
|
||||
|
|
|
|||
|
|
@ -86,22 +86,22 @@ Prior to generating the function calls, you should generate the reasoning for wh
|
|||
{%- set add_tool_id = false -%}
|
||||
{%- endif -%}
|
||||
{{- '<|assistant|>\n' -}}
|
||||
{%- if message['content'] is not none and message['content']|length > 0 -%}
|
||||
{%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%}
|
||||
{%- if message['content'] is not string and message['content'][0]['text'] is not none %}
|
||||
{{- message['content'][0]['text'] }}
|
||||
{%- else %}
|
||||
{{- message['content'] -}}
|
||||
{%- endif -%}
|
||||
{%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
|
||||
{%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%}
|
||||
{{- message['chosen'][0] -}}
|
||||
{%- endif -%}
|
||||
{%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
|
||||
{{- '<thinking>' + message['thought'] + '</thinking>' -}}
|
||||
{%- endif -%}
|
||||
{%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
|
||||
{%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
|
||||
{{- '\n<tool_calls>[' -}}
|
||||
{%- for tool_call in message["tool_calls"] -%}
|
||||
{{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
|
||||
{{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson -}}
|
||||
{%- if add_tool_id == true -%}
|
||||
{{- ', "id": "' + tool_call['id'] + '"' -}}
|
||||
{%- endif -%}
|
||||
|
|
|
|||
|
|
@ -230,7 +230,7 @@ def benchmark(
|
|||
|
||||
logger.info("")
|
||||
logger.info(f"Benchmark duration: {token_t_last:.2f} s")
|
||||
logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min")
|
||||
logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last / 60):.2f} requests/min")
|
||||
logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens")
|
||||
logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens")
|
||||
logger.info(f"Average prompt latency: {1e3 * np.mean(prompt_t):.2f} ms")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,202 @@
|
|||
import argparse
|
||||
import json
|
||||
import requests
|
||||
import logging
|
||||
import sys
|
||||
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
handler.terminator = "" # ← no newline
|
||||
logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
|
||||
logger = logging.getLogger("server-test-model")
|
||||
|
||||
|
||||
def run_query(url, messages, tools=None, stream=False, tool_choice=None):
|
||||
payload = {
|
||||
"messages": messages,
|
||||
"stream": stream,
|
||||
"max_tokens": 5000,
|
||||
}
|
||||
if tools:
|
||||
payload["tools"] = tools
|
||||
if tool_choice:
|
||||
payload["tool_choice"] = tool_choice
|
||||
|
||||
try:
|
||||
response = requests.post(url, json=payload, stream=stream)
|
||||
response.raise_for_status()
|
||||
except requests.exceptions.RequestException as e:
|
||||
if e.response is not None:
|
||||
logger.info(f"Response error: {e} for {e.response.content}\n")
|
||||
else:
|
||||
logger.info(f"Error connecting to server: {e}\n")
|
||||
return None
|
||||
|
||||
full_content = ""
|
||||
reasoning_content = ""
|
||||
tool_calls = []
|
||||
|
||||
if stream:
|
||||
logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
|
||||
for line in response.iter_lines():
|
||||
if line:
|
||||
decoded_line = line.decode("utf-8")
|
||||
if decoded_line.startswith("data: "):
|
||||
data_str = decoded_line[6:]
|
||||
if data_str == "[DONE]":
|
||||
break
|
||||
try:
|
||||
data = json.loads(data_str)
|
||||
if "choices" in data and len(data["choices"]) > 0:
|
||||
delta = data["choices"][0].get("delta", {})
|
||||
|
||||
# Content
|
||||
content_chunk = delta.get("content", "")
|
||||
if content_chunk:
|
||||
full_content += content_chunk
|
||||
logger.info(content_chunk)
|
||||
|
||||
# Reasoning
|
||||
reasoning_chunk = delta.get("reasoning_content", "")
|
||||
if reasoning_chunk:
|
||||
reasoning_content += reasoning_chunk
|
||||
logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
|
||||
|
||||
# Tool calls
|
||||
if "tool_calls" in delta:
|
||||
for tc in delta["tool_calls"]:
|
||||
index = tc.get("index")
|
||||
if index is not None:
|
||||
while len(tool_calls) <= index:
|
||||
# Default the type to "function"; other call types could be handled here if needed
|
||||
tool_calls.append(
|
||||
{
|
||||
"id": "",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "",
|
||||
"arguments": "",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
if "id" in tc:
|
||||
tool_calls[index]["id"] += tc["id"]
|
||||
if "function" in tc:
|
||||
if "name" in tc["function"]:
|
||||
tool_calls[index]["function"][
|
||||
"name"
|
||||
] += tc["function"]["name"]
|
||||
if "arguments" in tc["function"]:
|
||||
tool_calls[index]["function"][
|
||||
"arguments"
|
||||
] += tc["function"]["arguments"]
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logger.info(f"Failed to decode JSON: {data_str}\n")
|
||||
logger.info("\n--- End of Stream ---\n")
|
||||
else:
|
||||
logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
|
||||
data = response.json()
|
||||
if "choices" in data and len(data["choices"]) > 0:
|
||||
message = data["choices"][0].get("message", {})
|
||||
full_content = message.get("content", "")
|
||||
reasoning_content = message.get("reasoning_content", "")
|
||||
tool_calls = message.get("tool_calls", [])
|
||||
logger.info(full_content)
|
||||
logger.info("--- End of Response ---\n")
|
||||
|
||||
return {
|
||||
"content": full_content,
|
||||
"reasoning_content": reasoning_content,
|
||||
"tool_calls": tool_calls,
|
||||
}
|
||||
|
||||
|
||||
def test_chat(url, stream):
|
||||
logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
|
||||
messages = [{"role": "user", "content": "What is the capital of France?"}]
|
||||
result = run_query(url, messages, stream=stream)
|
||||
|
||||
if result:
|
||||
if result["content"]:
|
||||
logger.info("PASS: Output received.\n")
|
||||
else:
|
||||
logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
|
||||
|
||||
if result.get("reasoning_content"):
|
||||
logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
|
||||
else:
|
||||
logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
|
||||
else:
|
||||
logger.info("FAIL: No result.\n")
|
||||
|
||||
|
||||
def test_tool_call(url, stream):
|
||||
logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the weather in London? Please use the get_weather tool.",
|
||||
}
|
||||
]
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
|
||||
|
||||
if result:
|
||||
tcs = result.get("tool_calls")
|
||||
if tcs and len(tcs) > 0:
|
||||
logger.info("PASS: Tool calls detected.")
|
||||
for tc in tcs:
|
||||
func = tc.get("function", {})
|
||||
logger.info(f" Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
|
||||
else:
|
||||
logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
|
||||
|
||||
if result.get("reasoning_content"):
|
||||
logger.info(
|
||||
f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
|
||||
)
|
||||
else:
|
||||
logger.info("FAIL: Query failed.\n")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Test llama-server functionality.")
|
||||
parser.add_argument("--host", default="localhost", help="Server host")
|
||||
parser.add_argument("--port", default=8080, type=int, help="Server port")
|
||||
args = parser.parse_args()
|
||||
|
||||
base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
|
||||
logger.info(f"Testing server at {base_url}\n")
|
||||
|
||||
# Non-streaming tests
|
||||
test_chat(base_url, stream=False)
|
||||
test_tool_call(base_url, stream=False)
|
||||
|
||||
# Streaming tests
|
||||
test_chat(base_url, stream=True)
|
||||
test_tool_call(base_url, stream=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -14,7 +14,7 @@ cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_pa
|
|||
def run_cmd(cmd):
|
||||
p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
|
||||
sys.stdout.write(p.stdout)
|
||||
assert(p.returncode == 0)
|
||||
assert (p.returncode == 0)
|
||||
|
||||
|
||||
@pytest.mark.dependency()
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
#pragma once

#include "../llama-model.h"
#include "../llama-graph.h"
#include "../llama-model.h"

// TODO: remove in follow-up PR - move to .cpp files
#include "../llama-memory-recurrent.h"

#include <cmath>

struct llm_graph_context_mamba : public llm_graph_context {

@ -12,9 +13,16 @@ struct llm_graph_context_mamba : public llm_graph_context {

    virtual ~llm_graph_context_mamba() = default;

    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const;

    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp,
            ggml_tensor * cur,
            const llama_model & model,
            const llama_ubatch & ubatch,
            int il);
    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp,
            ggml_tensor * cur,
            const llama_model & model,
            const llama_ubatch & ubatch,
            int il) const;
};

// Base class for RWKV-related models

@ -158,8 +166,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
    llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params);
};

template <bool iswa>
struct llm_build_exaone4 : public llm_graph_context {
template <bool iswa> struct llm_build_exaone4 : public llm_graph_context {
    llm_build_exaone4(const llama_model & model, const llm_graph_params & params);
};

@ -183,8 +190,7 @@ struct llm_build_gemma2_iswa : public llm_graph_context {
    llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
};

template <bool iswa>
struct llm_build_gemma3 : public llm_graph_context {
template <bool iswa> struct llm_build_gemma3 : public llm_graph_context {
    llm_build_gemma3(const llama_model & model, const llm_graph_params & params);
};

@ -195,8 +201,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
    const int64_t n_embd_altup;
    const int64_t n_altup;
    const int i_altup_act;
    const int n_layer_sparsity = 10; // number of layers using activation sparsity
    const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
    const int n_layer_sparsity = 10; // number of layers using activation sparsity
    const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)

    llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params);
    ggml_tensor * calc_magnitude(ggml_tensor * x);

@ -237,27 +243,26 @@ struct llm_build_gptneox : public llm_graph_context {
struct llm_build_granite : public llm_graph_context {
    llm_build_granite(const llama_model & model, const llm_graph_params & params);

private:
    ggml_tensor * build_attention_layer(
            ggml_tensor * cur,
            ggml_tensor * inp_pos,
            llm_graph_input_attn_kv * inp_attn,
            const llama_model & model,
            const int64_t n_embd_head,
            const int il);
private:
    ggml_tensor * build_attention_layer(ggml_tensor * cur,
            ggml_tensor * inp_pos,
            llm_graph_input_attn_kv * inp_attn,
            const llama_model & model,
            const int64_t n_embd_head,
            const int il);

    ggml_tensor * build_layer_ffn(
            ggml_tensor * cur,
            ggml_tensor * inpSA,
            const llama_model & model,
            const int il);
    ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
};

struct llm_build_granite_hybrid : public llm_graph_context_mamba {
    llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params);
    ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
    ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn,
            const llama_model & model,const int64_t n_embd_head, const int il);
    ggml_tensor * build_attention_layer(ggml_tensor * cur,
            ggml_tensor * inp_pos,
            llm_graph_input_attn_kv * inp_attn,
            const llama_model & model,
            const int64_t n_embd_head,
            const int il);
};

struct llm_build_grok : public llm_graph_context {

@ -321,9 +326,11 @@ struct llm_build_lfm2 : public llm_graph_context {
    llm_build_lfm2(const llama_model & model, const llm_graph_params & params);
    ggml_tensor * build_moe_feed_forward(ggml_tensor * cur, int il) const;
    ggml_tensor * build_dense_feed_forward(ggml_tensor * cur, int il) const;
    ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const;
    ggml_tensor * build_attn_block(ggml_tensor * cur,
            ggml_tensor * inp_pos,
            llm_graph_input_attn_kv * inp_attn,
            int il) const;
    ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il);

};

struct llm_build_llada : public llm_graph_context {

@ -382,16 +389,18 @@ struct llm_build_nemotron : public llm_graph_context {
struct llm_build_nemotron_h : public llm_graph_context_mamba {
    llm_build_nemotron_h(const llama_model & model, const llm_graph_params & params);
    ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il);
    ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn,
            const llama_model & model, const int64_t n_embd_head, const int il);
    ggml_tensor * build_attention_layer(ggml_tensor * cur,
            llm_graph_input_attn_kv * inp_attn,
            const llama_model & model,
            const int64_t n_embd_head,
            const int il);
};

struct llm_build_neo_bert : public llm_graph_context {
    llm_build_neo_bert(const llama_model & model, const llm_graph_params & params);
};

template <bool iswa>
struct llm_build_olmo2 : public llm_graph_context {
template <bool iswa> struct llm_build_olmo2 : public llm_graph_context {
    llm_build_olmo2(const llama_model & model, const llm_graph_params & params);
};

@ -423,17 +432,23 @@ struct llm_build_phi2 : public llm_graph_context {
    llm_build_phi2(const llama_model & model, const llm_graph_params & params);
};

template<bool iswa>
struct llm_build_phi3 : public llm_graph_context {
template <bool iswa> struct llm_build_phi3 : public llm_graph_context {
    llm_build_phi3(const llama_model & model, const llm_graph_params & params);
};

struct llm_build_plamo2 : public llm_graph_context_mamba {
    llm_build_plamo2(const llama_model & model, const llm_graph_params & params);
private:
    ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
    ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur,
            const llama_model & model, int il);
private:
    ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp,
            ggml_tensor * cur,
            const llama_model & model,
            const llama_ubatch & ubatch,
            int il);
    ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp,
            ggml_tensor * inp_pos,
            ggml_tensor * cur,
            const llama_model & model,
            int il);
};

struct llm_build_plamo : public llm_graph_context {

@ -479,24 +494,20 @@ struct llm_build_qwen3vlmoe : public llm_graph_context {

struct llm_build_qwen3next : public llm_graph_context_mamba {
    llm_build_qwen3next(const llama_model & model, const llm_graph_params & params);
private:
    ggml_tensor * build_layer_attn(
            llm_graph_input_attn_kv * inp_attn,
            ggml_tensor * cur,
            ggml_tensor * inp_pos,
            int il);
private:
    ggml_tensor * build_layer_attn(llm_graph_input_attn_kv * inp_attn,
            ggml_tensor * cur,
            ggml_tensor * inp_pos,
            int il);

    ggml_tensor * build_layer_attn_linear(
            llm_graph_input_rs * inp,
            ggml_tensor * cur,
            ggml_tensor * causal_mask,
            ggml_tensor * identity,
            ggml_tensor * diag_mask,
            int il);
    ggml_tensor * build_layer_attn_linear(llm_graph_input_rs * inp,
            ggml_tensor * cur,
            ggml_tensor * causal_mask,
            ggml_tensor * identity,
            ggml_tensor * diag_mask,
            int il);

    ggml_tensor * build_layer_ffn(
            ggml_tensor * cur,
            int il);
    ggml_tensor * build_layer_ffn(ggml_tensor * cur, int il);

    // returns pair of output and new state
    std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(

@ -681,8 +692,7 @@ struct llm_build_seed_oss : public llm_graph_context {
    llm_build_seed_oss(const llama_model & model, const llm_graph_params & params);
};

template <bool iswa>
struct llm_build_smallthinker : public llm_graph_context {
template <bool iswa> struct llm_build_smallthinker : public llm_graph_context {
    llm_build_smallthinker(const llama_model & model, const llm_graph_params & params);
};

@ -187,11 +187,11 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
    # llama_build_and_test(test-double-float.cpp) # SLOW
endif()

llama_build_and_test(test-chat-parser.cpp)
llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
llama_build_and_test(test-chat-template.cpp)
llama_build_and_test(test-jinja.cpp)
llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
llama_build_and_test(test-chat-template.cpp)
llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(
@ -201,6 +201,7 @@ llama_build_and_test(
    peg-parser/test-gbnf-generation.cpp
    peg-parser/test-json-parser.cpp
    peg-parser/test-json-serialization.cpp
    peg-parser/test-python-dict-parser.cpp
    peg-parser/test-unicode.cpp
    peg-parser/tests.h
)
@ -264,3 +265,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama)

llama_build_and_test(test-alloc.cpp)
target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
@ -1,3 +1,4 @@
#include "peg-parser.h"
#include "tests.h"

void test_basic(testing & t) {
@ -450,5 +451,21 @@ void test_basic(testing & t) {

            t.assert_equal("result_is_fail", true, result.fail());
        });

        // Test markers
        t.test("marker", [](testing &t) {
            auto bracket_parser = build_peg_parser([](common_peg_parser_builder & p) {
                return p.marker();
            });

            common_peg_parse_context ctx_square("[marker]", false);
            common_peg_parse_context ctx_sharp("<marker>", false);

            auto result_square = bracket_parser.parse(ctx_square);
            auto result_sharp = bracket_parser.parse(ctx_sharp);

            t.assert_true("result_square_is_success", result_square.success());
            t.assert_true("result_sharp_is_success", result_sharp.success());
        });
    });
}
@ -0,0 +1,279 @@
|
|||
#include "tests.h"
|
||||
|
||||
void test_python_dict_parser(testing &t) {
|
||||
// Test parsing a simple Python dict object with single quotes
|
||||
t.test("simple Python dict object parsing", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'name': 'test', 'value': 42, 'flag': true}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test parsing a Python dict array with mixed types
|
||||
t.test("Python dict array with mixed types", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "[1, 'hello', true, null, 3.14]";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test parsing nested Python dict with objects and arrays
|
||||
t.test("nested Python dict with objects and arrays", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input =
|
||||
"{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test parsing Python dict with escaped single quotes
|
||||
t.test("Python dict with escaped single quotes", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'message': 'It\\'s working!'}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test parsing Python dict with double quotes inside single quotes
|
||||
t.test("Python dict with double quotes inside single quotes", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'quote': 'He said \"Hello\"'}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test the example from the requirements
|
||||
t.test("complex Python dict example from requirements", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test need_more_input() parsing - incomplete object
|
||||
t.test("need_more_input() parsing - incomplete object", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'name': 'test', 'value': ";
|
||||
common_peg_parse_context ctx(input, true);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_need_more_input", true, result.need_more_input());
|
||||
});
|
||||
|
||||
// Test need_more_input() parsing - incomplete single-quoted string
|
||||
t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'name': 'test";
|
||||
common_peg_parse_context ctx(input, true);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_need_more_input", true, result.need_more_input());
|
||||
});
|
||||
|
||||
// Test unicode in Python dict strings
|
||||
t.test("unicode in Python dict strings", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'message': 'Hello, 世界!'}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test Python dict with unicode escapes
|
||||
t.test("Python dict with unicode escapes", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{'unicode': 'Hello\\u0041'}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_success", true, result.success());
|
||||
t.assert_equal("result_end", input.size(), result.end);
|
||||
});
|
||||
|
||||
// Test that JSON double-quoted strings fail with Python dict parser
|
||||
t.test("JSON double-quoted strings fail with Python dict parser", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
|
||||
|
||||
std::string input = "{\"name\": \"test\"}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_equal("result_is_fail", true, result.fail());
|
||||
});
|
||||
|
||||
// Test Python dict string content parser directly
|
||||
t.test("python dict string content parser", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
return p.sequence({ p.literal("'"), p.python_dict_string_content(), p.literal("'"), p.space() });
|
||||
});
|
||||
|
||||
t.test("simple string", [&](testing &t) {
|
||||
std::string input = "'hello'";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
|
||||
t.test("string with escaped single quote", [&](testing &t) {
|
||||
std::string input = "'it\\'s'";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
|
||||
t.test("string with double quotes", [&](testing &t) {
|
||||
std::string input = "'say \"hello\"'";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
|
||||
t.test("incomplete string", [&](testing &t) {
|
||||
std::string input = "'hello";
|
||||
common_peg_parse_context ctx(input, true);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("need_more_input", result.need_more_input());
|
||||
});
|
||||
});
|
||||
|
||||
// Test allow_python_dict_format flag usage
|
||||
t.test("allow_python_dict_format flag", [](testing &t) {
|
||||
t.test("flag is false by default", [&](testing &t) {
|
||||
common_peg_parser_builder builder;
|
||||
t.assert_equal("default_value", false, builder.get_allow_python_dict_format());
|
||||
});
|
||||
|
||||
t.test("flag can be set to true", [&](testing &t) {
|
||||
common_peg_parser_builder builder;
|
||||
builder.set_allow_python_dict_format(true);
|
||||
t.assert_equal("after_set", true, builder.get_allow_python_dict_format());
|
||||
});
|
||||
|
||||
t.test("flag can be set back to false", [&](testing &t) {
|
||||
common_peg_parser_builder builder;
|
||||
builder.set_allow_python_dict_format(true);
|
||||
builder.set_allow_python_dict_format(false);
|
||||
t.assert_equal("after_reset", false, builder.get_allow_python_dict_format());
|
||||
});
|
||||
});
|
||||
|
||||
// Test that the flag actually affects json() parser behavior
|
||||
t.test("json() parser with allow_python_dict_format flag", [](testing &t) {
|
||||
t.test("json() rejects single quotes when flag is false", [&](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
p.set_allow_python_dict_format(false);
|
||||
return p.json();
|
||||
});
|
||||
|
||||
std::string input = "{'name': 'test'}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("fail", result.fail());
|
||||
});
|
||||
|
||||
t.test("json() accepts single quotes when flag is true", [&](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
p.set_allow_python_dict_format(true);
|
||||
return p.json();
|
||||
});
|
||||
|
||||
std::string input = "{'name': 'test'}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
|
||||
t.test("json() still accepts double quotes when flag is true", [&](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
p.set_allow_python_dict_format(true);
|
||||
return p.json();
|
||||
});
|
||||
|
||||
std::string input = "{\"name\": \"test\"}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
|
||||
t.test("json() accepts mixed quote styles when flag is true", [&](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
p.set_allow_python_dict_format(true);
|
||||
return p.json();
|
||||
});
|
||||
|
||||
std::string input = "{\"name\": 'test', 'value': \"hello\"}";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
|
||||
t.test("complex nested structure with flag true", [&](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
p.set_allow_python_dict_format(true);
|
||||
return p.json();
|
||||
});
|
||||
|
||||
std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
|
||||
common_peg_parse_context ctx(input);
|
||||
|
||||
auto result = parser.parse(ctx);
|
||||
t.assert_true("success", result.success());
|
||||
t.assert_equal("end", input.size(), result.end);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
@ -22,3 +22,4 @@ void test_json_parser(testing &t);
void test_gbnf_generation(testing &t);
void test_unicode(testing &t);
void test_json_serialization(testing &t);
void test_python_dict_parser(testing &t);
@ -7790,6 +7790,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
        test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1, 1}, {1, 1}));
    }

    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1}));

#if 0
    // test the mat-mat path for Metal
    for (int k = 1; k < 512; ++k) {
File diff suppressed because it is too large
@ -1,617 +0,0 @@
|
|||
// Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
|
||||
//
|
||||
// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
|
||||
// e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
|
||||
//
|
||||
// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
|
||||
//
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "chat-parser.h"
|
||||
#include "common.h"
|
||||
#include "log.h"
|
||||
#include "regex-partial.h"
|
||||
|
||||
template <class T>
|
||||
static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
|
||||
if (expected != actual) {
|
||||
std::cerr << label << std::endl;
|
||||
std::cerr << "Expected: " << expected << std::endl;
|
||||
std::cerr << "Actual: " << actual << std::endl;
|
||||
std::cerr << std::flush;
|
||||
throw std::runtime_error("Test failed");
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static void assert_equals(const T & expected, const T & actual) {
|
||||
assert_equals("", expected, actual);
|
||||
}
|
||||
static void assert_equals(const char * expected, const std::string & actual) {
|
||||
return assert_equals<std::string>(expected, actual);
|
||||
}
|
||||
|
||||
static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
|
||||
try {
|
||||
fn();
|
||||
} catch (const std::exception & e) {
|
||||
if (expected_exception_pattern.empty()) {
|
||||
return;
|
||||
}
|
||||
std::regex expected_exception_regex(expected_exception_pattern);
|
||||
std::string actual_message = e.what();
|
||||
if (std::regex_search(actual_message, expected_exception_regex)) {
|
||||
return;
|
||||
}
|
||||
throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
|
||||
throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
|
||||
}
|
||||
throw std::runtime_error("Exception was expected but not thrown");
|
||||
}
|
||||
|
||||
static void test_reasoning() {
|
||||
//common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
|
||||
assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
|
||||
assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
|
||||
}
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
|
||||
assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
|
||||
assert_equals(std::string("Cogito"), builder.result().reasoning_content);
|
||||
assert_equals("Ergo sum", builder.consume_rest());
|
||||
}
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
|
||||
assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
|
||||
assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
|
||||
}
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
|
||||
assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
|
||||
assert_equals(std::string("Cogito"), builder.result().reasoning_content);
|
||||
assert_equals("Ergo sum", builder.consume_rest());
|
||||
}
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = true;
|
||||
params.thinking_forced_open = true;
|
||||
common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
|
||||
assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
|
||||
assert_equals("<think>Cogito</think>", builder.result().content);
|
||||
assert_equals("Ergo sum", builder.consume_rest());
|
||||
}
|
||||
{
|
||||
const std::string variant("content_only_inline_think");
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
params.parse_tool_calls = false;
|
||||
const std::string input = "<think>Pense</think>Bonjour";
|
||||
auto msg = common_chat_parse(input, false, params);
|
||||
assert_equals(variant, std::string("Pense"), msg.reasoning_content);
|
||||
assert_equals(variant, std::string("Bonjour"), msg.content);
|
||||
}
|
||||
{
|
||||
const std::string variant("llama_3_inline_think");
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
params.parse_tool_calls = false;
|
||||
const std::string input = "<think>Plan</think>Réponse";
|
||||
auto msg = common_chat_parse(input, false, params);
|
||||
assert_equals(variant, std::string("Plan"), msg.reasoning_content);
|
||||
assert_equals(variant, std::string("Réponse"), msg.content);
|
||||
}
|
||||
// Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
|
||||
common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
|
||||
assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
|
||||
assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
|
||||
assert_equals(variant, std::string("ok"), builder.consume_rest());
|
||||
}
|
||||
// Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("deepseek_v3_1_reasoning_format_none");
|
||||
const std::string input = "REASONING</think>ok";
|
||||
auto msg = common_chat_parse(input, false, params);
|
||||
assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
|
||||
assert_equals(variant, std::string(""), msg.reasoning_content);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_regex() {
|
||||
auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
|
||||
common_chat_msg_parser builder(input, /* is_partial= */ false, {});
|
||||
assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
|
||||
};
|
||||
|
||||
test_throws("Hello, world!", "abc", "^abc$");
|
||||
test_throws("Hello, world!", "e", "^e$");
|
||||
|
||||
{
|
||||
common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
|
||||
builder.consume_regex(common_regex("Hello"));
|
||||
assert_equals(", world!", builder.consume_rest());
|
||||
}
|
||||
|
||||
{
|
||||
// When in non partial mode, we can say whether the regex was consumed or not.
|
||||
common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
|
||||
assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
|
||||
}
|
||||
{
|
||||
common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
|
||||
auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
|
||||
assert_equals(true, res.has_value());
|
||||
// Verify captures
|
||||
assert_equals<size_t>(2, res->groups.size());
|
||||
assert_equals("Hell", builder.str(res->groups[0]));
|
||||
assert_equals("el", builder.str(res->groups[1]));
|
||||
// Verify position is after the match
|
||||
assert_equals<size_t>(4, builder.pos());
|
||||
assert_equals("o,", builder.consume_rest());
|
||||
}
|
||||
{
|
||||
// But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
|
||||
common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
|
||||
assert_throws([&]() {
|
||||
builder.try_consume_regex(common_regex("Hello, world!"));
|
||||
}, "^Hello, world!$");
|
||||
}
|
||||
|
||||
// Now regardless of the mode, we can tell these aren't a match.
|
||||
for (const auto is_partial : {false, true}) {
|
||||
common_chat_msg_parser builder("Hello,", is_partial, {});
|
||||
assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
|
||||
}
|
||||
for (const auto is_partial : {false, true}) {
|
||||
common_chat_msg_parser builder("Hello,", is_partial, {});
|
||||
assert_equals(false, builder.try_consume_literal("Oh"));
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<std::string> barely_healable_jsons = {
|
||||
"{",
|
||||
"{\"",
|
||||
"{\"\\",
|
||||
"{\"n",
|
||||
"{\"name\"",
|
||||
"{\"name\":",
|
||||
"{\"name\":\"",
|
||||
"{\"name\":\"\\",
|
||||
"{\"name\":\"python",
|
||||
"{\"name\":\"python\\",
|
||||
"{\",",
|
||||
"{\":",
|
||||
"{\"[",
|
||||
"{\"]",
|
||||
"{\"{",
|
||||
"{\"}",
|
||||
"{\"1",
|
||||
"{\"name\":\",",
|
||||
"{\"name\":\":",
|
||||
"{\"name\":\"[",
|
||||
"{\"name\":\"]",
|
||||
"{\"name\":\"{",
|
||||
"{\"name\":\"}",
|
||||
"{\"name\":\"1",
|
||||
};
|
||||
|
||||
static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
|
||||
common_chat_msg_parser builder(input, is_partial, {});
|
||||
auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
|
||||
assert_equals(true, js.has_value());
|
||||
assert_equals(is_partial, js->is_partial);
|
||||
assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
|
||||
}
|
||||
|
||||
static void test_deepseek_v3_1_tool_calls() {
|
||||
//common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
|
||||
// variant: happy path for when it works as the model card says it should
|
||||
const std::string variant("simple");
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto msg = common_chat_parse(input, false, params);
|
||||
assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
|
||||
// JSON arguments are dumped without spaces
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string(""), msg.content);
|
||||
assert_equals(variant, std::string(""), msg.reasoning_content);
|
||||
|
||||
// variant: simple + thinking open
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("simple_thinking");
|
||||
const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, params);
|
||||
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string(""), m.content);
|
||||
assert_equals(variant, std::string("REASONING"), m.reasoning_content);
|
||||
}
|
||||
// variant: simple + multiple tool calls
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("simple_multiple_tool_calls");
|
||||
const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, params);
|
||||
assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
|
||||
assert_equals(variant, std::string("CONTENT"), m.content);
|
||||
assert_equals(variant, std::string(""), m.reasoning_content);
|
||||
}
|
||||
|
||||
|
||||
// variant: thinking forced open + tool call in reasoning content
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("thinking_forced_open_tool_call_in_reasoning");
|
||||
const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, params);
|
||||
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string(""), m.content);
|
||||
assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
|
||||
}
|
||||
|
||||
// variant: thinking forced open + tool call in reasoning content + no closing think + not partial
|
||||
// This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
|
||||
// to make tool calls in reasoning content according to the model card, but it does sometimes, so
|
||||
// add the reasoning content as regular content and parse the tool calls.
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
|
||||
const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, params);
|
||||
assert_equals(variant, std::string("REASONING"), m.content);
|
||||
assert_equals(variant, std::string(""), m.reasoning_content);
|
||||
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
|
||||
}
|
||||
|
||||
// variant: thinking forced open + tool call in reasoning content + no closing think + partial
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
|
||||
const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, /* is_partial= */ true, params);
|
||||
assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
|
||||
assert_equals(variant, std::string(""), m.content);
|
||||
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
|
||||
}
|
||||
|
||||
// variant: thinking not forced open + reasoning + regular content + no tool calls
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = true;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
|
||||
const std::string in = "REASONING</think>CONTENT";
|
||||
auto m = common_chat_parse(in, false, params);
|
||||
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("CONTENT"), m.content);
|
||||
assert_equals(variant, std::string("REASONING"), m.reasoning_content);
|
||||
}
|
||||
// variant: thinking not forced open + missing reasoning + no tool calls
|
||||
{
|
||||
common_chat_parser_params params;
|
||||
params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
params.reasoning_in_content = false;
|
||||
params.thinking_forced_open = false;
|
||||
params.parse_tool_calls = true;
|
||||
const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
|
||||
const std::string in = "CONTENT";
|
||||
auto m = common_chat_parse(in, false, params);
|
||||
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("CONTENT"), m.content);
|
||||
assert_equals(variant, std::string(""), m.reasoning_content);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
|
||||
common_chat_msg_parser builder(input, parse_as_partial, {});
|
||||
auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
|
||||
assert_equals(true, js.has_value());
|
||||
assert_equals(is_partial, js->is_partial);
|
||||
assert_equals(expected, js->value.dump());
|
||||
}
|
||||
|
||||
static void test_json_with_dumped_args_no_args() {
|
||||
// Normal JSON, nothing to heal, nothing to dump
|
||||
test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
|
||||
// Full json is args
|
||||
test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
|
||||
|
||||
// If the arguments are further down, don't heal partial content.
|
||||
for (const auto & src : barely_healable_jsons) {
|
||||
test(src, true, {{"arguments"}}, {}, "{}");
|
||||
}
|
||||
// But heal content that isn't partial.
|
||||
test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
|
||||
}
|
||||
|
||||
static void test_json_with_dumped_args() {
|
||||
|
||||
// Partial content.
|
||||
test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
|
||||
test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
|
||||
test("{\"content\": ", true, {}, {{"content"}}, "{}");
|
||||
|
||||
// If the entire JSON is the arguments, healing it then dumping it produces the same output as the input (just reformatted).
|
||||
test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
|
||||
for (const auto & src : barely_healable_jsons) {
|
||||
test(src, true, {{}}, {}, src);
|
||||
}
|
||||
|
||||
// Full JSON w/ args
|
||||
for (auto parse_as_partial : {true, false}) {
|
||||
test_with_args(
|
||||
R"({"name": "python", "args": {"arg1": 1}})",
|
||||
R"({"name":"python","args":"{\"arg1\":1}"})",
|
||||
parse_as_partial,
|
||||
/* is_partial= */ false
|
||||
);
|
||||
}
|
||||
|
||||
// Partial JSON w/ partial args
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {")",
|
||||
R"({"foo":"bar","args":"{\""})"
|
||||
);
|
||||
// Partial args broken in object key
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"ar)",
|
||||
R"({"foo":"bar","args":"{\"ar"})"
|
||||
);
|
||||
// Partial args broken after object key
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1")",
|
||||
R"({"foo":"bar","args":"{\"arg1\""})"
|
||||
);
|
||||
// Partial args broken before object value
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1":)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":"})"
|
||||
);
|
||||
// Partial args broken before object value (space)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": )",
|
||||
R"({"foo":"bar","args":"{\"arg1\":"})"
|
||||
);
|
||||
// Partial args broken in object value that may not be complete (int)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": 1)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":"})"
|
||||
);
|
||||
// Partial args broken in object value that is complete (int)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": 1 )",
|
||||
R"({"foo":"bar","args":"{\"arg1\":1"})"
|
||||
);
|
||||
// Partial args broken in object value that is incomplete (string)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": ")",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\""})"
|
||||
);
|
||||
// Partial args broken in object value that is complete (string)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "1")",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
|
||||
);
|
||||
// Partial args broken on array opening
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": [)",
|
||||
R"({"foo":"bar","args":"["})"
|
||||
);
|
||||
// Partial args broken on array value that is incomplete (int)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": [1)",
|
||||
R"({"foo":"bar","args":"["})"
|
||||
);
|
||||
// Partial args broken on array value that is complete (int)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": [1 )",
|
||||
R"({"foo":"bar","args":"[1"})"
|
||||
);
|
||||
// Partial args broken on array value that is complete (string)
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": ["1")",
|
||||
R"({"foo":"bar","args":"[\"1\""})"
|
||||
);
|
||||
// Partial args broken after array value
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": [1,)",
|
||||
R"({"foo":"bar","args":"[1,"})"
|
||||
);
|
||||
// Partial args broken on nested array
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": [)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":["})"
|
||||
);
|
||||
|
||||
// Unicode tests
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\u)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\u0)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\u00)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\u000)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\u0000)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud8)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud80)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800\)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
|
||||
);
|
||||
test_with_args(
|
||||
R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
|
||||
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
|
||||
);
|
||||
}
|
||||
|
||||
static void test_positions() {
|
||||
{
|
||||
common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
|
||||
assert_equals<size_t>(0, builder.pos());
|
||||
assert_throws([&]() { builder.move_to(100); });
|
||||
assert_equals<size_t>(0, builder.pos());
|
||||
assert_throws([&]() { builder.move_back(1); });
|
||||
assert_equals<size_t>(0, builder.pos());
|
||||
|
||||
builder.move_to(8);
|
||||
assert_equals<size_t>(8, builder.pos());
|
||||
builder.move_back(1);
|
||||
assert_equals<size_t>(7, builder.pos());
|
||||
assert_equals("world!", builder.consume_rest());
|
||||
|
||||
builder.move_to(0);
|
||||
assert_equals<size_t>(0, builder.pos());
|
||||
|
||||
assert_throws([&]() { builder.finish(); });
|
||||
assert_equals<size_t>(0, builder.pos());
|
||||
|
||||
builder.move_to(builder.input().size());
|
||||
builder.finish();
|
||||
}
|
||||
{
|
||||
common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
|
||||
|
||||
builder.move_to(builder.input().size());
|
||||
assert_equals<size_t>(builder.input().size(), builder.pos());
|
||||
builder.finish();
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
test_positions();
|
||||
test_json_with_dumped_args_no_args();
|
||||
test_json_with_dumped_args();
|
||||
test_reasoning();
|
||||
test_regex();
|
||||
test_deepseek_v3_1_tool_calls();
|
||||
std::cout << "All tests passed!\n";
|
||||
return 0;
|
||||
}
|
||||
File diff suppressed because it is too large
@ -1,7 +1,6 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <regex>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
|
|
@ -12,26 +11,28 @@
|
|||
#include <cassert>
|
||||
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
#include "chat.h"
|
||||
#include "jinja/runtime.h"
|
||||
#include "jinja/parser.h"
|
||||
#include "jinja/lexer.h"
|
||||
#include "jinja/caps.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include <regex>
|
||||
#endif
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
int main_automated_tests(void);
|
||||
static int main_automated_tests(void);
|
||||
|
||||
void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
|
||||
void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
|
||||
static void run_multiple(const std::string& dir_path, bool stop_on_first_failure, const json& input, bool use_common = false);
|
||||
static void run_single(const std::string& contents, json input, bool use_common = false, const std::string & output_path = "");
|
||||
|
||||
|
||||
|
||||
std::string HELP = R"(
|
||||
static std::string HELP = R"(
|
||||
Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
|
||||
Options:
|
||||
-h, --help Show this help message and exit.
|
||||
--with-tools Add a tool and a tool call to the default JSON input
|
||||
--json <path> Path to the JSON input file.
|
||||
--stop-on-first-fail Stop testing on the first failure (default: false).
|
||||
--no-common Use direct Jinja engine instead of common chat templates (default: use common).
|
||||
|
|
@ -41,7 +42,7 @@ If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory.
|
|||
If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
|
||||
)";
|
||||
|
||||
std::string DEFAULT_JSON = R"({
|
||||
static std::string DEFAULT_JSON = R"({
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
|
|
@ -57,12 +58,65 @@ std::string DEFAULT_JSON = R"({
|
|||
"add_generation_prompt": true
|
||||
})";
|
||||
|
||||
static std::string DEFAULT_JSON_WITH_TOOLS = R"({
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, how are you?"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "I am fine, thank you!"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Call a tool!"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call00001",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "test",
|
||||
"arguments": { "arg": "hello" }
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "test",
|
||||
"description": "Test",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"arg": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["arg"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"bos_token": "<s>",
|
||||
"eos_token": "</s>",
|
||||
"add_generation_prompt": true
|
||||
})";
|
||||
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
std::vector<std::string> args(argv, argv + argc);
|
||||
|
||||
std::string tmpl_path;
|
||||
std::string json_path;
|
||||
std::string output_path;
|
||||
std::string & json_to_use = DEFAULT_JSON;
|
||||
bool stop_on_first_fail = false;
|
||||
bool use_common = true;
|
||||
|
||||
|
|
@ -70,9 +124,12 @@ int main(int argc, char ** argv) {
|
|||
if (args[i] == "--help" || args[i] == "-h") {
|
||||
std::cout << HELP << "\n";
|
||||
return 0;
|
||||
} else if (args[i] == "--json" && i + 1 < args.size()) {
|
||||
}
|
||||
if (args[i] == "--json" && i + 1 < args.size()) {
|
||||
json_path = args[i + 1];
|
||||
i++;
|
||||
} else if (args[i] == "--with-tools") {
|
||||
json_to_use = DEFAULT_JSON_WITH_TOOLS;
|
||||
} else if (args[i] == "--stop-on-first-fail") {
|
||||
stop_on_first_fail = true;
|
||||
} else if (args[i] == "--output" && i + 1 < args.size()) {
|
||||
|
|
@ -105,7 +162,7 @@ int main(int argc, char ** argv) {
|
|||
std::istreambuf_iterator<char>());
|
||||
input_json = json::parse(content);
|
||||
} else {
|
||||
input_json = json::parse(DEFAULT_JSON);
|
||||
input_json = json::parse(json_to_use);
|
||||
}
|
||||
|
||||
std::filesystem::path p(tmpl_path);
|
||||
|
|
@ -125,7 +182,7 @@ int main(int argc, char ** argv) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
|
||||
void run_multiple(const std::string& dir_path, bool stop_on_first_fail, const json& input, bool use_common) {
|
||||
std::vector<std::string> failed_tests;
|
||||
|
||||
// list all files in models/templates/ and run each
|
||||
|
|
@ -180,7 +237,7 @@ static std::string format_using_common(
|
|||
common_chat_templates_inputs inputs;
|
||||
inputs.use_jinja = true;
|
||||
inputs.messages = messages;
|
||||
inputs.tools = tools;
|
||||
inputs.tools = std::move(tools);
|
||||
inputs.add_generation_prompt = true;
|
||||
auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
|
||||
output = normalize_newlines(output);
|
||||
|
|
@ -209,7 +266,7 @@ static jinja::value_string format_using_direct_engine(
|
|||
|
||||
jinja::runtime runtime(ctx);
|
||||
const jinja::value results = runtime.execute(ast);
|
||||
auto parts = runtime.gather_string_parts(results);
|
||||
auto parts = jinja::runtime::gather_string_parts(results);
|
||||
|
||||
std::cout << "\n=== RESULTS ===\n";
|
||||
for (const auto & part : parts->as_string().parts) {

@@ -220,7 +277,7 @@ static jinja::value_string format_using_direct_engine(
}
|
||||
|
||||
|
||||
void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
|
||||
void run_single(const std::string& contents, json input, bool use_common, const std::string & output_path) {
|
||||
jinja::enable_debug(true);
|
||||
|
||||
jinja::value_string output_parts;

@@ -560,7 +617,7 @@ int main_automated_tests(void) {
supported_tmpl.resize(res);
|
||||
res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
|
||||
std::cout << "Built-in chat templates:\n";
|
||||
for (auto tmpl : supported_tmpl) {
|
||||
for (const auto *tmpl : supported_tmpl) {
|
||||
std::cout << " " << tmpl << "\n";
|
||||
}

@@ -592,6 +649,7 @@ int main_automated_tests(void) {
}
|
||||
|
||||
std::vector<common_chat_msg> messages;
|
||||
messages.reserve(conversation.size());
|
||||
for (const auto & msg : conversation) {
|
||||
messages.push_back(simple_msg(msg.role, msg.content));
|
||||
}

@@ -622,58 +680,6 @@ int main_automated_tests(void) {
}
|
||||
}
|
||||
|
||||
// TODO: llama_chat_format_single will be deprecated, remove these tests later
|
||||
|
||||
// test llama_chat_format_single for system message
|
||||
std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
|
||||
std::vector<common_chat_msg> chat2;
|
||||
auto sys_msg = simple_msg("system", "You are a helpful assistant");
|
||||
|
||||
auto fmt_sys = [&](std::string tmpl_str) {
|
||||
auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
|
||||
auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
|
||||
std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
|
||||
std::cout << "-------------------------\n";
|
||||
return output;
|
||||
};
|
||||
assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
|
||||
assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n");
|
||||
assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n");
|
||||
assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n");
|
||||
assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]");
|
||||
assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
|
||||
assert(fmt_sys("llama2-sys") == "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n");
|
||||
assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates
|
||||
assert(fmt_sys("gemma") == ""); // for gemma, system message is merged with user message
|
||||
assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
|
||||
assert(fmt_sys("gigachat") == "<s>You are a helpful assistant<|message_sep|>");
|
||||
|
||||
|
||||
// test llama_chat_format_single for user message
|
||||
std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
|
||||
chat2.push_back(simple_msg("system", "You are a helpful assistant"));
|
||||
chat2.push_back(simple_msg("user", "Hello"));
|
||||
chat2.push_back(simple_msg("assistant", "I am assistant"));
|
||||
auto new_msg = simple_msg("user", "How are you");
|
||||
|
||||
auto fmt_single = [&](const std::string & tmpl_str) {
|
||||
auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
|
||||
auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
|
||||
std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
|
||||
std::cout << "-------------------------\n";
|
||||
return output;
|
||||
};
|
||||
assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
|
||||
assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]");
|
||||
assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]");
|
||||
assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]");
|
||||
assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]");
|
||||
assert(fmt_single("llama2") == "[INST] How are you [/INST]");
|
||||
assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates
|
||||
assert(fmt_single("gemma") == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
|
||||
assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
|
||||
// assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
|
||||
|
||||
std::cout << "\nOK: All tests passed successfully.\n";
|
||||
|
||||
return 0;

tests/test-chat.cpp (5359): file diff suppressed because it is too large

@@ -20,6 +20,7 @@ int main(int argc, char *argv[]) {
t.test("json", test_json_parser);
t.test("gbnf", test_gbnf_generation);
t.test("serialization", test_json_serialization);
t.test("python-dict", test_python_dict_parser);

return t.summary();
}

@@ -26,6 +26,7 @@ else()
add_subdirectory(server)
endif()
add_subdirectory(tokenize)
add_subdirectory(parser)
add_subdirectory(tts)
add_subdirectory(mtmd)
if (GGML_RPC)

@@ -0,0 +1,20 @@
if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
# this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API
set(TARGET llama-debug-template-parser)
add_executable(${TARGET} debug-template-parser.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)
endif()
endif()

set(TARGET llama-template-analysis)
add_executable(${TARGET} template-analysis.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)
endif()
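# Build sketch, assuming the usual out-of-tree llama.cpp CMake workflow (the build directory
# name is an assumption; the target names come from this file):
#   cmake -B build
#   cmake --build build --target llama-debug-template-parser llama-template-analysis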

@@ -0,0 +1,488 @@
#include "../src/llama-grammar.h"
|
||||
#include "chat-auto-parser.h"
|
||||
#include "chat-diff-analyzer.h"
|
||||
#include "chat.h"
|
||||
#include "common.h"
|
||||
#include "gguf.h"
|
||||
#include "jinja/runtime.h"
|
||||
#include "log.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "peg-parser.h"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
enum class output_mode {
|
||||
ANALYSIS, // Only output analysis results (default)
|
||||
TEMPLATE, // Only output rendered template
|
||||
BOTH // Output both
|
||||
};
|
||||
|
||||
enum class input_message_type {
|
||||
NONE, // Don't render any message scenarios (only analysis)
|
||||
CONTENT_ONLY, // Simple assistant message with content
|
||||
REASONING_CONTENT, // Message with reasoning_content + content
|
||||
TOOL_CALL_ONLY, // Message with tool_calls only
|
||||
CONTENT_TOOL_CALL, // Message with content + tool_calls
|
||||
REASONING_TOOL_CALL, // Message with reasoning_content + tool_calls
|
||||
CONTENT_FAKE_TOOL_CALL, // Message with content but no actual tool_calls (for testing)
|
||||
ALL // Render all scenarios
|
||||
};
|
||||
|
||||
struct debug_options {
|
||||
std::string template_path;
|
||||
bool with_tools = true;
|
||||
bool generation_prompt = true;
|
||||
bool enable_reasoning = true;
|
||||
bool debug_jinja = false;
|
||||
output_mode mode = output_mode::BOTH;
|
||||
input_message_type input_message = input_message_type::NONE;
|
||||
};
|
||||
|
||||
static std::string read_file(const std::string & path) {
|
||||
std::ifstream fin(path, std::ios::binary);
|
||||
if (!fin.is_open()) {
|
||||
throw std::runtime_error("Could not open file: " + path);
|
||||
}
|
||||
std::ostringstream buf;
|
||||
buf << fin.rdbuf();
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
static std::string read_gguf_chat_template(const std::string & path) {
|
||||
struct gguf_init_params params = { /*no_alloc =*/true, // We only need metadata, not tensor data
|
||||
/*ctx=*/nullptr };
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
|
||||
if (ctx == nullptr) {
|
||||
throw std::runtime_error("Could not open GGUF file: " + path);
|
||||
}
|
||||
|
||||
const char * key = "tokenizer.chat_template";
|
||||
int64_t key_id = gguf_find_key(ctx, key);
|
||||
|
||||
if (key_id == -1) {
|
||||
gguf_free(ctx);
|
||||
throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
|
||||
}
|
||||
|
||||
const char * template_str = gguf_get_val_str(ctx, key_id);
|
||||
if (template_str == nullptr) {
|
||||
gguf_free(ctx);
|
||||
throw std::runtime_error("GGUF file contains chat template key but value is null");
|
||||
}
|
||||
|
||||
std::string result = template_str;
|
||||
gguf_free(ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void print_usage(const char * program_name) {
|
||||
LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
|
||||
LOG_ERR("\nOptions:\n");
|
||||
LOG_ERR(" --no-tools Disable tool definitions\n");
|
||||
LOG_ERR(" --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
|
||||
LOG_ERR(" --enable-reasoning=0|1 Enable reasoning parsing (default: 1)\n");
|
||||
LOG_ERR(" --output=MODE Output mode: analysis, template, both (default: both)\n");
|
||||
LOG_ERR(" --debug-jinja Enable Jinja fine-grained debug\n");
|
||||
LOG_ERR(" --input-message=TYPE Message type to render:\n");
|
||||
LOG_ERR(" content_only, reasoning_content, tool_call_only,\n");
|
||||
LOG_ERR(" content_tool_call, reasoning_tool_call,\n");
|
||||
LOG_ERR(" content_fake_tool_call, all\n");
|
||||
LOG_ERR("\nExamples:\n");
|
||||
LOG_ERR(" %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
|
||||
LOG_ERR(" %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
|
||||
}
|
||||
|
||||
static bool parse_bool_option(const std::string & value) {
|
||||
return value == "1" || value == "true" || value == "yes";
|
||||
}
|
||||
|
||||
static bool parse_options(int argc, char ** argv, debug_options & opts) {
|
||||
if (argc < 2) {
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
opts.template_path = argv[1];
|
||||
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "--debug-jinja") {
|
||||
opts.debug_jinja = true;
|
||||
} else if (arg == "--no-tools") {
|
||||
opts.with_tools = false;
|
||||
} else if (arg.rfind("--generation-prompt=", 0) == 0) {
|
||||
opts.generation_prompt = parse_bool_option(arg.substr(20));
|
||||
} else if (arg.rfind("--enable-reasoning=", 0) == 0) {
|
||||
opts.enable_reasoning = parse_bool_option(arg.substr(19));
|
||||
} else if (arg.rfind("--output=", 0) == 0) {
|
||||
std::string mode = arg.substr(9);
|
||||
if (mode == "analysis") {
|
||||
opts.mode = output_mode::ANALYSIS;
|
||||
} else if (mode == "template") {
|
||||
opts.mode = output_mode::TEMPLATE;
|
||||
} else if (mode == "both") {
|
||||
opts.mode = output_mode::BOTH;
|
||||
} else {
|
||||
LOG_ERR("Unknown output mode: %s\n", mode.c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (arg.rfind("--input-message=", 0) == 0) {
|
||||
std::string type = arg.substr(16);
|
||||
if (type == "content_only") {
|
||||
opts.input_message = input_message_type::CONTENT_ONLY;
|
||||
} else if (type == "reasoning_content") {
|
||||
opts.input_message = input_message_type::REASONING_CONTENT;
|
||||
} else if (type == "tool_call_only") {
|
||||
opts.input_message = input_message_type::TOOL_CALL_ONLY;
|
||||
} else if (type == "content_tool_call") {
|
||||
opts.input_message = input_message_type::CONTENT_TOOL_CALL;
|
||||
} else if (type == "reasoning_tool_call") {
|
||||
opts.input_message = input_message_type::REASONING_TOOL_CALL;
|
||||
} else if (type == "content_fake_tool_call") {
|
||||
opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
|
||||
} else if (type == "all") {
|
||||
opts.input_message = input_message_type::ALL;
|
||||
} else {
|
||||
LOG_ERR("Unknown input message type: %s\n", type.c_str());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LOG_ERR("Unknown option: %s\n", arg.c_str());
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static json build_user_message() {
|
||||
return json{
|
||||
{ "role", "user" },
|
||||
{ "content", "Hello, please help me with a task." }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_content_only_message() {
|
||||
return json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", "Hello! I'm here to help you with your task." }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_reasoning_content_message() {
|
||||
return json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", "Hello! I'm here to help you with your task." },
|
||||
{ "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_tool_call_only_message() {
|
||||
return json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", nullptr },
|
||||
{ "tool_calls",
|
||||
json::array({ json{
|
||||
{ "type", "function" },
|
||||
{ "function", json{ { "name", "test_function_name" },
|
||||
{ "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
|
||||
{ "id", "123456789" } } }) }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_content_tool_call_message() {
|
||||
return json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", "I'll help you by calling a function." },
|
||||
{ "tool_calls",
|
||||
json::array({ json{
|
||||
{ "type", "function" },
|
||||
{ "function",
|
||||
json{ { "name", "test_function_name" },
|
||||
{ "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_reasoning_tool_call_message() {
|
||||
return json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", nullptr },
|
||||
{ "reasoning_content", "I need to call a function to help with this task." },
|
||||
{ "tool_calls",
|
||||
json::array({ json{
|
||||
{ "type", "function" },
|
||||
{ "function",
|
||||
json{ { "name", "test_function_name" },
|
||||
{ "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_content_fake_tool_call_message() {
|
||||
// This message has content but NO tool_calls field
|
||||
// It's used to test if a template renders tool definitions but not tool calls
|
||||
return json{
|
||||
{ "role", "assistant" },
|
||||
{ "content", "I'll help you by calling a function." }
|
||||
};
|
||||
}
|
||||
|
||||
static json build_tools_definition() {
|
||||
json parameters_schema = json::object();
|
||||
parameters_schema["type"] = "object";
|
||||
parameters_schema["properties"] = json::object();
|
||||
parameters_schema["properties"]["param1"] = json::object({
|
||||
{ "type", "string" },
|
||||
{ "description", "First parameter" }
|
||||
});
|
||||
parameters_schema["properties"]["param2"] = json::object({
|
||||
{ "type", "string" },
|
||||
{ "description", "Second parameter" }
|
||||
});
|
||||
parameters_schema["required"] = json::array({ "param1" });
|
||||
|
||||
return json::array({
|
||||
json{ { "type", "function" },
|
||||
{ "function", json{ { "name", "test_function_name" },
|
||||
{ "description", "A test function for debugging" },
|
||||
{ "parameters", parameters_schema } } } }
|
||||
});
|
||||
}
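// For reference, the tools array built above serializes to roughly the following JSON
// (a sketch derived from the code above, not a captured dump):
//   [{ "type": "function",
//      "function": { "name": "test_function_name",
//                    "description": "A test function for debugging",
//                    "parameters": { "type": "object",
//                                    "properties": { "param1": { ... }, "param2": { ... } },
//                                    "required": ["param1"] } } }]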
|
||||
|
||||
static void render_scenario(const common_chat_template & tmpl,
|
||||
const std::string & scenario_name,
|
||||
const json & messages,
|
||||
const json & tools,
|
||||
bool add_generation_prompt,
|
||||
bool enable_thinking) {
|
||||
LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
|
||||
LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
|
||||
enable_thinking ? "true" : "false");
|
||||
|
||||
// When add_generation_prompt is true, add a trailing user message to trigger the prompt
|
||||
json final_messages = messages;
|
||||
if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
|
||||
final_messages.push_back(json{
|
||||
{ "role", "user" },
|
||||
{ "content", "Now please continue with another response." }
|
||||
});
|
||||
}
|
||||
|
||||
LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
|
||||
|
||||
try {
|
||||
autoparser::templates_params inputs;
|
||||
inputs.messages = final_messages;
|
||||
inputs.add_generation_prompt = add_generation_prompt;
|
||||
inputs.extra_context["enable_thinking"] = enable_thinking;
|
||||
|
||||
if (!tools.is_null() && tools.is_array() && !tools.empty()) {
|
||||
inputs.tools = tools;
|
||||
}
|
||||
|
||||
std::string output = common_chat_template_direct_apply(tmpl, inputs);
|
||||
|
||||
LOG_ERR("\n--- Rendered Output ---\n");
|
||||
LOG_ERR("%s\n", output.c_str());
|
||||
LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Rendering failed: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
static void render_all_scenarios(const common_chat_template & tmpl,
|
||||
const json & tools,
|
||||
bool add_generation_prompt,
|
||||
bool enable_thinking,
|
||||
input_message_type message_type) {
|
||||
json user_msg = build_user_message();
|
||||
|
||||
auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
|
||||
if (message_type == input_message_type::ALL || message_type == type) {
|
||||
json messages = json::array({ user_msg, assistant_msg });
|
||||
render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
|
||||
}
|
||||
};
|
||||
|
||||
render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
|
||||
render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
|
||||
render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
|
||||
render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
|
||||
render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
|
||||
render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
|
||||
build_content_fake_tool_call_message());
|
||||
|
||||
// Also render with add_generation_prompt=true to show the prompt ending
|
||||
if (message_type == input_message_type::ALL) {
|
||||
LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
|
||||
|
||||
json prompt_messages = json::array({ user_msg });
|
||||
render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
|
||||
|
||||
// With enable_thinking toggled
|
||||
render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::string mode_to_str(T mode) {
|
||||
std::ostringstream os;
|
||||
os << mode;
|
||||
return os.str();
|
||||
}
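// Note: mode_to_str assumes the analysis mode enums (reasoning/content/tool modes) provide an
// operator<< overload; streaming the value is expected to yield a readable name rather than an int.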
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
// Set log level to most verbose to capture all debug output
|
||||
common_log_set_verbosity_thold(99);
|
||||
|
||||
debug_options opts;
|
||||
if (!parse_options(argc, argv, opts)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
|
||||
jinja::enable_debug(true);
|
||||
}
|
||||
|
||||
std::string template_source;
|
||||
try {
|
||||
// Check if the file is a GGUF file
|
||||
if (opts.template_path.size() >= 5 &&
|
||||
opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
|
||||
template_source = read_gguf_chat_template(opts.template_path);
|
||||
} else {
|
||||
template_source = read_file(opts.template_path);
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Error reading template: %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
|
||||
LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
|
||||
opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");
|
||||
|
||||
try {
|
||||
common_chat_template chat_template(template_source, "", "");
|
||||
|
||||
// Build tools definition
|
||||
json tools = opts.with_tools ? build_tools_definition() : json();
|
||||
|
||||
// Render template scenarios if requested
|
||||
if (opts.input_message != input_message_type::NONE &&
|
||||
(opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" TEMPLATE RENDERING OUTPUT\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
|
||||
render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
|
||||
opts.input_message);
|
||||
}
|
||||
|
||||
// Output analysis if requested
|
||||
if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" TEMPLATE ANALYSIS\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
|
||||
autoparser::analyze_template analysis(chat_template);
|
||||
|
||||
// Generate Parser
|
||||
autoparser::templates_params params;
|
||||
params.messages = json::array();
|
||||
params.reasoning_format =
|
||||
opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
|
||||
params.enable_thinking = opts.enable_reasoning;
|
||||
params.add_generation_prompt = opts.generation_prompt;
|
||||
|
||||
if (opts.with_tools) {
|
||||
params.tools = tools;
|
||||
params.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
||||
} else {
|
||||
params.tools = json();
|
||||
params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
|
||||
}
|
||||
params.parallel_tool_calls = false;
|
||||
|
||||
auto parser_data = autoparser::universal_peg_generator::generate_parser(chat_template, params, analysis);
|
||||
|
||||
LOG_ERR("\n=== Differential Analysis Results ===\n");
|
||||
|
||||
LOG_ERR("\n--- Reasoning & Content Structure ---\n");
|
||||
LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning.mode).c_str());
|
||||
LOG_ERR("reasoning_start: '%s'\n", analysis.reasoning.start.c_str());
|
||||
LOG_ERR("reasoning_end: '%s'\n", analysis.reasoning.end.c_str());
|
||||
LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content.mode).c_str());
|
||||
LOG_ERR("content_start: '%s'\n", analysis.content.start.c_str());
|
||||
LOG_ERR("content_end: '%s'\n", analysis.content.end.c_str());
|
||||
|
||||
LOG_ERR("\n--- Tool Call Structure ---\n");
|
||||
LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools.format.mode).c_str());
|
||||
LOG_ERR("supports_tools: %s\n", analysis.jinja_caps.supports_tools ? "true" : "false");
|
||||
LOG_ERR("supports_parallel_calls: %s\n", analysis.jinja_caps.supports_parallel_tool_calls ? "true" : "false");
|
||||
LOG_ERR("tool_section_start: '%s'\n", analysis.tools.format.section_start.c_str());
|
||||
LOG_ERR("tool_section_end: '%s'\n", analysis.tools.format.section_end.c_str());
|
||||
LOG_ERR("per_call_start: '%s'\n", analysis.tools.format.per_call_start.c_str());
|
||||
LOG_ERR("per_call_end: '%s'\n", analysis.tools.format.per_call_end.c_str());
|
||||
LOG_ERR("func_name_prefix: '%s'\n", analysis.tools.function.name_prefix.c_str());
|
||||
LOG_ERR("func_name_suffix: '%s'\n", analysis.tools.function.name_suffix.c_str());
|
||||
LOG_ERR("func_close: '%s'\n", analysis.tools.function.close.c_str());
|
||||
LOG_ERR("arg_name_prefix: '%s'\n", analysis.tools.arguments.name_prefix.c_str());
|
||||
LOG_ERR("arg_name_suffix: '%s'\n", analysis.tools.arguments.name_suffix.c_str());
|
||||
LOG_ERR("arg_value_prefix: '%s'\n", analysis.tools.arguments.value_prefix.c_str());
|
||||
LOG_ERR("arg_value_suffix: '%s'\n", analysis.tools.arguments.value_suffix.c_str());
|
||||
LOG_ERR("name_field: '%s'\n", analysis.tools.format.name_field.c_str());
|
||||
LOG_ERR("args_field: '%s'\n", analysis.tools.format.args_field.c_str());
|
||||
LOG_ERR("id_field: '%s'\n", analysis.tools.format.id_field.c_str());
|
||||
LOG_ERR("gen_id_field: '%s'\n", analysis.tools.format.gen_id_field.c_str());
|
||||
LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.tools.format.parameter_order.begin(), analysis.tools.format.parameter_order.end(),
|
||||
std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
|
||||
).c_str());
|
||||
|
||||
LOG_ERR("\n=== Generated Parser ===\n");
|
||||
common_peg_arena arena;
|
||||
arena.load(parser_data.parser);
|
||||
LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
|
||||
|
||||
LOG_ERR("\n=== Generated Grammar ===\n");
|
||||
LOG_ERR("%s\n", parser_data.grammar.c_str());
|
||||
|
||||
LOG_ERR("\n=== Generated Lazy Grammar ===\n");
|
||||
LOG_ERR("%d\n", parser_data.grammar_lazy);
|
||||
|
||||
LOG_ERR("\n=== Generated Grammar Triggers ===\n");
|
||||
for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
|
||||
LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
|
||||
}
|
||||
|
||||
LOG_ERR("\n=== Preserved Tokens ===\n");
|
||||
for (const std::string & token : parser_data.preserved_tokens) {
|
||||
LOG_ERR(" '%s'\n", token.c_str());
|
||||
}
|
||||
|
||||
if (!parser_data.grammar.empty()) {
|
||||
LOG_ERR("\n=== Verifying created grammar ===\n");
|
||||
auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
|
||||
parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
|
||||
if (grammar != nullptr) {
|
||||
LOG_ERR("\n=== Grammar successfully created ===\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Analysis failed: %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}

@@ -0,0 +1,611 @@
#include "chat-auto-parser.h"
|
||||
#include "chat-auto-parser-helpers.h"
|
||||
#include "chat.h"
|
||||
#include "log.h"
|
||||
#include "jinja/caps.h"
|
||||
#include "jinja/runtime.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "nlohmann/json.hpp"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
// ANSI color codes - using 256-color palette for brighter colors (all bold)
|
||||
#define ANSI_RESET "\033[0m"
|
||||
#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" // Bold bright purple for main headers
|
||||
#define ANSI_CYAN "\033[1m\x1b[38;5;81m" // Bold bright cyan for section headers
|
||||
#define ANSI_BLUE "\033[1m\x1b[38;5;12m" // Bold bright blue for labels
|
||||
#define ANSI_ORANGE "\033[1m\x1b[38;5;209m" // Bold orange for right differences
|
||||
#define ANSI_GREEN "\033[1m\x1b[38;5;83m" // Bold bright green for left differences
|
||||
#define ANSI_GRAY "\033[1m\x1b[38;5;240m" // Bold gray (used for "no variables" message)
|
||||
#define ANSI_BOLD "\033[1m" // Standalone bold
|
||||
#define ANSI_PREFIX "\033[1m\x1b[38;5;176m" // Bold color for common prefix
|
||||
#define ANSI_SUFFIX "\033[1m\x1b[38;5;61m" // Bold color for common suffix
|
||||
|
||||
// All template paths extracted from tests/test-chat.cpp
|
||||
static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
|
||||
"models/templates/Apertus-8B-Instruct.jinja",
|
||||
"models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
|
||||
"models/templates/ByteDance-Seed-OSS.jinja",
|
||||
"models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
|
||||
"models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
|
||||
"models/templates/GLM-4.6.jinja",
|
||||
"models/templates/GLM-4.7-Flash.jinja",
|
||||
"models/templates/Kimi-K2-Instruct.jinja",
|
||||
"models/templates/Kimi-K2-Thinking.jinja",
|
||||
"models/templates/MiMo-VL.jinja",
|
||||
"models/templates/MiniMax-M2.jinja",
|
||||
"models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
|
||||
"models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
|
||||
"models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
|
||||
"models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
|
||||
"models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
|
||||
"models/templates/Qwen-QwQ-32B.jinja",
|
||||
"models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
|
||||
"models/templates/Qwen3-Coder.jinja",
|
||||
"models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
|
||||
"models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
|
||||
"models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
|
||||
"models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
|
||||
"models/templates/google-gemma-2-2b-it.jinja",
|
||||
"models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
|
||||
"models/templates/llama-cpp-deepseek-r1.jinja",
|
||||
"models/templates/meetkai-functionary-medium-v3.1.jinja",
|
||||
"models/templates/meetkai-functionary-medium-v3.2.jinja",
|
||||
"models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
|
||||
"models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
|
||||
"models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
|
||||
"models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
|
||||
"models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
|
||||
"models/templates/moonshotai-Kimi-K2.jinja",
|
||||
"models/templates/openai-gpt-oss-120b.jinja",
|
||||
"models/templates/unsloth-Apriel-1.5.jinja",
|
||||
"models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
|
||||
};
|
||||
|
||||
struct analysis_options {
|
||||
std::vector<std::string> template_paths;
|
||||
bool analyze_all = false;
|
||||
};
|
||||
|
||||
static std::string read_file(const std::string & path) {
|
||||
std::ifstream fin(path, std::ios::binary);
|
||||
if (!fin.is_open()) {
|
||||
throw std::runtime_error("Could not open file: " + path);
|
||||
}
|
||||
std::ostringstream buf;
|
||||
buf << fin.rdbuf();
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
static void print_usage(const char * program_name) {
|
||||
LOG_ERR("Usage: %s [options]\n", program_name);
|
||||
LOG_ERR("\nOptions:\n");
|
||||
LOG_ERR(" --template <name> Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n");
|
||||
LOG_ERR(" --template-file <path> Analyze custom template file\n");
|
||||
LOG_ERR(" --all Analyze all templates from test suite\n");
|
||||
LOG_ERR("\nExamples:\n");
|
||||
LOG_ERR(" %s --all\n", program_name);
|
||||
LOG_ERR(" %s --template deepseek\n", program_name);
|
||||
LOG_ERR(" %s --template-file my-template.jinja\n", program_name);
|
||||
}
|
||||
|
||||
static bool parse_options(int argc, char ** argv, analysis_options & opts) {
|
||||
if (argc < 2) {
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "--all") {
|
||||
opts.analyze_all = true;
|
||||
} else if (arg == "--template") {
|
||||
if (i + 1 >= argc) {
|
||||
LOG_ERR("--template requires an argument\n");
|
||||
return false;
|
||||
}
|
||||
std::string pattern = argv[++i];
|
||||
std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
|
||||
|
||||
// Find matching templates
|
||||
bool found = false;
|
||||
for (const auto & path : ALL_TEMPLATE_PATHS) {
|
||||
std::string path_lower = path;
|
||||
std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower);
|
||||
if (path_lower.find(pattern) != std::string::npos) {
|
||||
opts.template_paths.push_back(path);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
LOG_ERR("No templates found matching: %s\n", pattern.c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (arg == "--template-file") {
|
||||
if (i + 1 >= argc) {
|
||||
LOG_ERR("--template-file requires an argument\n");
|
||||
return false;
|
||||
}
|
||||
opts.template_paths.push_back(argv[++i]);
|
||||
} else {
|
||||
LOG_ERR("Unknown option: %s\n", arg.c_str());
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (opts.analyze_all) {
|
||||
opts.template_paths = ALL_TEMPLATE_PATHS;
|
||||
}
|
||||
|
||||
if (opts.template_paths.empty()) {
|
||||
LOG_ERR("No templates specified\n");
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static json build_tools_definition() {
|
||||
json parameters_schema = json::object();
|
||||
parameters_schema["type"] = "object";
|
||||
parameters_schema["properties"] = json::object();
|
||||
parameters_schema["properties"]["param1"] = json::object({
|
||||
{ "type", "string" },
|
||||
{ "description", "First parameter" }
|
||||
});
|
||||
parameters_schema["properties"]["param2"] = json::object({
|
||||
{ "type", "string" },
|
||||
{ "description", "Second parameter" }
|
||||
});
|
||||
parameters_schema["required"] = json::array({ "param1", "param2" });
|
||||
|
||||
return json::array({
|
||||
json{ { "type", "function" },
|
||||
{ "function", json{ { "name", "test_function_name" },
|
||||
{ "description", "A test function for debugging" },
|
||||
{ "parameters", parameters_schema } } } }
|
||||
});
|
||||
}
|
||||
|
||||
// Helper to create a tool call with arguments as JSON object
|
||||
static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
|
||||
return json{
|
||||
{"id", id},
|
||||
{"type", "function"},
|
||||
{"function", json{
|
||||
{"name", name},
|
||||
{"arguments", args_object} // Pass as JSON object, not serialized string
|
||||
}}
|
||||
};
|
||||
}
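// Example (sketch): build_tool_call("test_function_name", json{{"param1", "value1"}}) yields
//   { "id": "call_001", "type": "function",
//     "function": { "name": "test_function_name", "arguments": { "param1": "value1" } } }
// with "arguments" kept as a JSON object so the template decides how to serialize it.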
|
||||
|
||||
// Helper functions to create repeating message definitions
|
||||
static json make_user_msg() {
|
||||
return json{
|
||||
{"role", "user"},
|
||||
{"content", "Hello, please help me."}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_user_msg2() {
|
||||
return json{
|
||||
{"role", "user"},
|
||||
{"content", "Thank you."}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_user_msg2_continue() {
|
||||
return json{
|
||||
{"role", "user"},
|
||||
{"content", "Continue."}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_assistant_no_tool() {
|
||||
return json{
|
||||
{"role", "assistant"},
|
||||
{"content", "Let me help you."}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_assistant_one_tool() {
|
||||
return json{
|
||||
{"role", "assistant"},
|
||||
{"content", nullptr},
|
||||
{"tool_calls", json::array({
|
||||
build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
|
||||
})}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_assistant_two_tools() {
|
||||
return json{
|
||||
{"role", "assistant"},
|
||||
{"content", nullptr},
|
||||
{"tool_calls", json::array({
|
||||
build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
|
||||
build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
|
||||
})}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_assistant_no_reasoning() {
|
||||
return json{
|
||||
{"role", "assistant"},
|
||||
{"content", "I can help you with that."}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_assistant_with_reasoning() {
|
||||
return json{
|
||||
{"role", "assistant"},
|
||||
{"content", "I can help you with that."},
|
||||
{"reasoning_content", "The user is asking for help. I should respond positively."}
|
||||
};
|
||||
}
|
||||
|
||||
static json make_assistant_one_tool_with_reasoning() {
|
||||
return json{
|
||||
{"role", "assistant"},
|
||||
{"content", nullptr},
|
||||
{"tool_calls", json::array({
|
||||
build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
|
||||
})},
|
||||
{"reasoning_content", "I need to call the tool first."}
|
||||
};
|
||||
}
|
||||
|
||||
static void print_diff_split(const std::string & title, const diff_split & diff) {
|
||||
LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);
|
||||
LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str());
|
||||
LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str());
|
||||
LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str());
|
||||
LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str());
|
||||
}
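// Output shape (ANSI colors omitted; values depend on the template being analyzed):
//   === Diff: <title> ===
//   Common Prefix: '...'
//   Common Suffix: '...'
//   Left (difference): '...'
//   Right (difference): '...'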
|
||||
|
||||
static void check_reasoning_variables(const common_chat_template & tmpl) {
|
||||
LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
|
||||
|
||||
try {
|
||||
// Create a list of candidate reasoning/thinking variable names to probe
|
||||
std::vector<std::string> candidate_vars = {
|
||||
"enable_reasoning",
|
||||
"use_reasoning",
|
||||
"reasoning_enabled",
|
||||
"has_reasoning",
|
||||
"reasoning_mode",
|
||||
"reasoning_format",
|
||||
"reasoning_active",
|
||||
"with_reasoning",
|
||||
"use_thinking",
|
||||
"thinking_enabled",
|
||||
"has_thinking",
|
||||
"thinking_mode",
|
||||
"thinking_format",
|
||||
"thinking_active",
|
||||
"with_thinking",
|
||||
"enable_reason",
|
||||
"reason_enabled",
|
||||
"enable_think",
|
||||
"think_enabled",
|
||||
};
|
||||
|
||||
jinja::context ctx;
|
||||
ctx.is_get_stats = true;
|
||||
|
||||
json messages = json::array({
|
||||
json{
|
||||
{"role", "user"},
|
||||
{"content", "Test message"}
|
||||
},
|
||||
json{
|
||||
{"role", "assistant"},
|
||||
{"content", "Response"},
|
||||
{"reasoning_content", "Some reasoning"}
|
||||
}
|
||||
});
|
||||
|
||||
// Set up base context
|
||||
jinja::global_from_json(ctx, json{
|
||||
{"messages", messages},
|
||||
{"tools", json::array()},
|
||||
{"bos_token", ""},
|
||||
{"eos_token", ""},
|
||||
{"add_generation_prompt", false},
|
||||
{"enable_thinking", true} // Already passed, so we'll exclude this from results
|
||||
}, true);
|
||||
|
||||
// Add candidate variables as undefined to probe which ones are accessed
|
||||
for (const auto & var_name : candidate_vars) {
|
||||
ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
|
||||
}
|
||||
|
||||
try {
|
||||
jinja::runtime runtime(ctx);
|
||||
runtime.execute(tmpl.prog);
|
||||
} catch (const std::exception &) {
|
||||
// Execution may fail, that's okay - we just want to see what variables were accessed
|
||||
}
|
||||
|
||||
// Check which candidate variables were accessed (stats.used = true)
|
||||
std::vector<std::string> accessed_vars;
|
||||
for (const auto & var_name : candidate_vars) {
|
||||
auto val = ctx.get_val(var_name);
|
||||
if (!val->is_undefined()) {
|
||||
// Variable was overwritten, skip it
|
||||
continue;
|
||||
}
|
||||
if (val->stats.used) {
|
||||
accessed_vars.push_back(var_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (accessed_vars.empty()) {
|
||||
LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
|
||||
} else {
|
||||
LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
|
||||
for (const auto & var : accessed_vars) {
|
||||
LOG_ERR(" %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Error checking reasoning variables: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
static void analyze_template(const std::string & template_path) {
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("%s", ANSI_PURPLE);
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" ANALYZING TEMPLATE: %s\n", template_path.c_str());
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR("%s", ANSI_RESET);
|
||||
|
||||
std::string template_source;
|
||||
try {
|
||||
template_source = read_file(template_path);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Error reading template: %s\n", e.what());
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
common_chat_template chat_template(template_source, "", "");
|
||||
json tools = build_tools_definition();
|
||||
|
||||
// ===== CAPABILITIES ANALYSIS =====
|
||||
LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
|
||||
auto caps = chat_template.original_caps();
|
||||
LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
|
||||
LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
|
||||
LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
|
||||
LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
|
||||
LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false");
|
||||
LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? "true" : "false");
|
||||
|
||||
// ===== DIFFERENTIAL ANALYSIS =====
|
||||
|
||||
// Test 1: With and without tools (single user message)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_tools;
|
||||
params_no_tools.messages = json::array({ user_msg });
|
||||
params_no_tools.add_generation_prompt = false;
|
||||
params_no_tools.tools = json::array();
|
||||
|
||||
autoparser::templates_params params_with_tools = params_no_tools;
|
||||
params_with_tools.tools = tools;
|
||||
|
||||
std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
|
||||
std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_tools, output_with_tools);
|
||||
print_diff_split("Diff: With vs Without Tools (single user message)", diff);
|
||||
}
|
||||
|
||||
// Test 2: With and without add_generation_prompt (single user message)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_prompt;
|
||||
params_no_prompt.messages = json::array({ user_msg });
|
||||
params_no_prompt.add_generation_prompt = false;
|
||||
params_no_prompt.tools = json::array();
|
||||
|
||||
autoparser::templates_params params_with_prompt = params_no_prompt;
|
||||
params_with_prompt.add_generation_prompt = true;
|
||||
|
||||
std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
|
||||
std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_prompt, output_with_prompt);
|
||||
print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff);
|
||||
}
|
||||
|
||||
// Test 3: Assistant with reasoning_content (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_reasoning;
|
||||
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
|
||||
params_no_reasoning.add_generation_prompt = false;
|
||||
params_no_reasoning.enable_thinking = true;
|
||||
|
||||
autoparser::templates_params params_with_reasoning = params_no_reasoning;
|
||||
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
|
||||
|
||||
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
|
||||
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
|
||||
print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Test 4: Assistant with reasoning_content (user, assistant, user)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
json user_msg2 = make_user_msg2();
|
||||
|
||||
autoparser::templates_params params_no_reasoning;
|
||||
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
|
||||
params_no_reasoning.add_generation_prompt = false;
|
||||
params_no_reasoning.enable_thinking = true;
|
||||
|
||||
autoparser::templates_params params_with_reasoning = params_no_reasoning;
|
||||
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
|
||||
|
||||
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
|
||||
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
|
||||
print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff);
|
||||
}
|
||||
|
||||
// Test 5: Tool call in last assistant message (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_tool;
|
||||
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
|
||||
params_no_tool.add_generation_prompt = false;
|
||||
params_no_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_with_tool = params_no_tool;
|
||||
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
|
||||
|
||||
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
|
||||
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_tool, output_with_tool);
|
||||
print_diff_split("Diff: With vs Without tool call (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Test 6: Tool call in last assistant message (user, assistant, user)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
json user_msg2 = make_user_msg2_continue();
|
||||
|
||||
autoparser::templates_params params_no_tool;
|
||||
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
|
||||
params_no_tool.add_generation_prompt = false;
|
||||
params_no_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_with_tool = params_no_tool;
|
||||
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
|
||||
|
||||
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
|
||||
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_tool, output_with_tool);
|
||||
print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff);
|
||||
}
|
||||
|
||||
// Test 7: One vs two tool calls (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_one_tool;
|
||||
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
|
||||
params_one_tool.add_generation_prompt = false;
|
||||
params_one_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_two_tools = params_one_tool;
|
||||
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
|
||||
|
||||
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
|
||||
std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
|
||||
|
||||
auto diff = calculate_diff_split(output_one_tool, output_two_tools);
|
||||
print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Test 8: One vs two tool calls (user, assistant, user)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
json user_msg2 = make_user_msg2_continue();
|
||||
|
||||
autoparser::templates_params params_one_tool;
|
||||
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
|
||||
params_one_tool.add_generation_prompt = false;
|
||||
params_one_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_two_tools = params_one_tool;
|
||||
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
|
||||
|
||||
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
|
||||
std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
|
||||
|
||||
auto diff = calculate_diff_split(output_one_tool, output_two_tools);
|
||||
print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff);
|
||||
}
|
||||
|
||||
// Test 9: Tool call with vs without reasoning_content (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_reasoning;
|
||||
params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
|
||||
params_no_reasoning.add_generation_prompt = false;
|
||||
params_no_reasoning.tools = tools;
|
||||
params_no_reasoning.enable_thinking = true;
|
||||
|
||||
autoparser::templates_params params_with_reasoning = params_no_reasoning;
|
||||
params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
|
||||
|
||||
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
|
||||
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
|
||||
print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Check reasoning variables
|
||||
check_reasoning_variables(chat_template);
|
||||
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Analysis failed: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
// Set log level to capture all output
|
||||
common_log_set_verbosity_thold(99);
|
||||
|
||||
analysis_options opts;
|
||||
if (!parse_options(argc, argv, opts)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("%s", ANSI_PURPLE);
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" TEMPLATE ANALYSIS TOOL\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR("%s", ANSI_RESET);
|
||||
LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET);
|
||||
|
||||
for (const auto & path : opts.template_paths) {
|
||||
analyze_template(path);
|
||||
}
|
||||
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("%s", ANSI_GREEN);
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" ANALYSIS COMPLETE\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR("%s", ANSI_RESET);
|
||||
|
||||
return 0;
|
||||
}

File diff suppressed because it is too large

@@ -3,10 +3,10 @@
#include "common.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
// TODO: prevent including the whole server-common.h as we only use server_tokens
|
||||
#include "server-common.h"

@@ -30,7 +30,7 @@ enum server_task_type {
|
||||
// TODO: change this to more generic "response_format" to replace the "format_response_*" in server-common
|
||||
enum task_response_type {
|
||||
TASK_RESPONSE_TYPE_NONE, // llama.cpp native format
|
||||
TASK_RESPONSE_TYPE_NONE, // llama.cpp native format
|
||||
TASK_RESPONSE_TYPE_OAI_CHAT,
|
||||
TASK_RESPONSE_TYPE_OAI_CMPL,
|
||||
TASK_RESPONSE_TYPE_OAI_RESP,

@@ -48,22 +48,23 @@ enum stop_type {
struct task_params {
|
||||
bool stream = true;
|
||||
bool include_usage = false;
|
||||
bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
|
||||
bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
|
||||
bool return_tokens = false;
|
||||
bool return_progress = false;
|
||||
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_indent = 0; // minimum line indentation for the generated text in number of whitespace characters
|
||||
int32_t n_cmpl = 1; // number of completions to generate from this prompt
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_discard =
|
||||
0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_indent = 0; // minimum line indentation for the generated text in number of whitespace characters
|
||||
int32_t n_cmpl = 1; // number of completions to generate from this prompt
|
||||
|
||||
int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled)
|
||||
int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled)
|
||||
|
||||
int64_t t_max_prompt_ms = -1; // TODO: implement
|
||||
int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
|
||||
int64_t t_max_prompt_ms = -1; // TODO: implement
|
||||
int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
|
||||
|
||||
std::map<int, float> lora; // mapping adapter ID -> scale
|
||||
std::map<int, float> lora; // mapping adapter ID -> scale
|
||||
|
||||
std::vector<std::string> antiprompt;
|
||||
std::vector<std::string> response_fields;

@@ -71,7 +72,7 @@ struct task_params {
bool timings_per_token = false;
|
||||
bool post_sampling_probs = false;
|
||||
|
||||
struct common_params_sampling sampling;
|
||||
struct common_params_sampling sampling;
|
||||
struct common_params_speculative speculative;
|
||||
|
||||
// response formatting

@@ -84,7 +85,7 @@ struct task_params {
common_chat_parser_params chat_parser_params;
|
||||
|
||||
// Embeddings
|
||||
int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
|
||||
int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
|
||||
|
||||
json format_logit_bias(const std::vector<llama_logit_bias> & logit_bias) const;
|
||||
json to_json(bool only_metrics = false) const;

@@ -95,9 +96,10 @@ struct task_result_state {
// tracking diffs for partial tool calls
|
||||
std::vector<common_chat_msg_diff> diffs;
|
||||
common_chat_parser_params chat_parser_params;
|
||||
common_chat_msg chat_msg;
|
||||
std::string generated_text; // append new chunks of generated text here
|
||||
std::vector<std::string> generated_tool_call_ids;
|
||||
common_chat_msg chat_msg;
|
||||
std::string generated_text; // append new chunks of generated text here
|
||||
std::vector<std::string> generated_tool_call_ids;
|
||||
std::unordered_set<size_t> sent_tool_call_names;
|
||||
|
||||
// for OpenAI Responses and Anthropic streaming API:
|
||||
// track output item / content block state across chunks
|
||||
|
|
@ -117,17 +119,17 @@ struct task_result_state {
|
|||
, oai_resp_message_id("msg_" + random_string()) {}
|
||||
|
||||
// parse partial tool calls and update the internal state
|
||||
common_chat_msg update_chat_msg(
|
||||
const std::string & text_added,
|
||||
bool is_partial,
|
||||
std::vector<common_chat_msg_diff> & diffs);
|
||||
common_chat_msg update_chat_msg(const std::string & text_added,
|
||||
bool is_partial,
|
||||
std::vector<common_chat_msg_diff> & diffs,
|
||||
bool filter_tool_calls = false);
|
||||
};
|
||||
|
||||
struct server_task {
|
||||
int id = -1; // to be filled by server_queue
|
||||
int id = -1; // to be filled by server_queue
|
||||
|
||||
// TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader
|
||||
size_t index = 0; // used when there are multiple prompts (batch request)
|
||||
size_t index = 0; // used when there are multiple prompts (batch request)
|
||||
|
||||
// used by SERVER_TASK_TYPE_CANCEL
|
||||
int id_target = -1;
|
||||
|
|
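
A minimal usage sketch of the widened update_chat_msg() signature above; the surrounding names (state, piece) are assumptions for illustration, not part of this change:

    // hypothetical streaming step: feed the newly generated text into the state
    std::vector<common_chat_msg_diff> diffs;
    common_chat_msg msg = state.update_chat_msg(piece, /*is_partial=*/true, diffs,
                                                /*filter_tool_calls=*/false);
    // diffs now holds the incremental content / tool-call deltas for this chunk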
@ -157,13 +159,14 @@ struct server_task {
        std::string filename;
        std::string filepath;
    };

    slot_action slot_action;

    // used by SERVER_TASK_TYPE_METRICS
    bool metrics_reset_bucket = false;

    // used by SERVER_TASK_TYPE_SET_LORA
    std::map<int, float> set_lora;  // mapping adapter ID -> scale

    server_task() = default;
@ -203,11 +206,10 @@ struct server_task {
        }
    }

    static task_params params_from_json_cmpl(const llama_vocab * vocab,
                                             const common_params & params_base,
                                             const int n_ctx_slot,
                                             const json & data);

    // utility function
    static std::unordered_set<int> get_list_id(const std::vector<server_task> & tasks) {
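
As a rough sketch of how the declaration above is used: a completion request body is converted into task_params. The JSON keys are assumed to mirror the task_params field names, and vocab, params_base and n_ctx_slot are assumed to be in scope:

    // hypothetical call site (illustrative only)
    json data = {
        { "stream",       true },
        { "cache_prompt", true },
        { "n_predict",    128  },
    };
    task_params params = server_task::params_from_json_cmpl(vocab, params_base, n_ctx_slot, data);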
@ -259,50 +261,53 @@ struct result_timings {
    int32_t cache_n = -1;

    int32_t prompt_n = -1;
    double prompt_ms;
    double prompt_per_token_ms;
    double prompt_per_second;

    int32_t predicted_n = -1;
    double predicted_ms;
    double predicted_per_token_ms;
    double predicted_per_second;

    // Optional speculative metrics - only included when > 0
    int32_t draft_n = 0;
    int32_t draft_n_accepted = 0;

    json to_json() const;
};

struct result_prompt_progress {
    int32_t total = 0;
    int32_t cache = 0;
    int32_t processed = 0;
    int64_t time_ms = 0;

    json to_json() const;
};

struct server_task_result {
    int id = -1;
    int id_slot = -1;

    // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader
    size_t index = 0;  // to be used for batched tasks

    virtual bool is_error() {
        // only used by server_task_result_error
        return false;
    }

    virtual bool is_stop() {
        // only used by server_task_result_cmpl_*
        return true;
    }

    virtual void update(task_result_state &) {
        // only used by server_task_result_cmpl_*
    }

    virtual json to_json() = 0;
    virtual ~server_task_result() = default;
};
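
The per-token and per-second timing fields are presumably derived from the raw counters; the relationship below is an assumption based on the field names, not taken from this diff:

    // e.g. for the prompt phase:
    // prompt_per_token_ms = prompt_ms / prompt_n;
    // prompt_per_second   = 1e3 * prompt_n / prompt_ms;
    // so prompt_n = 256 tokens processed in prompt_ms = 512 ms
    // gives 2 ms/token and 500 tokens/s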
@ -311,13 +316,15 @@ using server_task_result_ptr = std::unique_ptr<server_task_result>;

struct completion_token_output {
    llama_token tok;
    float prob;
    std::string text_to_send;

    struct prob_info {
        llama_token tok;
        std::string txt;
        float prob;
    };

    std::vector<prob_info> probs;

    json to_json(bool post_sampling_probs) const;
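
To make the shape of this record concrete, a hedged sketch of filling one sampled-token entry; the token id and probability values are invented for illustration:

    completion_token_output out;
    out.tok          = 42;       // llama_token is an integer id; 42 is made up
    out.prob         = 0.75f;
    out.text_to_send = "Hello";
    out.probs.push_back({ /*tok=*/42, /*txt=*/"Hello", /*prob=*/0.75f });
    json j = out.to_json(/*post_sampling_probs=*/false);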
@ -327,29 +334,28 @@ struct completion_token_output {
    static float logarithm(float x);

    static std::vector<unsigned char> str_to_bytes(const std::string & str);
};

struct server_task_result_cmpl_final : server_task_result {
    std::string content;
    llama_tokens tokens;

    bool stream;
    bool include_usage;
    result_timings timings;
    std::string prompt;

    bool truncated;
    int32_t n_decoded;
    int32_t n_prompt_tokens;
    int32_t n_tokens_cached;
    bool has_new_line;
    std::string stopping_word;
    stop_type stop = STOP_TYPE_NONE;

    bool post_sampling_probs;
    std::vector<completion_token_output> probs_output;
    std::vector<std::string> response_fields;

    task_params generation_params;
@ -358,7 +364,7 @@ struct server_task_result_cmpl_final : server_task_result {
    task_response_type res_type = TASK_RESPONSE_TYPE_NONE;
    std::string oaicompat_model;
    std::string oaicompat_cmpl_id;
    common_chat_msg oaicompat_msg;  // to be populated by update()

    std::vector<common_chat_msg_diff> oaicompat_msg_diffs;  // to be populated by update()
    bool is_updated = false;
@ -369,7 +375,7 @@ struct server_task_result_cmpl_final : server_task_result {
    std::string oai_resp_message_id;

    virtual bool is_stop() override {
        return true;  // in stream mode, final responses are considered stop
    }

    virtual json to_json() override;
@ -407,11 +413,11 @@ struct server_task_result_cmpl_partial : server_task_result {
    int32_t n_decoded;
    int32_t n_prompt_tokens;

    bool post_sampling_probs;
    bool is_progress = false;
    completion_token_output prob_output;
    result_timings timings;
    result_prompt_progress progress;

    // response formatting
    bool verbose = false;
@ -435,7 +441,7 @@ struct server_task_result_cmpl_partial : server_task_result {
    bool anthropic_has_reasoning = false;

    virtual bool is_stop() override {
        return false;  // in stream mode, partial responses are not considered stop
    }

    virtual void update(task_result_state & state) override;
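
Taken together with the final-result override earlier, the is_stop()/update() overrides suggest a consumer loop roughly like the sketch below; next_result(), send_chunk() and state are hypothetical names, not part of this diff:

    // illustrative streaming loop (an assumption, not the server's actual code)
    while (true) {
        server_task_result_ptr res = next_result();
        if (res->is_error()) {
            send_chunk(res->to_json());  // surface the error payload
            break;
        }
        res->update(state);              // fold partial tool calls / diffs into the state
        send_chunk(res->to_json());
        if (res->is_stop()) {
            break;                       // final result: end of stream
        }
    }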
@ -477,24 +483,22 @@ struct server_task_result_rerank : server_task_result {
};

struct server_task_result_error : server_task_result {
    error_type err_type = ERROR_TYPE_SERVER;
    std::string err_msg;

    // for ERROR_TYPE_EXCEED_CONTEXT_SIZE
    int32_t n_prompt_tokens = 0;
    int32_t n_ctx = 0;

    virtual bool is_error() override { return true; }

    virtual json to_json() override;
};

struct server_task_result_metrics : server_task_result {
    int n_idle_slots;
    int n_processing_slots;
    int n_tasks_deferred;
    int64_t t_start;

    // TODO: somehow reuse server_metrics in the future, instead of duplicating the fields
@ -523,7 +527,7 @@ struct server_task_result_metrics : server_task_result {

struct server_task_result_slot_save_load : server_task_result {
    std::string filename;
    bool is_save;  // true = save, false = load

    size_t n_tokens;
    size_t n_bytes;
@ -541,9 +545,10 @@ struct server_task_result_slot_erase : server_task_result {
struct server_task_result_get_lora : server_task_result {
    struct lora {
        common_adapter_lora_info info;
        std::string alora_invocation_string;
        llama_tokens alora_invocation_tokens;
    };

    std::vector<lora> loras;

    virtual json to_json() override;
@ -559,9 +564,7 @@ struct server_prompt_checkpoint {
    std::vector<uint8_t> data;

    size_t size() const { return data.size(); }
};

struct server_prompt {
@ -581,22 +584,14 @@ struct server_prompt {
        return res;
    }

    int n_tokens() const { return tokens.size(); }

    server_prompt clone() const { return server_prompt{ tokens.clone(), data, checkpoints }; }
};

struct server_prompt_cache {
    server_prompt_cache(int32_t limit_size_mib, size_t limit_tokens) {
        this->limit_size = 1024ull * 1024ull * (limit_size_mib < 0 ? 0 : limit_size_mib);
        this->limit_tokens = limit_tokens;
    }
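
A quick worked example of the constructor above; the concrete values are illustrative only:

    // limit the prompt cache to 512 MiB and 8192 tokens (numbers are made up)
    server_prompt_cache cache(/*limit_size_mib=*/512, /*limit_tokens=*/8192);
    // limit_size = 1024ull * 1024ull * 512 = 536870912 bytes
    // a negative limit_size_mib is clamped to 0 by the ternary above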