diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 295ae9ea25..41069a04ef 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -48,10 +48,11 @@ add_library(${TARGET} STATIC arg.cpp arg.h base64.hpp - chat-parser.cpp - chat-parser.h - chat-parser-xml-toolcall.h - chat-parser-xml-toolcall.cpp + chat-auto-parser-generator.cpp + chat-auto-parser-helpers.cpp + chat-auto-parser.h + chat-diff-analyzer.cpp + chat-diff-analyzer.h chat-peg-parser.cpp chat-peg-parser.h chat.cpp diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp new file mode 100644 index 0000000000..072bdb795f --- /dev/null +++ b/common/chat-auto-parser-generator.cpp @@ -0,0 +1,388 @@ +#include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" +#include "chat-peg-parser.h" +#include "chat.h" +#include "json-schema-to-grammar.h" +#include "nlohmann/json.hpp" +#include + + +using json = nlohmann::ordered_json; + +// Helper to iterate over tools/functions +static void foreach_function(const json & tools, const std::function & fn) { + for (const auto & tool : tools) { + if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) { + continue; + } + fn(tool); + } +} + +namespace autoparser { + +parser_build_context::parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs) + : p(p), inputs(inputs), reasoning_parser(p.eps()) {} + +common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs) { + // Run differential analysis to extract template structure + analyze_template analysis(tmpl); + return generate_parser(tmpl, inputs, analysis); +} + +common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs, + const analyze_template & analysis) { + // Build the parser using the analysis results + auto parser = analysis.build_parser(inputs); + + // Create the result structure + common_chat_params data; + data.prompt = common_chat_template_direct_apply(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.preserved_tokens = analysis.preserved_tokens; + data.parser = parser.save(); + + // Build grammar if tools are present + bool has_tools = analysis.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty(); + bool include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + if (include_grammar) { + data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; + + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + auto schema = function.at("parameters"); + builder.resolve_refs(schema); + }); + parser.build_grammar(builder, data.grammar_lazy); + }); + + // Set grammar triggers based on tool section markers (fall back to per-call markers) + std::string trigger_marker = !analysis.tools.format.section_start.empty() + ? analysis.tools.format.section_start + : analysis.tools.format.per_call_start; + if (!trigger_marker.empty()) { + data.grammar_triggers = { + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker } + }; + } + } + + return data; +} + +common_peg_arena analyze_template::build_parser(const templates_params & inputs) const { + return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + p.set_allow_python_dict_format(true); + + parser_build_context ctx(p, inputs); + bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + bool enable_thinking = inputs.enable_thinking; + + ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE; + ctx.content = &content; + + // Build reasoning parser + ctx.reasoning_parser = reasoning.build_parser(ctx); + + bool has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty(); + + if (has_response_format) { + return ctx.reasoning_parser + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end(); + } + + if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) { + return tools.build_parser(ctx); + } + + return content.build_parser(ctx); + }); +} + +common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const { + auto & p = ctx.p; + + if (!ctx.extracting_reasoning) { + return p.eps(); + } + + bool thinking_forced_open = (mode == reasoning_mode::FORCED_OPEN); + bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED); + + if (thinking_forced_open || thinking_forced_closed) { + // Thinking is forced open OR forced closed with enable_thinking=true + // In both cases, expect only the closing tag (opening was in template) + return p.reasoning(p.until(end)) + end; + } + if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) { + // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools) + // Both use the same tag-based pattern if markers are available + if (!start.empty() && !end.empty()) { + return p.optional(start + p.reasoning(p.until(end)) + end); + } + } else if (mode == reasoning_mode::DELIMITER) { + return p.optional(p.reasoning(p.until(end)) + end); + } + + return p.eps(); +} + +common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const { + auto & p = ctx.p; + + if (is_always_wrapped()) { + if (ctx.extracting_reasoning) { + return ctx.reasoning_parser + start + p.content(p.until(end)) + end + p.end(); + } + return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end(); + } + return ctx.reasoning_parser + p.content(p.rest()) + p.end(); +} + +common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & ctx) const { + auto & p = ctx.p; + + if (is_always_wrapped()) { + return p.optional(start + p.content(p.until(end)) + end); + } + return p.eps(); +} + +common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const { + switch (format.mode) { + case tool_format::JSON_NATIVE: + return build_tool_parser_json_native(ctx); + case tool_format::TAG_WITH_JSON: + return build_tool_parser_tag_json(ctx); + case tool_format::TAG_WITH_TAGGED: + return build_tool_parser_tag_tagged(ctx); + default: + GGML_ABORT("Unable to create tool parser"); + } +} + +common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const { + auto & p = ctx.p; + const auto & inputs = ctx.inputs; + + // Build effective field names with dot notation if function_field is set + std::string name_field = format.name_field; + std::string args_field = format.args_field; + + if (!format.function_field.empty() && + format.function_field != "function" && + name_field.find('.') == std::string::npos) { + name_field = format.function_field + "." + name_field; + args_field = format.function_field + "." + args_field; + } + + auto tools_parser = p.standard_json_tools( + format.section_start, + format.section_end, + inputs.tools, + inputs.parallel_tool_calls, + inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, + name_field, + args_field, + format.tools_array_wrapped, + format.fun_name_is_key, + format.id_field, + format.gen_id_field, + format.parameter_order + ); + + // Handle content wrappers if present + if (ctx.content && ctx.content->is_always_wrapped()) { + auto wrapped_content = ctx.content->build_optional_wrapped(ctx); + return ctx.reasoning_parser + wrapped_content + tools_parser + p.end(); + } + + auto content_before_tools = format.section_start.empty() ? p.eps() : p.until(format.section_start); + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); +} + +common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const { + auto & p = ctx.p; + const auto & inputs = ctx.inputs; + + common_peg_parser tool_choice = p.choice(); + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & func = tool.at("function"); + std::string name = func.at("name"); + const auto & schema = func.at("parameters"); + + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !call_id.prefix.empty() && !call_id.suffix.empty()) { + call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix; + } + + auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + + call_id_section + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); + + if (!function.close.empty()) { + func_parser = func_parser + function.close; + } + + tool_choice |= p.rule("tool-" + name, func_parser); + }); + + auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + + common_peg_parser tool_calls = p.eps(); + + if (!format.per_call_start.empty()) { + auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + wrapped_call + p.zero_or_more(p.space() + wrapped_call)); + } else { + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!format.section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + + tool_calls + p.space() + (format.section_end.empty() ? p.end() : p.literal(format.section_end))); + } + } else { + std::string separator = ", "; // Default + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + format.section_end); + } else { + tool_calls = p.trigger_rule("tool-call", + format.section_start + tool_choice + format.section_end); + } + } + + if (!require_calls) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; + auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); +} + +common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const { + auto & p = ctx.p; + const auto & inputs = ctx.inputs; + + common_peg_parser tool_choice = p.choice(); + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & func = tool.at("function"); + std::string name = func.at("name"); + const auto & params = func.at("parameters"); + + if (!params.contains("properties") || !params.at("properties").is_object()) { + return; + } + + const auto & properties = params.at("properties"); + std::set required; + if (params.contains("required") && params.at("required").is_array()) { + params.at("required").get_to(required); + } + + // Build parser for each argument + std::vector arg_parsers; + for (const auto & [param_name, param_schema] : properties.items()) { + bool is_required = required.find(param_name) != required.end(); + auto type = param_schema.value("type", "object"); + + auto arg = p.tool_arg( + p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + + (type == "string" ? + p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : + p.tool_arg_json_value(p.schema(p.json(), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + + p.tool_arg_close(p.literal(arguments.value_suffix)) + ); + + if (is_required) { + arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); + } else { + arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); + } + } + + // Build arg sequence with space() between consecutive args + common_peg_parser args_seq = p.eps(); + for (size_t i = 0; i < arg_parsers.size(); i++) { + if (i > 0) { + args_seq = args_seq + p.space(); + } + args_seq = args_seq + arg_parsers[i]; + } + + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !call_id.prefix.empty() && !call_id.suffix.empty()) { + call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix; + } + + auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + + call_id_section + + p.space() + args_seq; + + if (!function.close.empty()) { + func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close)); + } else if (!format.per_call_end.empty()) { + // When there's no func_close but there is a per_call_end marker, use peek() to ensure + // we only emit tool_close when we can actually see the closing marker. This prevents + // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. + func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end))); + } else { + func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper + } + + tool_choice |= p.rule("tool-" + name, func_parser); + }); + + auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + + common_peg_parser tool_calls = p.eps(); + + if (!format.per_call_start.empty()) { + auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); + } else { + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!format.section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + + tool_calls + p.space() + (format.section_end.empty() ? p.end() : p.literal(format.section_end))); + } + } else { + std::string separator = ", "; // Default + + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + format.section_end); + } else { + tool_calls = p.trigger_rule("tool-call", + format.section_start + p.space() + tool_choice + p.space() + format.section_end); + } + } + + if (!require_tools) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; + auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); +} + +} // namespace autoparser diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp new file mode 100644 index 0000000000..3be1cbf1b2 --- /dev/null +++ b/common/chat-auto-parser-helpers.cpp @@ -0,0 +1,348 @@ +#include "chat-auto-parser-helpers.h" + +#include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" +#include "chat.h" +#include "log.h" +#include "nlohmann/json.hpp" + +#include +#include + +using json = nlohmann::ordered_json; + +std::string trim_whitespace(const std::string & str) { + size_t start = 0; + while (start < str.length() && std::isspace(static_cast(str[start]))) { + start++; + } + + if (start == str.length()) { + return ""; + } + + size_t end = str.length() - 1; + while (end > start && std::isspace(static_cast(str[end]))) { + end--; + } + + return str.substr(start, end - start + 1); +} + +std::string trim_leading_whitespace(const std::string & str) { + size_t start = 0; + while (start < str.length() && std::isspace(static_cast(str[start]))) { + start++; + } + + return str.substr(start); +} + +std::string trim_trailing_whitespace(const std::string & str) { + if (str.empty()) { + return ""; + } + + size_t end = str.length() - 1; + while (end > 0 && std::isspace(static_cast(str[end]))) { + end--; + } + + // If first char is also whitespace, return empty string + if (end == 0 && std::isspace(static_cast(str[0]))) { + return ""; + } + + return str.substr(0, end + 1); +} + +std::string trim_trailing_newlines(const std::string & str) { + size_t end = str.length(); + while (end > 0 && str[end - 1] == '\n') { + end--; + } + + return str.substr(0, end); +} + +static size_t common_prefix_len(const std::string & left, const std::string & right) { + size_t prefix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) { + prefix_len++; + } + return prefix_len; +} + +static size_t common_suffix_len(const std::string & left, const std::string & right) { + size_t suffix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (suffix_len < min_len && left[left.length() - 1 - suffix_len] == right[right.length() - 1 - suffix_len]) { + suffix_len++; + } + return suffix_len; +} + +diff_split calculate_diff_split(const std::string & left, const std::string & right) { + diff_split result; + + auto left_seg = segmentize_markers(left); + auto right_seg = segmentize_markers(right); + + if (left_seg.empty()) { + result.right = right; + return result; + } + if (right_seg.empty()) { + result.left = left; + return result; + } + + auto left_start = left_seg.begin(); + auto left_end = --left_seg.end(); + auto right_start = right_seg.begin(); + auto right_end = --right_seg.end(); + + auto test = [&] () { + return left_start != left_end && right_start != right_end; + }; + + bool left_fully_consumed = false; + bool right_fully_consumed = false; + + while (test()) { + bool advanced = false; + if (*left_start == *right_start) { + result.prefix.append(left_start->value); + left_start++; + right_start++; + advanced = true; + } + if (*left_end == *right_end) { + result.suffix = left_end->value + result.suffix; + if (left_start != left_end) { + left_end--; + } else { + left_fully_consumed = true; + } + if (right_start != right_end) { + right_end--; + } else { + right_fully_consumed = true; + } + advanced = true; + } + if (!advanced) { + break; + } + } + + if (left_start == left_end && right_start != right_end) { + if (*left_start == *right_end) { + result.suffix = right_end->value + result.suffix; + right_end--; + left_fully_consumed = true; + } else if (*left_start == *right_start) { + result.prefix.append(right_start->value); + right_start++; + left_fully_consumed = true; + } + } else if (right_start == right_end && left_start != left_end) { + if (*left_end == *right_start) { + result.suffix = left_end->value + result.suffix; + left_end--; + right_fully_consumed = true; + } else if (*left_start == *right_start) { + result.prefix.append(left_start->value); + left_start++; + right_fully_consumed = true; + } + } else if (left_start == left_end && right_start == right_end && *left_start == *right_start && left_start->type == segment_type::MARKER) { + result.prefix.append(right_start->value); + left_fully_consumed = true; + right_fully_consumed = true; + } + + auto eat_segment = [](std::string & str, segment & seg) -> std::string { return str.append(seg.value); }; + + bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT; + bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT; + + std::string remainder_left = std::accumulate(left_start, left_fully_consumed ? left_end : ++left_end, std::string(), eat_segment); + std::string remainder_right = std::accumulate(right_start, right_fully_consumed ? right_end : ++right_end, std::string(), eat_segment); + + size_t suffix_len = can_have_text_suffix ? common_suffix_len(remainder_left, remainder_right) : 0; + // avoid overlaps between prefix and suffix + size_t prefix_len = can_have_text_prefix ? common_prefix_len(remainder_left.substr(0, remainder_left.size() - suffix_len), + remainder_right.substr(0, remainder_right.size() - suffix_len)) : 0; + + result.prefix.append(remainder_left.substr(0, prefix_len)); + result.suffix = remainder_left.substr(remainder_left.length() - suffix_len, suffix_len) + result.suffix; + result.left = remainder_left.substr(prefix_len, remainder_left.length() - prefix_len - suffix_len); + result.right = remainder_right.substr(prefix_len, remainder_right.length() - prefix_len - suffix_len); + + if (result.left == "" && result.right == "") { + // degenerate case, no diff + result.prefix = left; + result.suffix = ""; + // pick prefix = all as representation + } + return result; +} + +// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right` +std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) { + // Find the common prefix of left and right + size_t common_prefix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) { + common_prefix_len++; + } + + // If there's no common prefix, return empty string + if (common_prefix_len == 0) { + return ""; + } + + // Find the common prefix in the full string + std::string common_prefix = left.substr(0, common_prefix_len); + size_t pos = full.find(common_prefix); + + // If not found, return empty string + if (pos == std::string::npos) { + return ""; + } + + // Return everything before the common prefix + return full.substr(0, pos); +} + +// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right` +std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) { + // Find the common suffix of left and right (compare from the end) + size_t common_suffix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (common_suffix_len < min_len && + left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) { + common_suffix_len++; + } + + // If there's no common suffix, return empty string + if (common_suffix_len == 0) { + return ""; + } + + // Extract the common suffix + std::string common_suffix = left.substr(left.length() - common_suffix_len); + + // Find the last occurrence of the common suffix in the full string + size_t pos = full.rfind(common_suffix); + + // If not found, return empty string + if (pos == std::string::npos) { + return ""; + } + + // Return everything after the common suffix + return full.substr(pos + common_suffix_len); +} + +// TODO: segmentize will treat a JSON array inside tags as a tag: [{ "fun": { ... } }] will be three markers +// not too worried about that because it hasn't turned out as a problem anywhere, but noting here in case it will +// Might have to put some restrictions on tag contents as well (like "no { }") +std::vector segmentize_markers(const std::string & text) { + std::vector retval; + bool in_marker = false; + char marker_opener = '\0'; + + auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; }; + auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); }; + + size_t last_border = 0; + + for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) { + if (!in_marker && is_marker_opener(text[cur_pos])) { + if (last_border < cur_pos) { + retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border))); + } + last_border = cur_pos; + in_marker = true; + marker_opener = text[cur_pos]; + } else if (in_marker && is_marker_closer(marker_opener, text[cur_pos])) { + // no need to check because last_border will always be smaller + retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1))); + last_border = cur_pos + 1; + in_marker = false; + marker_opener = '\0'; + } + } + if (last_border < text.length()) { + retval.push_back(segment(segment_type::TEXT, text.substr(last_border))); + } + return retval; +} + +std::vector prune_whitespace_segments(const std::vector & segments) { + std::vector result; + for (const auto & seg : segments) { + if (!trim_whitespace(seg.value).empty()) { + result.push_back(seg); + } + } + return result; +} + +namespace autoparser { + +std::string apply_template(const common_chat_template & tmpl, const template_params & params) { + templates_params tmpl_params; + tmpl_params.messages = params.messages; + tmpl_params.tools = params.tools; + tmpl_params.add_generation_prompt = params.add_generation_prompt; + tmpl_params.enable_thinking = params.enable_thinking; + + if (params.extra_context) { + tmpl_params.extra_context = *params.extra_context; + } + tmpl_params.extra_context["enable_thinking"] = params.enable_thinking; + + try { + return common_chat_template_direct_apply(tmpl, tmpl_params); + } catch (const std::exception & e) { + LOG_DBG("Template application failed: %s\n", e.what()); + return ""; + } +} + +std::optional compare_variants( + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier) { + // Create variant B by copying A + template_params params_B = params_A; + + // Apply modifier to create variant B + if (params_modifier) { + params_modifier(params_B); + } + + // Apply template to both variants + std::string output_A = apply_template(tmpl, params_A); + std::string output_B = apply_template(tmpl, params_B); + + // Check for template application failures + if (output_A.empty() || output_B.empty()) { + return std::nullopt; + } + + // Calculate diff and return result with both outputs + compare_variants_result result; + result.diff = calculate_diff_split(output_A, output_B); + result.output_A = output_A; + result.output_B = output_B; + + return result; +} + +} // namespace autoparser + diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h new file mode 100644 index 0000000000..c235d63850 --- /dev/null +++ b/common/chat-auto-parser-helpers.h @@ -0,0 +1,73 @@ +#pragma once + +#include "chat-diff-analyzer.h" +#include +#include +#include + +std::string trim_whitespace(const std::string & str); +std::string trim_leading_whitespace(const std::string & str); +std::string trim_trailing_whitespace(const std::string & str); +std::string trim_trailing_newlines(const std::string & str); + +// calculate a diff split (longest common prefix, longest common suffix excluding prefix, +// mismatched part on the left, mismatched part on the right) between two strings +// account for markers - align prefix and suffix endings so that they end on markers +// * eg.: +// calculate_diff_split("
", "

Something

") -> +// { "prefix": "" (not: "<"), "suffix": "", "left": "
", "right": "

Something

" } +// calculate_diff_split("Something", "") -> +// { "prefix": "", "suffix": "", "left": "Something", "right": "" } +diff_split calculate_diff_split(const std::string & left, const std::string & right); + +// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right` +// Returns empty string if there's no common prefix +// * eg.: +// until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a " +// until_common_prefix("", "", "") -> "" +// until_common_prefix("some text", "1234", "abcd") -> "" +// until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one "" +std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right); + +// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right` +// Returns empty string if there's no common suffix +// Mirror function of `until_common_prefix` +// * eg.: +// after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call" +// after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four" +std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right); + +// Segmentize text into markers and non-marker fragments +// * eg.: +// segmentize_markers("The site title
Here's some content
" -> +// [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (TEXT, "The site title"), (MARKER, ""), +// (MARKER, ""), (MARKER, "
"), (TEXT, "Here's some "), (MARKER, ""), (TEXT, "content"), (MARKER, ""), +// (MARKER, "
"), (MARKER, ""), (MARKER, "") +// ] +// segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") -> +// [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ] +std::vector segmentize_markers(const std::string & text); + +// Prune whitespace-only segments from a vector of segments +// * eg.: +// segmentize_markers("\n\n\n \n\n\n") -> +// X = [ (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n \n"), +// (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n"), (MARKER, "") ] +// prune_whitespace_segments(X) -> [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (MARKER, ""), +// (MARKER, ""), (MARKER, "") ] +std::vector prune_whitespace_segments(const std::vector & segments); + +namespace autoparser { + +// Apply a template with the given parameters, returning the rendered string (empty on failure) +std::string apply_template(const common_chat_template & tmpl, const template_params & params); + +// Factorized differential comparison function +// Takes base params and a single modifier lambda to create variant B +// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure +std::optional compare_variants( + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier); + +} // namespace autoparser diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h new file mode 100644 index 0000000000..31ee56dd03 --- /dev/null +++ b/common/chat-auto-parser.h @@ -0,0 +1,45 @@ +#pragma once + +#include "chat-diff-analyzer.h" +#include "chat.h" +#include "chat-peg-parser.h" +#include "common.h" + +#include +#include + +using json = nlohmann::ordered_json; + +namespace autoparser { + +struct templates_params { + json messages; + json tools; + common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + json json_schema; + bool parallel_tool_calls = true; + common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO; + bool stream = true; + std::string grammar; + bool add_generation_prompt = false; + bool enable_thinking = true; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + json extra_context; + bool add_bos = false; + bool add_eos = false; + bool is_inference = true; + bool add_inference = false; + bool mark_input = true; // whether to mark input strings in the jinja context +}; + +class universal_peg_generator { + public: + static common_chat_params generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs); + + static common_chat_params generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs, + const analyze_template & analysis); +}; + +} // namespace autoparser diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp new file mode 100644 index 0000000000..81c8255a11 --- /dev/null +++ b/common/chat-diff-analyzer.cpp @@ -0,0 +1,1472 @@ +#include "chat-diff-analyzer.h" + +#include "chat-auto-parser-helpers.h" +#include "chat-peg-parser.h" +#include "chat.h" +#include "log.h" +#include "nlohmann/json.hpp" +#include "peg-parser.h" + +#include +#include + +#define ANSI_RESET "\033[0m" +#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" +#define ANSI_ORANGE "\033[1m\x1b[38;5;214m" +#define ANSI_RED "\033[1m\x1b[38;5;196m" + +using json = nlohmann::ordered_json; + +namespace autoparser { + +static std::vector> workarounds( + { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to + // support reasoning on them + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { + if (tmpl.src.find("content.split('')") != std::string::npos && + analysis.reasoning.mode == reasoning_mode::NONE) { + analysis.reasoning.mode = reasoning_mode::FORCED_OPEN; + analysis.reasoning.start = ""; + analysis.reasoning.end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET); + } + }, + // Granite 3.3, with separate reasoning and content markers + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { + if (tmpl.src.find("Write your thoughts between and write your response between " + "") != std::string::npos) { + analysis.reasoning.mode = reasoning_mode::TAG_BASED; + analysis.reasoning.start = ""; + analysis.reasoning.end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + analysis.content.mode = content_mode::WRAPPED_WITH_REASONING; + analysis.content.start = ""; + analysis.content.end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET); + } + }, + // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|> + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { + if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos && + tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.content.start.empty()) { + analysis.content.mode = content_mode::ALWAYS_WRAPPED; + analysis.content.start = "<|CHATBOT_TOKEN|>"; + analysis.content.end = "<|END_OF_TURN_TOKEN|>"; + analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>"); + analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>"); + LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET); + } + }, + // Functionary - no tool call section delimiter + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { + if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", " + "\"code_interpreter\") | list | length > 0") != std::string::npos) { + analysis.content.mode = content_mode::PLAIN; + analysis.content.end = ""; + analysis.tools.function.name_prefix = ""; + analysis.tools.format.section_start = ""; + analysis.tools.format.section_end = ""; + analysis.tools.format.per_call_start = ""); + analysis.preserved_tokens.push_back("<|eom_id|>"); + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET); + } + }, + // DeepSeek-R1-Distill-Qwen + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { + if (tmpl.src.find( + "{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") != + std::string::npos) { + analysis.tools.format.section_start = "<|tool▁calls▁begin|>"; + analysis.tools.format.section_end = "<|tool▁calls▁end|>"; + analysis.tools.format.per_call_start = "<|tool▁call▁begin|>function"; + analysis.tools.function.name_prefix = "<|tool▁sep|>"; + analysis.tools.format.per_call_end = "<|tool▁call▁end|>"; + analysis.tools.function.close = "```"; + } + } }); + +// Common JSON structures +static json params_schema = { + { "type", "object" }, + { "properties", + { { "first", { { "type", "string" }, { "description", "First argument" } } }, + { "second", { { "type", "string" }, { "description", "Second argument" } } } } }, + { "required", json::array({}) } +}; + +static json tools = json::array({ + { { "type", "function" }, + { "function", + json{ { "name", "foofoo" }, { "description", "Test function foo" }, { "parameters", params_schema } } } }, + { { "type", "function" }, + { "function", + json{ { "name", "barbar" }, { "description", "Test function bar" }, { "parameters", params_schema } } } } +}); + +static json user_msg = json{ + { "role", "user" }, + { "content", "Hello" } +}; + +static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call00001") { + return json{ + { "id", id }, + { "type", "function" }, + { "function", json{ { "name", name }, { "arguments", args } } } + }; +} + +static json first_tool_call_zero_args = build_tool_call("foofoo", json::object(), "call00001"); +static json first_tool_call_one_arg = build_tool_call("foofoo", {{ "first", "XXXX" }}, "call00001"); +static json first_tool_call_one_arg_other_val = build_tool_call("foofoo", {{ "first", "YYYY" }}, "call00001"); +static json first_tool_call_other_arg = build_tool_call("foofoo", {{ "second", "YYYY" }}, "call00001"); + +static json first_tool_call = + build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call00001"); +static json second_tool_call = + build_tool_call("barbar", json{ { "first", "XXXX" }, { "second", "YYYY" }}, "call00002"); +static json first_tool_call_alt_id = + build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call99999"); + +analyze_template::analyze_template(const common_chat_template & tmpl) + : jinja_caps(tmpl.original_caps()) + , reasoning(tmpl, jinja_caps.supports_tool_calls) + , content(tmpl, reasoning) + , tools(jinja_caps.supports_tool_calls ? analyze_tools(tmpl, jinja_caps, reasoning) : analyze_tools()) +{ + LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET); + + collect_preserved_tokens(); + + for (auto & workaround : workarounds) { + workaround(tmpl, *this); + } + + LOG_DBG(ANSI_PURPLE "=== Differential analysis complete ===\n" ANSI_RESET); +} + +void analyze_template::collect_preserved_tokens() { + auto add_token = [this](const std::string & org_token) { + std::string token = trim_whitespace(org_token); + if (!token.empty()) { + // Avoid duplicates + if (std::find(preserved_tokens.begin(), preserved_tokens.end(), token) == preserved_tokens.end()) { + preserved_tokens.push_back(token); + } + } + }; + + add_token(reasoning.start); + add_token(reasoning.end); + add_token(content.start); + add_token(content.end); + add_token(tools.format.section_start); + add_token(tools.format.section_end); + add_token(tools.format.per_call_start); + add_token(tools.format.per_call_end); + add_token(tools.function.name_prefix); + add_token(tools.function.name_suffix); + add_token(tools.function.close); + add_token(tools.arguments.start); + add_token(tools.arguments.end); + add_token(tools.arguments.name_prefix); + add_token(tools.arguments.name_suffix); + add_token(tools.arguments.separator); + add_token(tools.arguments.value_prefix); + add_token(tools.arguments.value_suffix); + add_token(tools.call_id.prefix); + add_token(tools.call_id.suffix); +} + +analyze_reasoning::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools) + : analyze_base(tmpl) { + LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET); + + compare_reasoning_presence(); + compare_thinking_enabled(); + if (supports_tools) { + compare_reasoning_scope(); + } +} + +void analyze_reasoning::compare_reasoning_presence() { + json user_msg = json{ + { "role", "user" }, + { "content", "Hello" } + }; + + json assistant_no_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." } + }; + + json assistant_with_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." }, + { "reasoning_content", "Let me think about this." } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_reasoning }); + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed, skipping reasoning detection\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + const std::string reasoning_content = "Let me think about this."; + + if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) { + auto seg = prune_whitespace_segments(segmentize_markers(diff.right)); + if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) { + // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace) + mode = reasoning_mode::TAG_BASED; + start = trim_whitespace(seg[0].value); + end = trim_leading_whitespace(seg[2].value); + for (size_t i = 3; i < seg.size(); i++) { + end += seg[i].value; + } + // we always truncate because this doesn't really influence correctness but model might not always generate newline + end = trim_whitespace(end); + } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) { + // delimited + mode = reasoning_mode::DELIMITER; + end = trim_leading_whitespace(seg[1].value); + for (size_t i = 2; i < seg.size(); i++) { + end += seg[i].value; + } + end = trim_whitespace(end); + } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) { + // the marker might be in the prefix actually, let's check for case of + // left: empty + // right: reasoning_content + // suffix: content + // prefix: ... + auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix)); + if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER && + trim_whitespace(suf_seg[1].value).find("I can help.") == 0) { + auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix)); + if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER || + (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() && + pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) { + auto marker_seg = pre_seg[pre_seg.size() - 1]; + if (marker_seg.type == segment_type::TEXT) { + marker_seg = pre_seg[pre_seg.size() - 2]; + } + mode = reasoning_mode::FORCED_CLOSED; + start = trim_whitespace(marker_seg.value); + end = trim_whitespace(suf_seg[0].value); + } + } + } + } +} + +void analyze_reasoning::compare_thinking_enabled() { + json user_msg = json{ + { "role", "user" }, + { "content", "Hello" } + }; + + template_params params; + params.messages = json::array({ user_msg }); + params.add_generation_prompt = true; + params.enable_thinking = false; + + auto comparison = compare_variants(*tmpl, params, [&](template_params & p) { p.enable_thinking = true; }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET , __func__); + return; + } + + const auto & diff = comparison->diff; + + std::string left_trimmed = diff.left; + trim_whitespace(left_trimmed); + + if (left_trimmed.empty() && !diff.right.empty()) { + std::string right_trimmed = diff.right; + trim_whitespace(right_trimmed); + + if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) { + if (start.empty()) { + start = right_trimmed; + mode = reasoning_mode::FORCED_OPEN; + } + } + } + + if (start.empty() && !end.empty()) { + mode = reasoning_mode::DELIMITER; + } + + // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers, + // but enable_thinking=true produces only the start marker + if (!comparison->output_A.empty() && !comparison->output_B.empty()) { + std::string output_A = comparison->output_A; // enable_thinking=false + std::string output_B = comparison->output_B; // enable_thinking=true + + // Both should end with the assistant role marker + // Check if output_A has both reasoning_start and reasoning_end markers + // while output_B has only reasoning_start + if (!start.empty()) { + // Check if output_A contains both start and end markers + bool A_has_start = output_A.find(start) != std::string::npos; + bool A_has_end = !end.empty() && output_A.find(end) != std::string::npos; + + // Check if output_B contains only the start marker (and not the end marker) + bool B_has_start = output_B.find(start) != std::string::npos; + bool B_has_end = !end.empty() && output_B.find(end) != std::string::npos; + + // For FORCED_CLOSED: A should have both, B should have only start + if (A_has_start && A_has_end && B_has_start && !B_has_end) { + mode = reasoning_mode::FORCED_CLOSED; + } + } else if (!end.empty()) { + // We might not have detected the reasoning open marker until now, + // but this is another chance to do so + auto diff = comparison->diff; + auto diff_rt = trim_whitespace(diff.right); + auto diff_lt = trim_whitespace(diff.left); + if (diff_rt.empty() && diff_lt == end) { + auto seg = segmentize_markers(trim_whitespace(diff.prefix)); + if (!seg.empty() && seg[seg.size() - 1].type == MARKER) { // this is FORCED_CLOSED + start = seg[seg.size() - 1].value; + mode = reasoning_mode::FORCED_CLOSED; + } + } + } + } + + if (start.empty() && end.empty()) { + if (!diff.left.empty() && !diff.right.empty()) { + auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left)); + auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right)); + if (seg_A.size() == 1 && seg_B.size() == 1) { + mode = reasoning_mode::FORCED_CLOSED; + start = seg_B[0].value; + end = seg_A[0].value; + } + } + } +} + +void analyze_reasoning::compare_reasoning_scope() { + json assistant_reasoning_content = json{ + { "role", "assistant" }, + { "content", "Here is my response." }, + { "reasoning_content", "Let me think." } + }; + + json assistant_reasoning_tools = json{ + { "role", "assistant" }, + { "content", nullptr }, + { "reasoning_content", "Let me think." }, + { "tool_calls", + json::array({ build_tool_call("foofoo", json{ { "first", "VVVV" }, { "second", "XXXX" } }) }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_reasoning_content }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + std::string reasoning_content = "Let me think."; + + // Check if reasoning only appears in variant B (with tools) + bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos; + bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos; + + if (!reasoning_in_A && reasoning_in_B) { + mode = reasoning_mode::TOOLS_ONLY; + LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n"); + + // Extract reasoning markers from output_B + // The reasoning_content is "Let me think." + size_t reasoning_pos = comparison->output_B.find(reasoning_content); + if (reasoning_pos != std::string::npos) { + // Find start marker before reasoning_content + std::string before_reasoning = comparison->output_B.substr(0, reasoning_pos); + before_reasoning = trim_trailing_whitespace(before_reasoning); + auto segments_before = segmentize_markers(before_reasoning); + std::reverse(segments_before.begin(), segments_before.end()); + + for (auto & segment : segments_before) { + if (segment.type == segment_type::MARKER) { + start = segment.value; + break; + } + } + + // Find end marker after reasoning_content + size_t reasoning_end = reasoning_pos + reasoning_content.length(); + std::string after_reasoning = comparison->output_B.substr(reasoning_end); + after_reasoning = trim_leading_whitespace(after_reasoning); + + if (!after_reasoning.empty()) { + // Try to find matching end marker + if (!start.empty()) { + auto segments = segmentize_markers(after_reasoning); + for (auto & segment : segments) { + if (segment.type == segment_type::MARKER) { + end = segment.value; + break; + } + } + } + } + } + } +} + +analyze_content::analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning) + : analyze_base(tmpl) { + LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); + + json assistant_content_only = json{ + { "role", "assistant" }, + { "content", "Response text" } + }; + + json assistant_with_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ build_tool_call("test_func", json{ { "arg1", "value1" } }) }) } + }; + + json assistant_with_reasoning = json{ + { "role", "assistant" }, + { "content", "" }, + { "reasoning_content", "Need to think" } + }; + + template_params params_content_only; + params_content_only.messages = json::array({ user_msg, assistant_content_only }); + params_content_only.add_generation_prompt = false; + params_content_only.enable_thinking = true; + params_content_only.tools = tools; + + auto comparison_with_tools = compare_variants(tmpl, params_content_only, [&](template_params & p) { + p.messages = json::array({ user_msg, assistant_with_tools }); + }); + + auto comparison_with_reasoning = compare_variants(tmpl, params_content_only, [&](template_params & p) { + p.messages = json::array({ user_msg, assistant_with_reasoning }); + }); + + if (!comparison_with_tools || !comparison_with_reasoning) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + } + + const auto & diff_tools = comparison_with_tools->diff; + const auto & diff_reasoning = comparison_with_reasoning->diff; + + std::string response = "Response text"; + + bool found_plain_content = false; + if (trim_whitespace(diff_tools.left) == response) { + auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) { + return p.space() + diff_reasoning.left + p.space() + p.optional(p.marker()) + p.space() + p.end(); + }); + if (parser.parse_and_extract(diff_reasoning.left).result.success()) { + // We only have the content text in the diff (possibly with a stray EOG marker), so no markers + mode = content_mode::PLAIN; + found_plain_content = true; + } + /* auto segments = segmentize_markers(diff_reasoning.left); + if (trim_whitespace(diff_reasoning.left) == response || + (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) { + // We only have the content text in the diff (possibly with a stray EOG marker), so no markers + mode = content_mode::PLAIN; + found_plain_content = true; + }*/ else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() && + diff_reasoning.left.find(reasoning.end) != std::string::npos) { + std::string post_closed_reasoning = diff_reasoning.left.substr( + diff_reasoning.left.find(reasoning.end) + reasoning.end.length()); + if (trim_whitespace(post_closed_reasoning) == "Response text") { + mode = content_mode::PLAIN; + found_plain_content = true; + } + } + } + if (!found_plain_content) { + std::string rdiff = diff_reasoning.left; + if (!reasoning.end.empty() && rdiff.find(reasoning.end) != std::string::npos) { + rdiff = rdiff.substr(rdiff.find(reasoning.end) + reasoning.end.length()); + } + // Take the more promising diff + std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left; + size_t pos = pure_content.find("Response text"); + if (pos == std::string::npos) { + LOG_DBG(ANSI_ORANGE "%s: Error: response text not found - improper template application?\n" ANSI_RESET, __func__); + return; + } + start = trim_leading_whitespace(pure_content.substr(0, pos)); + end = trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" + // TODO: WRAPPED_WITH_REASONING + } + + // Determine content mode + if (!start.empty() || !end.empty()) { + mode = content_mode::ALWAYS_WRAPPED; + // TODO: END_DELIMITED content mode - delimited at end but not at start? + } +} + +bool analyze_content::is_always_wrapped() const { + return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty(); +} + +analyze_tools::analyze_tools(const common_chat_template & tmpl, + const jinja::caps & caps, + const analyze_reasoning & reasoning) + : analyze_base(tmpl) { + LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET); + + analyze_tool_calls(reasoning); + + if (format.mode != tool_format::NONE && format.mode != tool_format::JSON_NATIVE) { + if (caps.supports_parallel_tool_calls) { + check_per_call_markers(); + } + extract_function_markers(); + if (format.mode == tool_format::TAG_WITH_TAGGED) { + analyze_arguments(); + } + extract_argument_separator(); + extract_args_markers(); + extract_call_id_markers(); + } +} + +void analyze_tools::analyze_tool_calls(const analyze_reasoning & reasoning) { + json assistant_no_tools = json{ + { "role", "assistant" }, + { "content", "Response." } + }; + + json assistant_with_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_tools }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + std::string tool_section = diff.right; + + if (tool_section.empty()) { + return; + } + + analyze_tool_call_format(tool_section, "foofoo", "first", reasoning); +} + +void analyze_tools::analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + const analyze_reasoning & reasoning) { + if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) { + return; + } + + auto in_json_haystack = [&haystack](const std::string & needle) -> bool { + // Find the needle in the haystack + size_t needle_pos = haystack.find(needle); + if (needle_pos == std::string::npos) { + return false; + } + if (needle_pos < 2) { + return false; // not enough space for a JSON structure + } + if (haystack[needle_pos - 1] == '\'' || haystack[needle_pos - 1] == '"') { + int cur = needle_pos - 2; + for (; cur >= 0 && std::isspace(haystack[cur]); cur--) { + } + if (haystack[cur] == ':' || haystack[cur] == '{') { + return true; + } + } + return false; + }; + + if (in_json_haystack(fun_name_needle)) { + // no need to check further, we're in JSON land + format.mode = tool_format::JSON_NATIVE; + } else if (in_json_haystack(arg_name_needle)) { + format.mode = tool_format::TAG_WITH_JSON; + } else { + format.mode = tool_format::TAG_WITH_TAGGED; + } + + // first, remove any reasoning markers + std::string clean_haystack = haystack; + if (!reasoning.start.empty()) { + auto pos = haystack.find(reasoning.start); + if (pos != std::string::npos) { + clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + reasoning.start.length()); + } + } + if (!reasoning.end.empty()) { + auto pos = clean_haystack.find(reasoning.end); + if (pos != std::string::npos) { + clean_haystack = clean_haystack.substr(0, pos) + clean_haystack.substr(pos + reasoning.end.length()); + } + } + + if (format.mode == tool_format::JSON_NATIVE) { + analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle); + } else { + analyze_tool_call_format_non_json(clean_haystack, fun_name_needle); + } + // always relax whitespace requirements on ending markers since they don't influence content + format.section_end = trim_whitespace(format.section_end); + format.per_call_end = trim_whitespace(format.per_call_end); +} + +void analyze_tools::analyze_tool_call_format_json_native(const std::string & clean_haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle) { + // we might not have the typical OpenAI tool calling structure + int json_start = clean_haystack.find_first_of('{'); + int json_end = clean_haystack.find_last_of('}'); + std::string cut = clean_haystack.substr(json_start, json_end - json_start + 1); + json call_struct = json::parse(cut); + auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value & subel) { + if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) { + format.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) { + format.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } else if (subel.value().dump().find(arg_name_needle) != + std::string::npos) { // handle both string and JSON obj variants + format.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } else if (subel.key().find("id") != std::string::npos) { + // heuristics for generated id field + format.gen_id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } + }; + for (const auto & el : call_struct.items()) { + if (el.key() == fun_name_needle) { + format.fun_name_is_key = true; + // When function name is the key, there's no name field and args are direct + format.name_field.clear(); + format.args_field.clear(); + // Don't register this element - the function name IS the key, not a field + } else { + if (el.value().is_object() && + el.value().dump().find(arg_name_needle) == std::string::npos) { // not the args object + format.function_field = el.key(); + for (const auto & subel : el.value().items()) { + register_field(el.key(), subel); + } + } + // Register this element as a potential field + register_field("", el); + } + } + // TODO: support for generated (not provided) tool call IDs + auto space_or_bracket = [](bool opening, char c) -> bool { + return std::isspace(c) || (opening ? c == '[' : c == ']'); + }; + // now let's check if we're in an array construction, mark it if so and get out of it + if (json_start > 0 && space_or_bracket(true, clean_haystack[json_start - 1])) { + for (--json_start; space_or_bracket(true, clean_haystack[json_start]) && json_start > 0; json_start--) { + if (clean_haystack[json_start] == '[') { + format.tools_array_wrapped = true; + break; + } + } + if (!format.tools_array_wrapped) { + json_start++; // we ate into the last pre-json character + } + } + if (json_end < (int) clean_haystack.length() - 1 && space_or_bracket(false, clean_haystack[json_end + 1])) { + for (++json_end; + space_or_bracket(false, clean_haystack[json_end]) && json_end < (int) clean_haystack.length() - 1; + json_end++) { + } + } + + std::vector> located_params; + if (!format.name_field.empty()) { + located_params.push_back({ clean_haystack.find(format.name_field), format.name_field }); + } + if (!format.args_field.empty()) { + located_params.push_back({ clean_haystack.find(format.args_field), format.args_field }); + } + if (!format.id_field.empty()) { + located_params.push_back({ clean_haystack.find(format.id_field), format.id_field }); + } + if (!format.gen_id_field.empty()) { + located_params.push_back({ clean_haystack.find(format.gen_id_field), format.gen_id_field }); + } + std::sort(located_params.begin(), located_params.end()); + for (auto & pair : located_params) { + format.parameter_order.push_back(pair.second); + } + // we can immediately extract tool calling markers too + format.section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start)); + format.section_end = trim_whitespace(clean_haystack.substr(json_end)); + // When tools_array_wrapped is true, the closing bracket is part of the array structure, + // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets. + if (format.tools_array_wrapped && format.section_end == "]") { + format.section_end.clear(); + } +} + +void analyze_tools::analyze_tool_call_format_non_json(const std::string & clean_haystack, + const std::string & fun_name_needle) { + // we need to split by markers... + auto haystack_split = segmentize_markers(trim_leading_whitespace(clean_haystack)); + int where_is_nemo = 0; + int i = 0; + for (auto & segment : haystack_split) { + if (segment.value.find(fun_name_needle) != std::string::npos) { + where_is_nemo = i; + break; + } + i++; + } + + // basically the rule here is: + // - we append everything adjacent to a marker to the marker (treat it as part of the marker) + // - we assume symmetry (as many opening as closing markers) + // - we count the number of opening markers and then try to move backwards from the end until we've + // eaten as many closing markers as there were opening markers + if (where_is_nemo > 1) { // we might have more than one marker set here + std::vector preceding_markers; + for (int seg = where_is_nemo - 1; seg >= 0; seg--) { + if (haystack_split[seg].type == MARKER) { + preceding_markers.push_back(haystack_split[seg]); + } + } + size_t how_many_markers = preceding_markers.size(); + if (how_many_markers > 1) { + bool had_marker = false; + for (int seg = where_is_nemo - 1; seg >= 0; seg--) { + if (haystack_split[seg].type == MARKER) { + if (!had_marker) { + had_marker = true; + format.per_call_start = haystack_split[seg].value + format.per_call_start; + } else { + format.section_start = haystack_split[seg].value + format.section_start; + } + } else { + if (had_marker) { + format.section_start = haystack_split[seg].value + format.section_start; + } else { + format.per_call_start = haystack_split[seg].value + format.per_call_start; + } + } + } + had_marker = false; + size_t backtracked_so_far = 0; + for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { + if (haystack_split[seg].type == MARKER) { + backtracked_so_far++; + if (!had_marker) { + had_marker = true; + format.section_end = haystack_split[seg].value + format.section_end; + } else { + format.per_call_end = haystack_split[seg].value + format.per_call_end; + } + } else { + if (had_marker) { + format.per_call_end = haystack_split[seg].value + format.per_call_end; + } else { + format.section_end = haystack_split[seg].value + format.section_end; + } + } + if (backtracked_so_far >= how_many_markers) { + break; + } + } + } else { + for (int seg = 0; seg < where_is_nemo; seg++) { + format.section_start += haystack_split[seg].value; + } + for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { + format.section_end = haystack_split[seg].value + format.section_end; + if (haystack_split[seg].type == segment_type::MARKER) { + break; + } + } + } + } else { + format.section_start += haystack_split[0].value; + for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { + format.section_end = haystack_split[seg].value + format.section_end; + if (haystack_split[seg].type == segment_type::MARKER) { + break; + } + } + } +} + +void analyze_tools::check_per_call_markers() { + json assistant_one_tool = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json assistant_two_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call, second_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_one_tool }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto one_vs_two = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); }); + + if (!one_vs_two) { + LOG_DBG(ANSI_ORANGE "%s: Generating double tool call comparison failed\n" ANSI_RESET, __func__); + return; + } + + diff_split filter_common_call_part = calculate_diff_split(one_vs_two->diff.suffix, one_vs_two->diff.right); + + std::string second_tool_content = trim_leading_whitespace(filter_common_call_part.right); + if (!format.section_start.empty() && + second_tool_content.find(format.section_start) == 0) { + format.per_call_start = format.section_start; + format.per_call_end = format.section_end; + format.section_start.clear(); + format.section_end.clear(); + } +} + +void analyze_tools::extract_function_markers() { + json assistant_nocall = json{ + { "role", "assistant" }, + { "content", "BBBB" }, + }; + + json assistant_foofoo = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json assistant_barbar = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ second_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_foofoo }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (diff.left.find("foofoo") != std::string::npos && diff.right.find("barbar") != std::string::npos) { + std::string prefix_marker; + if (!format.per_call_start.empty()) { + prefix_marker = format.per_call_start; + } else { + prefix_marker = format.section_start; + } + if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) { + function.name_prefix = + diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size()); + } + + auto seg = segmentize_markers(diff.left); + for (const auto & s : seg) { + if (s.value.find("foofoo") == std::string::npos) { + function.name_prefix += s.value; + } else { + size_t pos = s.value.find("foofoo"); + std::string pre = s.value.substr(0, pos); + std::string post = s.value.substr(pos + 6); // 6 = len("foofoo") + function.name_prefix += pre; + function.name_suffix += post; + break; + } + } + + auto seg_suf = segmentize_markers(diff.suffix); + size_t stop = 0; + size_t stop_internal_pos = 0; + for (const auto & ss : seg_suf) { + bool has_needle = false; + if (format.mode == tool_format::TAG_WITH_JSON) { + has_needle = (ss.type == segment_type::TEXT && ss.value.find_first_of("{[") != std::string::npos); + if (has_needle) { + stop_internal_pos = ss.value.find_first_of("{["); + break; + } + } else { + has_needle = ss.value.find("first") != std::string::npos; + if (has_needle) { + stop_internal_pos = ss.value.find("first"); + break; + } + } + stop++; + } + if (stop < seg_suf.size() - 1) { + if (format.mode == tool_format::TAG_WITH_TAGGED) { + size_t how_far = 0; + if (stop > 0) { + if (seg_suf[stop].type == segment_type::MARKER) { + how_far = stop; + } else { + how_far = stop - 1; + } + for (size_t i = 0; i < how_far; i++) { + function.name_suffix += seg_suf[i].value; + } + } + } else { + for (size_t i = 0; i < stop; i++) { + function.name_suffix += seg_suf[i].value; + } + const std::string & stopper = seg_suf[stop].value; + function.name_suffix += stopper.substr(0, stop_internal_pos); + } + } + + // now just to find the closer + std::string suffix_marker; + if (!format.per_call_end.empty()) { + suffix_marker = format.per_call_end; + } else { + suffix_marker = format.section_end; + } + std::string closer_suffix; + if (suffix_marker.empty()) { + // we'll have to rely on an extra diff with no-calls version + auto notool_comp = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); }); + auto nt_diff = notool_comp->diff; + closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4); + } else { + closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker)); + } + if (!closer_suffix.empty()) { + auto closer_seg = segmentize_markers(closer_suffix); + bool need_to_eat_arg_marker = (format.mode == tool_format::TAG_WITH_TAGGED); + size_t last_arg_seg = closer_seg.size() - 1; + for (int i = (int) closer_seg.size() - 1; i >= 0; i--) { + if (closer_seg[i].value.find("YYYY") != std::string::npos) { + last_arg_seg = i; + } + } + if (format.mode == tool_format::TAG_WITH_JSON) { + const auto & entire_seg = closer_seg[last_arg_seg].value; + size_t pos = entire_seg.find_last_of("}]"); + if (pos != std::string::npos && pos < entire_seg.size() - 1) { + function.close = trim_leading_whitespace(entire_seg.substr(pos + 1)); + } + } + for (size_t i = last_arg_seg + 1; i < closer_seg.size(); i++) { + if (closer_seg[i].type == segment_type::MARKER) { + if (need_to_eat_arg_marker) { + need_to_eat_arg_marker = false; + } else { + function.close += closer_seg[i].value; + } + } else if (!need_to_eat_arg_marker) { + function.close += closer_seg[i].value; + } + } + } + function.close = trim_leading_whitespace(function.close); + } +} + +void analyze_tools::analyze_arguments() { + LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET); + + extract_argument_name_markers(); + extract_argument_value_markers(); +} + +void analyze_tools::extract_argument_name_markers() { + json assistant_first_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_second_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_other_arg }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_first_arg }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (!diff.left.empty() && !diff.right.empty()) { + size_t common_len = 0; + size_t min_len = std::min(diff.left.length(), diff.right.length()); + while (common_len < min_len && diff.left[common_len] == diff.right[common_len]) { + common_len++; + } + + if (common_len > 0) { // we have a marker structure with the name *inside* the marker + std::string common_prefix = diff.left.substr(0, common_len); + std::string left_remainder = diff.left.substr(common_len); + std::string right_remainder = diff.right.substr(common_len); + size_t left_close = + left_remainder.find_first_of("\"X"); // because arg-val is XXXX, can be quoted or unquoted + size_t right_close = right_remainder.find_first_of("\"Y"); // here arg-val is YYYY + + if (left_close != std::string::npos && right_close != std::string::npos) { + std::string left_name = left_remainder.substr(0, 5); // 5 = len("first") + std::string right_name = right_remainder.substr(0, 6); // 6 = len("second") + + if (left_name == "first" && right_name == "second") { + arguments.name_prefix = trim_whitespace(common_prefix); + std::string suffix_left = left_remainder.substr(5, left_close - 5); + std::string suffix_right = right_remainder.substr(6, right_close - 6); + if (suffix_left == suffix_right) { + arguments.name_suffix = trim_leading_whitespace(suffix_left); + } + } + } + } else if (diff.left.substr(0, 5) == "first" && diff.right.substr(0, 6) == "second") { + // we most likely have actual markers for argument names + auto pre_seg = segmentize_markers(diff.prefix); + for (int i = pre_seg.size() - 1; i >= 0; i--) { + arguments.name_prefix = arguments.name_prefix + pre_seg[i].value; + if (pre_seg[i].type == segment_type::MARKER) { + break; + } + } + auto left_seg = segmentize_markers(diff.left); + if (left_seg.size() == 1) { // only the name + maybe extra whitespace / normal chars in differing part + arguments.name_suffix = diff.left.substr(5); + auto suf_seg= segmentize_markers(diff.suffix); + for (size_t i = 0; i < suf_seg.size(); i++) { + arguments.name_suffix += suf_seg[i].value; + if (suf_seg[i].type == segment_type::MARKER) { + if (i < suf_seg.size() - 2 && suf_seg[i + 1].type == segment_type::TEXT && + trim_whitespace(suf_seg[i + 1].value).empty()) { + // we need to include post-marker whitespace/newlines as well + arguments.name_suffix += suf_seg[i + 1].value; + } + break; + } + } + } else { + for (size_t i = 0; i < left_seg.size(); i++) { + std::string to_add; + if (i == 0) { + to_add = left_seg[i].value.substr(5); + } else { + to_add = left_seg[i].value; + } + arguments.name_suffix += to_add; + if (left_seg[i].type == segment_type::MARKER) { + if (i < left_seg.size() - 2 && left_seg[i + 1].type == segment_type::TEXT && + trim_whitespace(left_seg[i + 1].value).empty()) { + // we need to include post-marker whitespace/newlines as well + arguments.name_suffix += left_seg[i + 1].value; + } + break; + } + } + } + } + } +} + +void analyze_tools::extract_argument_value_markers() { + json assistant_val_X = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_val_Y = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_val_X }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (diff.left == "XXXX" && diff.right == "YYYY") { + std::string arg_name_ending = "first" + arguments.name_suffix; + std::string prefix = diff.prefix; + if (prefix.rfind(arg_name_ending) != std::string::npos) { + prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size()); + } + if (!prefix.empty()) { + auto seg_pre = segmentize_markers(prefix); + for (int i = seg_pre.size() - 1; i >= 0; i--) { + arguments.value_prefix = seg_pre[i].value + arguments.value_prefix; + if (seg_pre[i].type == segment_type::MARKER) { + break; + } + } + } + + std::string value_suffix = diff.suffix; + if (!function.close.empty()) { + size_t func_close_pos = value_suffix.find(function.close); + if (func_close_pos != std::string::npos) { + value_suffix = value_suffix.substr(0, func_close_pos); + } + } else if (!format.per_call_end.empty() || !format.section_end.empty()) { + std::string end_marker = + !format.per_call_end.empty() ? format.per_call_end : format.section_end; + size_t end_marker_pos = value_suffix.find(end_marker); + if (end_marker_pos != std::string::npos) { + value_suffix = value_suffix.substr(0, end_marker_pos); + } + } + value_suffix = trim_leading_whitespace(value_suffix); + if (!value_suffix.empty()) { + arguments.value_suffix = value_suffix; + } + } +} + +void analyze_tools::extract_argument_separator() { + json assistant_one_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_two_args = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_one_arg }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (!diff.right.empty()) { + std::string separator = until_common_prefix(diff.right, "first", "second"); + arguments.separator = separator; + } +} + +void analyze_tools::extract_args_markers() { + json assistant_no_args = json{ + { "role", "assistant"}, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_zero_args }) } + }; + + json assistant_with_args = json{ + { "role", "assistant"}, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_args }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (format.mode != tool_format::JSON_NATIVE) { + std::string prefix_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; + std::string suffix_marker = !format.section_end.empty() ? format.section_end : format.per_call_end; + // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones + size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker); + size_t suffix_pos = suffix_marker.empty() ? diff.suffix.size() : diff.suffix.find(suffix_marker); + if (prefix_pos == std::string::npos) { + prefix_pos = 0; + } + if (suffix_pos == std::string::npos) { + suffix_pos = diff.suffix.size(); + } + std::string prefix_cut = diff.prefix.substr(prefix_pos + prefix_marker.size()); + std::string suffix_cut = diff.suffix.substr(0, suffix_pos); + std::string args_start = until_common_prefix(prefix_cut, "{}", "{\"first\":"); + std::string args_end = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}"); + + if (!args_start.empty() || !args_end.empty()) { + arguments.start = args_start; + arguments.end = args_end; + } + } +} + +void analyze_tools::extract_call_id_markers() { + json assistant_id1 = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json assistant_id2 = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_alt_id }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_id1 }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed for call_id detection\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (diff.left.empty() && diff.right.empty()) { + return; + } + + std::string id_value_1 = "call00001"; + std::string id_value_2 = "call99999"; + + size_t common_id_prefix_len = 0; + for (size_t i = 0; i < std::min(id_value_1.length(), id_value_2.length()); i++) { + if (id_value_1[i] == id_value_2[i]) { + common_id_prefix_len++; + } else { + break; + } + } + std::string common_id_part = id_value_1.substr(0, common_id_prefix_len); + + // Check if the function name is in the prefix (normal case: BETWEEN_FUNC_AND_ARGS or POST_ARGS) + // or in the suffix (call_id is PRE_FUNC_NAME) + std::string func_name = "foofoo"; + size_t func_name_in_prefix = diff.prefix.rfind(func_name); + size_t func_name_in_suffix = diff.suffix.find(func_name); + + if (func_name_in_prefix != std::string::npos && func_name_in_suffix == std::string::npos) { + // Function name is only in prefix - call_id is BETWEEN_FUNC_AND_ARGS or POST_ARGS + // Check if args indicator "{" is in prefix or suffix + size_t args_in_prefix = diff.prefix.find('{', func_name_in_prefix); + size_t args_in_suffix = diff.suffix.find('{'); + + if (args_in_suffix != std::string::npos && + (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) { + // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS + call_id.pos = call_id_position::BETWEEN_FUNC_AND_ARGS; + + // The prefix ends with: ... + // Segmentize to find the call_id_prefix marker + std::string after_func = diff.prefix.substr(func_name_in_prefix + func_name.length()); + auto segments = segmentize_markers(after_func); + + std::string marker_before_id; + for (size_t i = 0; i < segments.size(); i++) { + if (segments[i].type == segment_type::MARKER) { + // Check if the next segment (if any) contains the common_id_part + if (i + 1 < segments.size() && segments[i + 1].value.find(common_id_part) != std::string::npos) { + marker_before_id = segments[i].value; + break; + } + // Or if this is the last marker and the text after contains common_id_part + if (i == segments.size() - 1 || + (i + 1 < segments.size() && segments[i + 1].type == segment_type::TEXT && + segments[i + 1].value.find(common_id_part) != std::string::npos)) { + marker_before_id = segments[i].value; + } + } + } + + if (!marker_before_id.empty()) { + call_id.prefix = marker_before_id; + } else { + // Fallback: look for the last marker in after_func + for (int i = (int) segments.size() - 1; i >= 0; i--) { + if (segments[i].type == segment_type::MARKER) { + call_id.prefix = segments[i].value; + break; + } + } + } + + // Extract call_id_suffix: the first marker in the suffix before args + auto suffix_segments = segmentize_markers(diff.suffix); + for (size_t i = 0; i < suffix_segments.size(); i++) { + if (suffix_segments[i].type == segment_type::MARKER) { + call_id.suffix = suffix_segments[i].value; + break; + } + // Stop if we hit the args + if (suffix_segments[i].value.find('{') != std::string::npos) { + break; + } + } + } else if (args_in_prefix != std::string::npos) { + // Args are in prefix, so call_id is POST_ARGS + call_id.pos = call_id_position::POST_ARGS; + + // Extract markers from between args and the ID + std::string after_args = diff.prefix.substr(args_in_prefix); + size_t closing_brace = after_args.rfind('}'); + if (closing_brace != std::string::npos) { + std::string between_args_and_id = after_args.substr(closing_brace + 1); + auto segments = segmentize_markers(between_args_and_id); + for (int i = (int) segments.size() - 1; i >= 0; i--) { + if (segments[i].type == segment_type::MARKER) { + call_id.prefix = segments[i].value; + break; + } + } + } + + // call_id_suffix would be in the suffix (first marker) + auto suffix_segments = segmentize_markers(diff.suffix); + for (const auto & seg : suffix_segments) { + if (seg.type == segment_type::MARKER) { + call_id.suffix = seg.value; + break; + } + } + } + } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) { + // Function name is only in suffix - call_id is PRE_FUNC_NAME + call_id.pos = call_id_position::PRE_FUNC_NAME; + + // Extract call_id_prefix from prefix (last marker before the common_id_part) + auto prefix_segments = segmentize_markers(diff.prefix); + for (int i = (int) prefix_segments.size() - 1; i >= 0; i--) { + if (prefix_segments[i].type == segment_type::MARKER) { + call_id.prefix = prefix_segments[i].value; + break; + } + } + + // Extract call_id_suffix from suffix (first marker before func_name) + std::string before_func = diff.suffix.substr(0, func_name_in_suffix); + auto suffix_segments = segmentize_markers(before_func); + for (const auto & seg : suffix_segments) { + if (seg.type == segment_type::MARKER) { + call_id.suffix = seg.value; + break; + } + } + } + + // When call_id is detected, per_call_end may have been incorrectly set to include + // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix. + if (call_id.pos != call_id_position::NONE && !call_id.suffix.empty() && + format.per_call_end.find(call_id.suffix) == 0) { + format.per_call_end.clear(); + } +} + +} // namespace autoparser diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h new file mode 100644 index 0000000000..a94c40459c --- /dev/null +++ b/common/chat-diff-analyzer.h @@ -0,0 +1,434 @@ +#pragma once + +#include "chat.h" +#include "jinja/caps.h" +#include "peg-parser.h" +#include "nlohmann/json.hpp" + +#include +#include +#include +#include +#include + +using json = nlohmann::ordered_json; + +class common_chat_peg_unified_builder; + +// ============================================================================ +// Parameters for template application +// ============================================================================ +struct template_params { + json messages; + json tools; + bool add_generation_prompt = false; + bool enable_thinking = true; + std::optional extra_context = std::nullopt; +}; + +struct diff_split { + std::string prefix; + std::string suffix; + std::string left; + std::string right; + + bool operator==(struct diff_split & other) const { + return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right; + } +}; + +// Result of compare_variants containing diff and original outputs +struct compare_variants_result { + diff_split diff; + std::string output_A; + std::string output_B; +}; + +namespace autoparser { + +struct templates_params; + +// ============================================================================ +// Marker Registry: All markers extracted via differential analysis +// ============================================================================ + +// Markers extracted from differential analysis of template outputs +// Each marker is derived from a specific comparison in the analysis matrix +struct marker_registry { + // === Reasoning markers (from Phase 1: R1-R3) === + std::string reasoning_start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" + std::string reasoning_end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" + + // === Content markers (from Phase 2: C1-C2) === + std::string content_start; // e.g., "", ">>>all\n", "" + std::string content_end; // e.g., "", "" + + // === Tool section markers (from Phase 3: T1-T2) === + std::string tool_section_start; // e.g., "", "[TOOL_CALLS]", "" + std::string tool_section_end; // e.g., "", "" + std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates) + std::string per_call_end; // e.g., "<|tool_call_end|>", "" + std::string call_separator; // e.g., ",", "\n", "" (between multiple calls) + + // === Function markers (from Phase 3: T3-T5) === + std::string func_name_prefix; // e.g., "", "\"", ":0" + std::string func_close; // e.g., "", "" (for tag-based) + std::string args_start; // e.g., "{", "<|tool_call_argument_begin|>" + std::string args_end; // e.g., "}", "" + + // === Argument markers (from Phase 4: A1-A3, for tagged args format) === + std::string arg_name_prefix; // e.g., "", "\"" + std::string arg_name_suffix; // e.g., ">", "
", "\":" + std::string arg_value_prefix; // e.g., "", "", "" + std::string arg_value_suffix; // e.g., "", "", "" + std::string arg_separator; // e.g., "", "\n", "," + + // === Call ID markers (for non-JSON formats with tool call IDs) === + std::string call_id_prefix; // e.g., "[CALL_ID]" (marker before call ID value) + std::string call_id_suffix; // e.g., "" (marker after call ID value, before next section) +}; + +// ============================================================================ +// Analysis Result Enums +// ============================================================================ + +// Reasoning handling mode (derived from R1-R3 comparisons) +enum class reasoning_mode { + NONE, // No reasoning markers detected + TAG_BASED, // Standard tag-based: ... + DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter) + FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end) + FORCED_CLOSED, // Template ends with open reasoning tag on enabled thinking but + // with both opened and closed tag for disabled thinking + TOOLS_ONLY // Only reason on tool calls, not on normal content +}; + +inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) { + switch (mode) { + case reasoning_mode::NONE: + return os << "NONE"; + case reasoning_mode::TAG_BASED: + return os << "TAG_BASED"; + case reasoning_mode::DELIMITER: + return os << "DELIMITER"; + case reasoning_mode::FORCED_OPEN: + return os << "FORCED_OPEN"; + case reasoning_mode::FORCED_CLOSED: + return os << "FORCED_CLOSED"; + case reasoning_mode::TOOLS_ONLY: + return os << "TOOLS_ONLY"; + default: + return os << "UNKNOWN"; + } +} + +// Content wrapping mode (derived from C1 comparison) +enum class content_mode { + PLAIN, // No content markers + ALWAYS_WRAPPED, // Content always wrapped with markers + WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present +}; + +inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) { + switch (mode) { + case content_mode::PLAIN: + return os << "PLAIN"; + case content_mode::ALWAYS_WRAPPED: + return os << "ALWAYS_WRAPPED"; + case content_mode::WRAPPED_WITH_REASONING: + return os << "WRAPPED_WITH_REASONING"; + default: + return os << "UNKNOWN"; + } +} + +// Call ID position in tool calls (for non-JSON formats) +enum class call_id_position { + NONE, // No call ID support detected + PRE_FUNC_NAME, // Call ID before function name: [CALL_ID]id[FUNC]name{args} + BETWEEN_FUNC_AND_ARGS, // Call ID between function and args: [FUNC]name[CALL_ID]id{args} + POST_ARGS, // Call ID after arguments: [FUNC]name{args}[CALL_ID]id +}; + +inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) { + switch (pos) { + case call_id_position::NONE: + return os << "NONE"; + case call_id_position::PRE_FUNC_NAME: + return os << "PRE_FUNC_NAME"; + case call_id_position::BETWEEN_FUNC_AND_ARGS: + return os << "BETWEEN_FUNC_AND_ARGS"; + case call_id_position::POST_ARGS: + return os << "POST_ARGS"; + default: + return os << "UNKNOWN"; + } +} + +// Tool call format classification (derived from T1-T5, A1-A3 comparisons) +enum class tool_format { + NONE, // No tool support detected + JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}} + TAG_WITH_JSON, // Tag-based with JSON args: {...} + TAG_WITH_TAGGED, // Tag-based with tagged args: value +}; + +inline std::ostream & operator<<(std::ostream & os, const tool_format & format) { + switch (format) { + case tool_format::NONE: + return os << "NONE"; + case tool_format::JSON_NATIVE: + return os << "JSON_NATIVE"; + case tool_format::TAG_WITH_JSON: + return os << "TAG_WITH_JSON"; + case tool_format::TAG_WITH_TAGGED: + return os << "TAG_WITH_TAGGED"; + default: + return os << "UNKNOWN"; + } +} + +// ============================================================================ +// Sub-structs for tool analysis +// ============================================================================ + +struct tool_format_analysis { + tool_format mode = tool_format::NONE; + + std::string section_start; // e.g., "", "[TOOL_CALLS]", "" + std::string section_end; // e.g., "", "" + std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates) + std::string per_call_end; // e.g., "<|tool_call_end|>", "" + + bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "": { ... arguments ... } } + bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...] + + std::string function_field = "function"; + std::string name_field = "name"; + std::string args_field = "arguments"; + std::string id_field; + std::string gen_id_field; + std::vector parameter_order; +}; + +struct tool_function_analysis { + std::string name_prefix; // e.g., "", "\"", ":0" + std::string close; // e.g., "", "" (for tag-based) +}; + +struct tool_arguments_analysis { + std::string start; // e.g., "<|tool_call_argument_begin|>", "" + std::string end; // e.g., "<|tool_call_argument_end|>", "" + std::string name_prefix; // e.g., "", "\"" + std::string name_suffix; // e.g., ">", "", "\":" + std::string value_prefix; // e.g., "", "", "" + std::string value_suffix; // e.g., "", "", "" + std::string separator; // e.g., "", "\n", "," +}; + +struct tool_id_analysis { + call_id_position pos = call_id_position::NONE; + + std::string prefix; // e.g., "[CALL_ID]" (marker before call ID value) + std::string suffix; // e.g., "" (marker after call ID value, before next section) +}; + +// ============================================================================ +// Parser build context (shared interface for build_parser methods) +// ============================================================================ + +struct analyze_content; + +struct parser_build_context { + common_chat_peg_unified_builder & p; + const templates_params & inputs; + common_peg_parser reasoning_parser; + bool extracting_reasoning = false; + const analyze_content * content = nullptr; + + parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs); +}; + +// ============================================================================ +// Base class for analyzers with parser building +// ============================================================================ + +struct analyze_base { + virtual ~analyze_base() = default; + virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0; + + protected: + const common_chat_template * tmpl = nullptr; + + analyze_base() = default; + explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {} +}; + +// ============================================================================ +// Reasoning analyzer +// ============================================================================ + +struct analyze_reasoning : analyze_base { + reasoning_mode mode = reasoning_mode::NONE; + + std::string start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" + std::string end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" + + analyze_reasoning() = default; + analyze_reasoning(const common_chat_template & tmpl, bool supports_tools); + + common_peg_parser build_parser(parser_build_context & ctx) const override; + + private: + // Look for reasoning markers in rendered content + void compare_reasoning_presence(); + + // Compare generation prompt with enable_thinking=true vs false + void compare_thinking_enabled(); + + // Check if reasoning is always possible or only in tool calls + void compare_reasoning_scope(); +}; + +// ============================================================================ +// Content analyzer +// ============================================================================ + +struct analyze_content : analyze_base { + content_mode mode = content_mode::PLAIN; + + std::string start; // e.g., "", ">>>all\n", "" + std::string end; // e.g., "", "" + + bool requires_nonnull_content = false; + + analyze_content() = default; + analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning); + + common_peg_parser build_parser(parser_build_context & ctx) const override; + + bool is_always_wrapped() const; + common_peg_parser build_optional_wrapped(parser_build_context & ctx) const; +}; + +// ============================================================================ +// Tool analyzer +// ============================================================================ + +struct analyze_tools : analyze_base { + tool_format_analysis format; + tool_function_analysis function; + tool_arguments_analysis arguments; + tool_id_analysis call_id; + + analyze_tools() = default; + analyze_tools(const common_chat_template & tmpl, + const jinja::caps & caps, + const analyze_reasoning & reasoning); + + common_peg_parser build_parser(parser_build_context & ctx) const override; + + private: + // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format + void analyze_tool_calls(const analyze_reasoning & reasoning); + + // Analyze format based on position of function and argument name in needle + void analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + const analyze_reasoning & reasoning); + + // Analyze specifics of JSON native format (entire tool call is a JSON object) + void analyze_tool_call_format_json_native(const std::string & clean_haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle); + + // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments) + void analyze_tool_call_format_non_json(const std::string & clean_haystack, + const std::string & fun_name_needle); + + // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support + void check_per_call_markers(); + + // Extract function name markers + void extract_function_markers(); + + // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis + void analyze_arguments(); + + // Extract argument name markers + void extract_argument_name_markers(); + + // Extract argument value markers + void extract_argument_value_markers(); + + // Extract argument separator, if specified (eg. ......) + void extract_argument_separator(); + + // Extract argument wrapper markers, if present (eg. '......') + void extract_args_markers(); + + // Extract call ID markers, if present + void extract_call_id_markers(); + + // Per-format tool parser builders + common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const; + common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const; + common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const; +}; + +// ============================================================================ +// Top-level template analyzer (merges differential_analyzer + diff_analysis_result) +// ============================================================================ + +struct analyze_template { + jinja::caps jinja_caps; + analyze_reasoning reasoning; + analyze_content content; + analyze_tools tools; + + // Preserved tokens for tokenizer (union of all non-empty markers) + std::vector preserved_tokens; + + // Constructor: runs full differential analysis on a template + explicit analyze_template(const common_chat_template & tmpl); + + // Build the unified PEG parser for this template + common_peg_arena build_parser(const templates_params & inputs) const; + + private: + // Collect tokens from entire analysis to preserve + void collect_preserved_tokens(); +}; + +} // namespace autoparser + +enum segment_type { TEXT, MARKER }; + +inline std::ostream & operator<<(std::ostream & os, const segment_type & type) { + switch (type) { + case segment_type::TEXT: + return os << "TEXT"; + case segment_type::MARKER: + return os << "MARKER"; + default: + return os << "UNKNOWN"; + } +} + +struct segment { + segment_type type; + std::string value; + + segment(segment_type type, std::string value) : type(type), value(std::move(value)) {} + + bool operator==(const segment & other) const { + return type == other.type && value == other.value; + } + + bool operator!=(const segment & other) const { + return !(*this == other); + } +}; diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp deleted file mode 100644 index a80900ff8d..0000000000 --- a/common/chat-parser-xml-toolcall.cpp +++ /dev/null @@ -1,879 +0,0 @@ -#include "chat.h" -#include "chat-parser.h" -#include "common.h" -#include "json-partial.h" -#include "json-schema-to-grammar.h" -#include "log.h" -#include "regex-partial.h" - -using json = nlohmann::ordered_json; - -class xml_toolcall_syntax_exception : public std::runtime_error { - public: - xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {} -}; - -template -inline void sort_uniq(std::vector &vec) { - std::sort(vec.begin(), vec.end()); - vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); -} - -template -inline bool all_space(const T &str) { - return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); }); -} - -static size_t utf8_truncate_safe(const std::string_view s) { - size_t len = s.size(); - if (len == 0) return 0; - size_t i = len; - for (size_t back = 0; back < 4 && i > 0; ++back) { - --i; - unsigned char c = s[i]; - if ((c & 0x80) == 0) { - return len; - } else if ((c & 0xC0) == 0xC0) { - size_t expected_len = 0; - if ((c & 0xE0) == 0xC0) expected_len = 2; - else if ((c & 0xF0) == 0xE0) expected_len = 3; - else if ((c & 0xF8) == 0xF0) expected_len = 4; - else return i; - if (len - i >= expected_len) { - return len; - } else { - return i; - } - } - } - return len - std::min(len, size_t(3)); -} - -inline void utf8_truncate_safe_resize(std::string &s) { - s.resize(utf8_truncate_safe(s)); -} - -inline std::string_view utf8_truncate_safe_view(const std::string_view s) { - return s.substr(0, utf8_truncate_safe(s)); -} - -static std::optional try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) { - if (literal1.size() == 0) return builder.try_find_literal(literal2); - const auto saved_pos = builder.pos(); - while (auto res = builder.try_find_literal(literal1)) { - builder.consume_spaces(); - const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos()); - if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) { - if (res->prelude.size() != res->groups[0].begin - saved_pos) { - res->prelude = builder.str({saved_pos, res->groups[0].begin}); - } - builder.move_to(builder.pos() + match_len); - res->groups[0].end = builder.pos(); - GGML_ASSERT(res->groups[0].begin != res->groups[0].end); - return res; - } - builder.move_to(res->groups[0].begin + 1); - } - builder.move_to(saved_pos); - return std::nullopt; -} - -/** - * make a GBNF that accept any strings except those containing any of the forbidden strings. - */ -std::string make_gbnf_excluding(std::vector forbids) { - constexpr auto charclass_escape = [](unsigned char c) -> std::string { - if (c == '\\' || c == ']' || c == '^' || c == '-') { - std::string s = "\\"; - s.push_back((char)c); - return s; - } - if (isprint(c)) { - return std::string(1, (char)c); - } - char buf[16]; - snprintf(buf, 15, "\\x%02X", c); - return std::string(buf); - }; - constexpr auto build_expr = [charclass_escape](auto self, const std::vector& forbids, int l, int r, int depth) -> std::string { - std::vector>> children; - int i = l; - while (i < r) { - const std::string &s = forbids[i]; - if ((int)s.size() == depth) { - ++i; - continue; - } - unsigned char c = (unsigned char)s[depth]; - int j = i; - while (j < r && (int)forbids[j].size() > depth && - (unsigned char)forbids[j][depth] == c) { - ++j; - } - children.push_back({c, {i, j}}); - i = j; - } - std::vector alts; - if (!children.empty()) { - std::string cls; - for (auto &ch : children) cls += charclass_escape(ch.first); - alts.push_back(std::string("[^") + cls + "]"); - } - for (auto &ch : children) { - std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1); - if (!childExpr.empty()) { - std::string quoted_ch = "\""; - if (ch.first == '\\') quoted_ch += "\\\\"; - else if (ch.first == '"') quoted_ch += "\\\""; - else if (isprint(ch.first)) quoted_ch.push_back(ch.first); - else { - char buf[16]; - snprintf(buf, 15, "\\x%02X", ch.first); - quoted_ch += buf; - } - quoted_ch += "\""; - std::string branch = quoted_ch + std::string(" ") + childExpr; - alts.push_back(branch); - } - } - if (alts.empty()) return ""; - std::ostringstream oss; - oss << "( "; - for (size_t k = 0; k < alts.size(); ++k) { - if (k) oss << " | "; - oss << alts[k]; - } - oss << " )"; - return oss.str(); - }; - if (forbids.empty()) return "( . )*"; - sort(forbids.begin(), forbids.end()); - std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0); - if (expr.empty()) { - std::string cls; - for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]); - expr = std::string("( [^") + cls + "] )"; - } - if (forbids.size() == 1) - return expr + "*"; - else - return std::string("( ") + expr + " )*"; -} - -/** - * Build grammar for xml-style tool call - * form.scope_start and form.scope_end can be empty. - * Requires data.format for model-specific hacks. - */ -void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) { - GGML_ASSERT(!form.tool_start.empty()); - GGML_ASSERT(!form.tool_sep.empty()); - GGML_ASSERT(!form.key_start.empty()); - GGML_ASSERT(!form.val_end.empty()); - GGML_ASSERT(!form.tool_end.empty()); - - std::string key_val_sep = form.key_val_sep; - if (form.key_val_sep2) { - key_val_sep += "\n"; - key_val_sep += *form.key_val_sep2; - } - GGML_ASSERT(!key_val_sep.empty()); - - if (tools.is_array() && !tools.empty()) { - data.grammar = build_grammar([&](const common_grammar_builder &builder) { - auto string_arg_val = form.last_val_end ? - builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) : - builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end})); - - std::vector tool_rules; - for (const auto & tool : tools) { - if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) { - LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str()); - continue; - } - const auto & function = tool.at("function"); - if (!function.contains("name") || !function.at("name").is_string()) { - LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str()); - continue; - } - if (!function.contains("parameters") || !function.at("parameters").is_object()) { - LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str()); - continue; - } - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - - struct parameter_rule { - std::string symbol_name; - bool is_required; - }; - std::vector arg_rules; - if (!parameters.contains("properties") || !parameters.at("properties").is_object()) { - LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str()); - continue; - } else { - std::vector requiredParameters; - if (parameters.contains("required")) { - try { parameters.at("required").get_to(requiredParameters); } - catch (const std::runtime_error&) { - LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str()); - } - } - sort_uniq(requiredParameters); - for (const auto & [key, value] : parameters.at("properties").items()) { - std::string quoted_key = key; - bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key); - if (form.key_start.back() == '"' && key_val_sep[0] == '"') { - quoted_key = gbnf_format_literal(key); - quoted_key = quoted_key.substr(1, quoted_key.size() - 2); - } - arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key, - gbnf_format_literal(form.key_start) + " " + - gbnf_format_literal(quoted_key) + " " + - gbnf_format_literal(key_val_sep) + " " + - ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ? - (form.raw_argval ? - string_arg_val : - "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )" - ) : - builder.add_schema(name + "-arg-" + key, value) - ) - ), required}); - } - } - - auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end)); - decltype(next_arg_with_sep) next_arg = "\"\""; - for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) { - std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep; - next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ? - include_this_arg : "( " + include_this_arg + " ) | " + next_arg - ); - include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg; - next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ? - include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep - ); - } - - std::string quoted_name = name; - if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') { - quoted_name = gbnf_format_literal(name); - quoted_name = quoted_name.substr(1, quoted_name.size() - 2); - } - quoted_name = gbnf_format_literal(quoted_name); - // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name - if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) { - quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+"; - } - tool_rules.push_back(builder.add_rule(name + "-call", - gbnf_format_literal(form.tool_start) + " " + - quoted_name + " " + - gbnf_format_literal(form.tool_sep) + " " + - next_arg - )); - } - - auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | ")); - auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once); - auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end)); - auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end); - builder.add_rule("root", - (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") + - tool_call_multiple_with_end + "?" + - (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end)) - ); - }); - - // grammar trigger for tool call - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start }); - } -} - -/** - * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched. - * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser. - * form.scope_start, form.tool_sep and form.scope_end can be empty. - */ -inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) { - GGML_ASSERT(!form.tool_start.empty()); - GGML_ASSERT(!form.key_start.empty()); - GGML_ASSERT(!form.key_val_sep.empty()); - GGML_ASSERT(!form.val_end.empty()); - GGML_ASSERT(!form.tool_end.empty()); - - // Helper to choose return false or throw error - constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) { - LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str()); - if (recovery) { - builder.move_to(start_pos); - return false; - } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output."); - }; - // Drop substring from needle to end from a JSON - constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") { - auto pos = json_str.rfind(needle); - if (pos == std::string::npos) { - return false; - } - for (auto i = pos + needle.size(); i < json_str.size(); ++i) { - unsigned char ch = static_cast(json_str[i]); - if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) { - return false; - } - } - if (pos != 0 && json_str[pos - 1] == '"') { - --pos; - } - json_str.resize(pos); - return true; - }; - // Helper to generate a partial argument JSON - constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) { - auto rest = builder.consume_rest(); - utf8_truncate_safe_resize(rest); - set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG"); - auto tool_str = arguments.dump(); - if (partial_json(tool_str)) { - if (builder.add_tool_call(function_name, "", tool_str)) { - return; - } - } - LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str()); - }; - // Helper to find a close (because there may be form.last_val_end or form.last_tool_end) - constexpr auto try_find_close = []( - common_chat_msg_parser & builder, - const std::string & end, - const std::optional & alt_end, - const std::string & end_next, - const std::optional & alt_end_next - ) { - auto saved_pos = builder.pos(); - auto tc = builder.try_find_literal(end); - auto val_end_size = end.size(); - if (alt_end) { - auto pos_1 = builder.pos(); - builder.move_to(saved_pos); - auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next); - if (alt_end_next) { - builder.move_to(saved_pos); - auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next); - if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) { - tc2 = tc3; - } - } - if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) { - tc = tc2; - tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size()); - builder.move_to(tc->groups[0].end); - val_end_size = alt_end->size(); - } else { - builder.move_to(pos_1); - } - } - return std::make_pair(val_end_size, tc); - }; - // Helper to find a val_end or last_val_end, returns matched pattern size - const auto try_find_val_end = [try_find_close, &builder, &form]() { - return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end); - }; - // Helper to find a tool_end or last_tool_end, returns matched pattern size - const auto try_find_tool_end = [try_find_close, &builder, &form]() { - return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt); - }; - - bool recovery = true; - const auto start_pos = builder.pos(); - if (!all_space(form.scope_start)) { - if (auto tc = builder.try_find_literal(form.scope_start)) { - if (all_space(tc->prelude)) { - if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin) - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start)); - } else { - builder.move_to(start_pos); - return false; - } - } else return false; - } - while (auto tc = builder.try_find_literal(form.tool_start)) { - if (!all_space(tc->prelude)) { - LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n", - gbnf_format_literal(form.tool_start).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - builder.move_to(tc->groups[0].begin - tc->prelude.size()); - break; - } - - // Find tool name - auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep); - if (!func_name) { - auto [sz, tc] = try_find_tool_end(); - func_name = tc; - } - if (!func_name) { - // Partial tool name not supported - throw common_chat_msg_partial_exception("incomplete tool_call"); - } - // If the model generate multiple tool call and the first tool call has no argument - if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) { - builder.move_to(func_name->groups[0].begin - func_name->prelude.size()); - auto [sz, tc] = try_find_tool_end(); - func_name = tc; - } - - // Parse tool name - builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end); - std::string function_name = string_strip(func_name->prelude); - // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name - if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) { - if (string_starts_with(function_name, "functions.")) { - static const std::regex re(":\\d+$"); - if (std::regex_search(function_name, re)) { - function_name = function_name.substr(10, function_name.rfind(":") - 10); - } - } - } - - // Argument JSON - json arguments = json::object(); - - // Helper to generate a partial argument JSON - const auto gen_partial_args = [&](auto set_partial_arg) { - gen_partial_json(set_partial_arg, arguments, builder, function_name); - }; - - // Parse all arg_key/arg_value pairs - while (auto tc = builder.try_find_literal(form.key_start)) { - if (!all_space(tc->prelude)) { - LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n", - gbnf_format_literal(form.key_start).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - builder.move_to(tc->groups[0].begin - tc->prelude.size()); - break; - } - if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) { - auto tool_call_arg = arguments.dump(); - if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') { - tool_call_arg.resize(tool_call_arg.size() - 1); - } - builder.add_tool_call(function_name, "", tool_call_arg); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start)); - } - - // Parse arg_key - auto key_res = builder.try_find_literal(form.key_val_sep); - if (!key_res) { - gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";}); - throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start)); - } - if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) { - gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";}); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep)); - } - auto &key = key_res->prelude; - recovery = false; - - // Parse arg_value - if (form.key_val_sep2) { - if (auto tc = builder.try_find_literal(*form.key_val_sep2)) { - if (!all_space(tc->prelude)) { - LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n", - gbnf_format_literal(tc->prelude).c_str(), - gbnf_format_literal(form.key_val_sep).c_str(), - gbnf_format_literal(*form.key_val_sep2).c_str() - ); - return return_error(builder, start_pos, false); - } - if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2)); - } - } else { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep)); - } - } - auto val_start = builder.pos(); - - // Test if arg_val is a partial JSON - std::optional value_json = std::nullopt; - if (!form.raw_argval || !*form.raw_argval) { - try { value_json = builder.try_consume_json(); } - catch (const std::runtime_error&) { builder.move_to(val_start); } - // TODO: Delete this when json_partial adds top-level support for null/true/false - if (builder.pos() == val_start) { - const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)"); - builder.consume_spaces(); - std::string_view sv = utf8_truncate_safe_view(builder.input()); - sv.remove_prefix(builder.pos()); - std::string rest = "a"; - if (sv.size() < 6) rest = sv; - if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) { - value_json = {123, {"123", "123"}}; - builder.consume_rest(); - } else { - builder.move_to(val_start); - } - } - } - - // If it is a JSON and followed by , parse as json - // cannot support streaming because it may be a plain text starting with JSON - if (value_json) { - auto json_end = builder.pos(); - builder.consume_spaces(); - if (builder.pos() == builder.input().size()) { - if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) { - arguments[key] = value_json->json; - auto json_str = arguments.dump(); - if (!value_json->healing_marker.json_dump_marker.empty()) { - GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker)); - json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker)); - } else { - GGML_ASSERT(json_str.back() == '}'); - json_str.resize(json_str.size() - 1); - } - builder.add_tool_call(function_name, "", json_str); - } else { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - } - LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str()); - throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations."); - } - builder.move_to(json_end); - auto [val_end_size, tc] = try_find_val_end(); - if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) { - if (tc->groups[0].end - tc->groups[0].begin != val_end_size) { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str()); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : "")); - } else arguments[key] = value_json->json; - } else builder.move_to(val_start); - } - - // If not, parse as plain text - if (val_start == builder.pos()) { - if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) { - auto &value_str = value_plain->prelude; - if (form.trim_raw_argval) value_str = string_strip(value_str); - if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;}); - throw common_chat_msg_partial_exception( - "Expected " + gbnf_format_literal(form.val_end) + - " after " + gbnf_format_literal(form.key_val_sep) + - (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "") - ); - } - arguments[key] = value_str; - } else { - if (form.trim_raw_argval) { - gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;}); - } else { - gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;}); - } - throw common_chat_msg_partial_exception( - "Expected " + gbnf_format_literal(form.val_end) + - " after " + gbnf_format_literal(form.key_val_sep) + - (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "") - ); - } - } - } - - // Consume closing tag - if (auto [tool_end_size, tc] = try_find_tool_end(); tc) { - if (!all_space(tc->prelude)) { - LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n", - gbnf_format_literal(form.tool_end).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - return return_error(builder, start_pos, recovery); - } - if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) { - // Add the parsed tool call - if (!builder.add_tool_call(function_name, "", arguments.dump())) { - throw common_chat_msg_partial_exception("Failed to add XML-Style tool call"); - } - recovery = false; - continue; - } - } - - auto tool_call_arg = arguments.dump(); - if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') { - tool_call_arg.resize(tool_call_arg.size() - 1); - } - builder.add_tool_call(function_name, "", tool_call_arg); - throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end)); - } - if (auto tc = builder.try_find_literal(form.scope_end)) { - if (!all_space(tc->prelude)) { - LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n", - gbnf_format_literal(form.scope_end).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - return return_error(builder, start_pos, recovery); - } - } else { - if (all_space(form.scope_end)) return true; - builder.consume_spaces(); - if (builder.pos() == builder.input().size()) - throw common_chat_msg_partial_exception("incomplete tool calls"); - LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n", - gbnf_format_literal(form.scope_end).c_str(), - gbnf_format_literal(builder.consume_rest()).c_str() - ); - return return_error(builder, start_pos, recovery); - } - - return true; -} - -/** - * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched. - * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client. - * form.scope_start, form.tool_sep and form.scope_end can be empty. - */ -bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) { - auto pos = pos_; - auto tsize = result_.tool_calls.size(); - try { return parse_xml_tool_calls(*this, form); } - catch (const xml_toolcall_syntax_exception&) {} - move_to(pos); - result_.tool_calls.resize(tsize); - return false; -} - -/** - * Parse content uses reasoning and XML-Style tool call - * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed. - */ -inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "", const std::string & end_think = "") { - constexpr auto rstrip = [](std::string &s) { - s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base())); - }; - // Erase substring from l to r, along with additional spaces nearby - constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) { - while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast(str[l]))); - ++l; - while (++r < str.size() && std::isspace(static_cast(str[r]))); - if (l < r) str[l] = '\n'; - if (l + 1 < r) str[l + 1] = '\n'; - if (l != 0) l += 2; - str.erase(l, r - l); - return l; - }; - constexpr auto trim_suffix = [](std::string &content, std::initializer_list list) { - auto best_match = content.size(); - for (auto pattern: list) { - if (pattern.size() == 0) continue; - for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) { - auto match_len = content.size() - match_idx; - if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) { - best_match = match_idx; - } - } - } - if (content.size() > best_match) { - content.erase(best_match); - } - }; - const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) { - return trim_suffix(content, { - start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start, - form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "", - form.val_end, form.last_val_end ? form.last_val_end->c_str() : "", - form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "", - form.scope_end - }); - }; - - - // Trim leading spaces without affecting keyword matching - static const common_regex spaces_regex("\\s*"); - { - auto tc = builder.consume_regex(spaces_regex); - auto spaces = builder.str(tc.groups[0]); - auto s1 = spaces.size(); - trim_potential_partial_word(spaces); - auto s2 = spaces.size(); - builder.move_to(builder.pos() - (s1 - s2)); - } - - // Parse content - bool reasoning_unclosed = builder.syntax().thinking_forced_open; - std::string unclosed_reasoning_content(""); - for (;;) { - auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start); - std::string content; - std::string tool_call_start; - - if (tc) { - content = std::move(tc->prelude); - tool_call_start = builder.str(tc->groups[0]); - LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str()); - } else { - content = builder.consume_rest(); - utf8_truncate_safe_resize(content); - } - - // Handle unclosed think block - if (reasoning_unclosed) { - if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) { - unclosed_reasoning_content += content; - if (!(form.allow_toolcall_in_think && tc)) { - unclosed_reasoning_content += tool_call_start; - continue; - } - } else { - reasoning_unclosed = false; - std::string reasoning_content; - if (pos == std::string::npos) { - reasoning_content = std::move(content); - } else { - reasoning_content = content.substr(0, pos); - content.erase(0, pos + end_think.size()); - } - if (builder.pos() == builder.input().size() && all_space(content)) { - rstrip(reasoning_content); - trim_potential_partial_word(reasoning_content); - rstrip(reasoning_content); - if (reasoning_content.empty()) { - rstrip(unclosed_reasoning_content); - trim_potential_partial_word(unclosed_reasoning_content); - rstrip(unclosed_reasoning_content); - if (unclosed_reasoning_content.empty()) continue; - } - } - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) { - builder.add_content(start_think); - builder.add_content(unclosed_reasoning_content); - builder.add_content(reasoning_content); - if (builder.pos() != builder.input().size() || !all_space(content)) - builder.add_content(end_think); - } else { - builder.add_reasoning_content(unclosed_reasoning_content); - builder.add_reasoning_content(reasoning_content); - } - unclosed_reasoning_content.clear(); - } - } - - // Handle multiple think block - bool toolcall_in_think = false; - for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) { - if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) { - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) { - auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size()); - builder.add_reasoning_content(reasoning_content); - think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1); - } else { - think_start = think_end + end_think.size() - 1; - } - } else { - // This start is in thinking block, skip this tool call - // This start is in thinking block - if (form.allow_toolcall_in_think) { - unclosed_reasoning_content = content.substr(think_start + start_think.size()); - } else { - unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start; - } - reasoning_unclosed = true; - content.resize(think_start); - toolcall_in_think = true; - } - } - - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) { - rstrip(content); - // Handle unclosed token from content: delete all token - if (auto pos = content.rfind(end_think); pos != std::string::npos) { - while (pos != std::string::npos) { - pos = erase_spaces(content, pos, pos + end_think.size() - 1); - pos = content.rfind(end_think, pos); - } - } - // Strip if needed - if (content.size() > 0 && std::isspace(static_cast(content[0]))) { - content = string_strip(content); - } - } - - // remove potential partial suffix - if (builder.pos() == builder.input().size()) { - if (unclosed_reasoning_content.empty()) { - rstrip(content); - trim_potential_partial_word(content); - rstrip(content); - } else { - rstrip(unclosed_reasoning_content); - trim_potential_partial_word(unclosed_reasoning_content); - rstrip(unclosed_reasoning_content); - } - } - - // consume unclosed_reasoning_content if allow_toolcall_in_think is set - if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) { - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) { - builder.add_reasoning_content(unclosed_reasoning_content); - } else { - if (content.empty()) { - content = start_think + unclosed_reasoning_content; - } else { - content += "\n\n" + start_think; - content += unclosed_reasoning_content; - } - } - unclosed_reasoning_content.clear(); - } - - // Add content - if (!content.empty()) { - // If there are multiple content blocks - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) { - builder.add_content("\n\n"); - } - builder.add_content(content); - } - - // This start is in thinking block and toolcall_in_think not set, skip this tool call - if (toolcall_in_think && !form.allow_toolcall_in_think) { - continue; - } - - // There is no tool call and all content is parsed - if (!tc) { - GGML_ASSERT(builder.pos() == builder.input().size()); - GGML_ASSERT(unclosed_reasoning_content.empty()); - if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed); - break; - } - - builder.move_to(tc->groups[0].begin); - if (builder.try_consume_xml_tool_calls(form)) { - auto end_of_tool = builder.pos(); - builder.consume_spaces(); - if (builder.pos() != builder.input().size()) { - builder.move_to(end_of_tool); - if (!builder.result().content.empty()) { - builder.add_content("\n\n"); - } - } - } else { - static const common_regex next_char_regex("."); - auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]); - rstrip(c); - builder.add_content(c); - } - } -} - -/** - * Parse content uses reasoning and XML-Style tool call - */ -void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) { - parse_msg_with_xml_tool_calls(*this, form, start_think, end_think); -} diff --git a/common/chat-parser-xml-toolcall.h b/common/chat-parser-xml-toolcall.h deleted file mode 100644 index b309fb6670..0000000000 --- a/common/chat-parser-xml-toolcall.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include "chat.h" - -#include - -#include -#include -#include - - -// Sample config: -// MiniMax-M2 (left): \n\nvalue\n...\n... -// GLM 4.5 (right): function_name\nkey\nvalue\n -struct xml_tool_call_format { - std::string scope_start; // \n // \n // can be empty - std::string tool_start; // - std::string tool_sep; // \">\n // \n // can be empty only for parse_xml_tool_calls - std::string key_start; // - std::string key_val_sep; // \"> // \n - std::string val_end; // \n // \n - std::string tool_end; // \n // \n - std::string scope_end; // // // can be empty - // Set this if there can be dynamic spaces inside key_val_sep. - // e.g. key_val_sep= key_val_sep2= for GLM4.5 - std::optional key_val_sep2 = std::nullopt; - // Set true if argval should only be raw string. e.g. Hello "world" hi - // Set false if argval should only be json string. e.g. "Hello \"world\" hi" - // Defaults to std::nullopt, both will be allowed. - std::optional raw_argval = std::nullopt; - std::optional last_val_end = std::nullopt; - std::optional last_tool_end = std::nullopt; - bool trim_raw_argval = false; - bool allow_toolcall_in_think = false; -}; - -// make a GBNF that accept any strings except those containing any of the forbidden strings. -std::string make_gbnf_excluding(std::vector forbids); - -/** - * Build grammar for xml-style tool call - * form.scope_start and form.scope_end can be empty. - * Requires data.format for model-specific hacks. - */ -void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form); diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp deleted file mode 100644 index 29819e48d3..0000000000 --- a/common/chat-parser.cpp +++ /dev/null @@ -1,1669 +0,0 @@ -#include "chat-parser.h" -#include "chat-peg-parser.h" -#include "common.h" -#include "log.h" -#include "peg-parser.h" -#include "regex-partial.h" - -#include -#include -#include -#include -#include -#include -#include - -using json = nlohmann::ordered_json; - -static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, - const common_regex & prefix, - size_t rstrip_prefix = 0) { - static const std::vector> args_paths = { { "arguments" } }; - if (auto res = builder.try_find_regex(prefix)) { - builder.move_back(rstrip_prefix); - auto tool_calls = builder.consume_json_with_dumped_args(args_paths); - if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call array"); - } - } else { - builder.add_content(builder.consume_rest()); - } -} - -static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) { - std::string arguments; - if (builder.is_partial()) { - arguments = (json{ - { "code", code + builder.healing_marker() } - }) - .dump(); - auto idx = arguments.find(builder.healing_marker()); - if (idx != std::string::npos) { - arguments.resize(idx); - } - } else { - arguments = (json{ - { "code", code } - }) - .dump(); - } - return arguments; -} - -/** - * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. - * Aggregates the prefix, suffix and in-between text into the content. - */ -static void parse_json_tool_calls( - common_chat_msg_parser & builder, - const std::optional & block_open, - const std::optional & function_regex_start_only, - const std::optional & function_regex, - const common_regex & close_regex, - const std::optional & block_close, - bool allow_raw_python = false, - const std::function & get_function_name = - nullptr) { - auto parse_tool_calls = [&]() { - size_t from = std::string::npos; - auto first = true; - while (true) { - auto start_pos = builder.pos(); - auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) : - function_regex ? builder.try_find_regex(*function_regex, from) : - std::nullopt; - - if (res) { - std::string name; - if (get_function_name) { - name = get_function_name(*res); - } else { - GGML_ASSERT(res->groups.size() == 2); - name = builder.str(res->groups[1]); - } - first = false; - if (name.empty()) { - // get_function_name signalled us that we should skip this match and treat it as content. - from = res->groups[0].begin + 1; - continue; - } - from = std::string::npos; - - auto maybe_raw_python = name == "python" && allow_raw_python; - if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) { - if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) { - if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_regex(close_regex); - } - continue; - } - if (maybe_raw_python) { - auto arguments = wrap_code_as_arguments(builder, builder.consume_rest()); - if (!builder.add_tool_call(name, "", arguments)) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - return; - } - throw common_chat_msg_partial_exception("incomplete tool call"); - } else { - builder.move_to(start_pos); - } - break; - } - if (block_close) { - builder.consume_regex(*block_close); - } - builder.consume_spaces(); - builder.add_content(builder.consume_rest()); - }; - if (block_open) { - if (auto res = builder.try_find_regex(*block_open)) { - parse_tool_calls(); - } else { - builder.add_content(builder.consume_rest()); - } - } else { - parse_tool_calls(); - } -} - -common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) - : input_(input), is_partial_(is_partial), syntax_(syntax) -{ - result_.role = "assistant"; - - while (true) { - std::string id = std::to_string(std::rand()); - if (input.find(id) == std::string::npos) { - healing_marker_ = id; - break; - } - } -} - -std::string common_chat_msg_parser::str(const common_string_range & rng) const { - GGML_ASSERT(rng.begin <= rng.end); - return input_.substr(rng.begin, rng.end - rng.begin); -} - -void common_chat_msg_parser::add_content(const std::string &content) { - result_.content += content; -} - -void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) { - result_.reasoning_content += reasoning_content; -} - -bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) { - if (name.empty()) { - return false; - } - - common_chat_tool_call tool_call; - tool_call.name = name; - tool_call.arguments = arguments; - tool_call.id = id; - - // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str()); - result_.tool_calls.emplace_back(tool_call); - - return true; -} -bool common_chat_msg_parser::add_tool_call(const json & tool_call) { - std::string name = tool_call.contains("name") ? tool_call.at("name") : ""; - std::string id = tool_call.contains("id") ? tool_call.at("id") : ""; - std::string arguments = ""; - if (tool_call.contains("arguments")) { - if (tool_call.at("arguments").is_object()) { - arguments = tool_call.at("arguments").dump(); - } else { - arguments = tool_call.at("arguments"); - } - } - - return add_tool_call(name, id, arguments); -} - -bool common_chat_msg_parser::add_tool_calls(const json & arr) { - for (const auto & item : arr) { - if (!add_tool_call(item)) { - return false; - } - } - return true; -} - -bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) { - if (!tool_call.is_object() || tool_call.size() != 1) { - return false; - } - - // Get the tool name (the single key in the object) - auto it = tool_call.begin(); - std::string name = it.key(); - - if (name.empty()) { - return false; - } - - // Get the arguments (the nested object) - const json & args_json = it.value(); - std::string arguments = ""; - - if (args_json.is_object()) { - arguments = args_json.dump(); - } else if (args_json.is_string()) { - arguments = args_json; - } else if (!args_json.is_null()) { - // For other types, convert to string representation - arguments = args_json.dump(); - } - - return add_tool_call(name, "", arguments); -} -void common_chat_msg_parser::finish() { - if (!is_partial_ && pos_ != input_.size()) { - throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_)); - } -} - -bool common_chat_msg_parser::consume_spaces() { - const auto length = input_.size(); - auto consumed = false; - while (pos_ < length && std::isspace(input_[pos_])) { - ++pos_; - consumed = true; - } - return consumed; -} - -bool common_chat_msg_parser::try_consume_literal(const std::string & literal) { - auto pos = pos_; - for (auto i = 0u; i < literal.size(); ++i) { - if (pos >= input_.size()) { - return false; - } - if (input_[pos] != literal[i]) { - return false; - } - ++pos; - } - pos_ = pos; - return true; -} - -std::optional common_chat_msg_parser::try_find_literal(const std::string & literal) { - auto idx = input_.find(literal, pos_); - if (idx != std::string::npos) { - find_regex_result res; - res.prelude = input_.substr(pos_, idx - pos_); - auto end = idx + literal.size(); - res.groups.emplace_back(common_string_range{idx, end}); - move_to(end); - return res; - } - if (is_partial_) { - idx = string_find_partial_stop(input_, literal); - if (idx != std::string::npos && idx >= pos_) { - find_regex_result res; - res.prelude = input_.substr(pos_, idx - pos_); - auto end = input_.size(); - res.groups.emplace_back(common_string_range{idx, end}); - move_to(end); - return res; - } - } - return std::nullopt; -} - -void common_chat_msg_parser::consume_literal(const std::string & literal) { - if (!try_consume_literal(literal)) { - throw common_chat_msg_partial_exception(literal); - } -} - -bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) { - std::string pending_reasoning_prefix; - - if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) { - return false; - } - - auto set_reasoning_prefix = [&](size_t prefix_pos) { - if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) { - return; - } - if (prefix_pos + start_think.size() > input_.size()) { - pending_reasoning_prefix.clear(); - return; - } - // Capture the exact literal that opened the reasoning section so we can - // surface it back to callers. This ensures formats that force the - // reasoning tag open (e.g. DeepSeek R1) retain their original prefix - // instead of dropping it during parsing. - pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size()); - }; - - auto handle_reasoning = [&](const std::string & reasoning, bool closed) { - auto stripped_reasoning = string_strip(reasoning); - if (stripped_reasoning.empty()) { - return; - } - if (syntax_.reasoning_in_content) { - add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "" : start_think); - add_content(stripped_reasoning); - if (closed) { - add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "" : end_think); - } - } else { - if (!pending_reasoning_prefix.empty()) { - add_reasoning_content(pending_reasoning_prefix); - pending_reasoning_prefix.clear(); - } - add_reasoning_content(stripped_reasoning); - } - }; - - const size_t saved_pos = pos_; - const size_t saved_content_size = result_.content.size(); - const size_t saved_reasoning_size = result_.reasoning_content.size(); - - auto restore_state = [&]() { - move_to(saved_pos); - result_.content.resize(saved_content_size); - result_.reasoning_content.resize(saved_reasoning_size); - }; - - // Allow leading whitespace to be preserved as content when reasoning is present at the start - size_t cursor = pos_; - size_t whitespace_end = cursor; - while (whitespace_end < input_.size() && std::isspace(static_cast(input_[whitespace_end]))) { - ++whitespace_end; - } - - if (whitespace_end >= input_.size()) { - restore_state(); - if (syntax_.thinking_forced_open) { - auto rest = input_.substr(saved_pos); - if (!rest.empty()) { - handle_reasoning(rest, /* closed */ !is_partial()); - } - move_to(input_.size()); - return true; - } - return false; - } - - cursor = whitespace_end; - const size_t remaining = input_.size() - cursor; - const size_t start_prefix = std::min(start_think.size(), remaining); - const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0; - - if (has_start_tag && start_prefix < start_think.size()) { - move_to(input_.size()); - return true; - } - - if (has_start_tag) { - if (whitespace_end > pos_) { - add_content(input_.substr(pos_, whitespace_end - pos_)); - } - set_reasoning_prefix(cursor); - cursor += start_think.size(); - } else if (syntax_.thinking_forced_open) { - cursor = whitespace_end; - } else { - restore_state(); - return false; - } - while (true) { - if (cursor >= input_.size()) { - move_to(input_.size()); - return true; - } - - size_t end_pos = input_.find(end_think, cursor); - if (end_pos == std::string::npos) { - std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor); - size_t partial_off = string_find_partial_stop(remaining_view, end_think); - size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off; - if (reasoning_end > cursor) { - handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial()); - } - move_to(input_.size()); - return true; - } - - if (end_pos > cursor) { - handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true); - } else { - handle_reasoning("", /* closed */ true); - } - - cursor = end_pos + end_think.size(); - - while (cursor < input_.size() && std::isspace(static_cast(input_[cursor]))) { - ++cursor; - } - - const size_t next_remaining = input_.size() - cursor; - if (next_remaining == 0) { - move_to(cursor); - return true; - } - - const size_t next_prefix = std::min(start_think.size(), next_remaining); - if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) { - if (next_prefix < start_think.size()) { - move_to(input_.size()); - return true; - } - set_reasoning_prefix(cursor); - cursor += start_think.size(); - continue; - } - - move_to(cursor); - return true; - } -} - -std::string common_chat_msg_parser::consume_rest() { - auto rest = input_.substr(pos_); - pos_ = input_.size(); - return rest; -} - -// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback. -std::optional common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) { - auto m = regex.search(input_, from == std::string::npos ? pos_ : from); - if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) { - return std::nullopt; - } - auto prelude = input_.substr(pos_, m.groups[0].begin - pos_); - pos_ = m.groups[0].end; - - if (add_prelude_to_content) { - add_content(prelude); - } - if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) { - if (is_partial()) { - throw common_chat_msg_partial_exception(regex.str()); - } - return std::nullopt; - } - return find_regex_result{prelude, m.groups}; -} - -common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) { - if (auto result = try_consume_regex(regex)) { - return *result; - } - throw common_chat_msg_partial_exception(regex.str()); -} - -std::optional common_chat_msg_parser::try_consume_regex(const common_regex & regex) { - auto m = regex.search(input_, pos_); - if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) { - return std::nullopt; - } - if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) { - if (is_partial()) { - throw common_chat_msg_partial_exception(regex.str()); - } - return std::nullopt; - } - if (m.groups[0].begin != pos_) { - // Didn't match at the current position. - return std::nullopt; - } - pos_ = m.groups[0].end; - - return find_regex_result { - /* .prelude = */ "", - m.groups, - }; -} - -std::optional common_chat_msg_parser::try_consume_json() { - auto it = input_.cbegin() + pos_; - const auto end = input_.cend(); - common_json result; - if (!common_json_parse(it, end, healing_marker_, result)) { - return std::nullopt; - } - pos_ = std::distance(input_.cbegin(), it); - if (result.healing_marker.marker.empty()) { - // No healing marker, just return the parsed json - return result; - } - if (!is_partial()) { - throw common_chat_msg_partial_exception("JSON"); - } - return result; -} - -common_json common_chat_msg_parser::consume_json() { - if (auto result = try_consume_json()) { - return *result; - } - throw common_chat_msg_partial_exception("JSON"); -} - -common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args( - const std::vector> & args_paths, - const std::vector> & content_paths -) { - if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) { - return *result; - } - throw common_chat_msg_partial_exception("JSON"); -} - -std::optional common_chat_msg_parser::try_consume_json_with_dumped_args( - const std::vector> & args_paths, - const std::vector> & content_paths -) { - auto partial = try_consume_json(); - if (!partial) { - return std::nullopt; - } - auto is_arguments_path = [&](const std::vector & path) { - return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end(); - }; - auto is_content_path = [&](const std::vector & path) { - return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end(); - }; - - if (partial->healing_marker.marker.empty()) { - if (args_paths.empty()) { - // No arguments to dump, and JSON was parsed fully. - return consume_json_result { - partial->json, - /* .is_partial = */ false, - }; - } - if (is_arguments_path({})) { - // Entire JSON is the arguments and was parsed fully. - return consume_json_result { - partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true), - /* .is_partial = */ false, - }; - } - } - - LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str()); - - auto found_healing_marker = false; - std::vector path; - std::function remove_unsupported_healings_and_dump_args = [&](const json & j) -> json { - if (is_arguments_path(path)) { - auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true); - if (is_partial() && !partial->healing_marker.marker.empty()) { - auto idx = arguments.find(partial->healing_marker.json_dump_marker); - if (idx != std::string::npos) { - arguments.resize(idx); - found_healing_marker = true; - } - if (arguments == "\"") { - // This happens because of completing `:"$magic` after `"arguments"` - arguments = ""; - } - } - return arguments; - } - if (is_content_path(path)) { - if (!j.is_string()) { - throw std::runtime_error("Content path must be a string"); - } - std::string str = j; - auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string - if (idx != std::string::npos) { - str.resize(idx); - found_healing_marker = true; - } - return str; - } - if (j.is_object()) { - auto obj = json::object(); - for (const auto & p : j.items()) { - const auto & key = p.key(); - const auto & value = p.value(); - const std::string key_str = key; // NOLINT - auto idx = key_str.find(healing_marker_); - if (idx != std::string::npos) { - found_healing_marker = true; - break; - } - path.push_back(key_str); - if (value.is_string()) { - const std::string value_str = value; - if (value_str.find(healing_marker_) != std::string::npos) { - found_healing_marker = true; - if (is_content_path(path)) { - if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) { - // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair. - obj[key] = remove_unsupported_healings_and_dump_args(value); - } - } - break; - } - obj[key] = value; - } else { - obj[key] = remove_unsupported_healings_and_dump_args(value); - } - path.pop_back(); - } - return obj; - } - if (j.is_array()) { - auto arr = json::array(); - for (const auto & value : j) { - if (value.is_string()) { - std::string str = value; - auto idx = str.find(healing_marker_); - if (idx != std::string::npos) { - // Don't heal array values that aren't in the arguments. - found_healing_marker = true; - break; - } - } - arr.push_back(remove_unsupported_healings_and_dump_args(value)); - } - return arr; - } - return j; - }; - - auto cleaned = remove_unsupported_healings_and_dump_args(partial->json); - LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str()); - return consume_json_result { - cleaned, - /* .is_partial = */ found_healing_marker, - }; -} - -void common_chat_msg_parser::clear_tools() { - result_.tool_calls.clear(); -} - -/** - * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below - * to reduce incremental compile time for parser changes. - */ -static void common_chat_parse_generic(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - static const std::vector> content_paths = { - {"response"}, - }; - static const std::vector> args_paths = { - {"tool_call", "arguments"}, - {"tool_calls", "arguments"}, - }; - auto data = builder.consume_json_with_dumped_args(args_paths, content_paths); - if (data.value.contains("tool_calls")) { - if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool calls"); - } - } else if (data.value.contains("tool_call")) { - if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } else if (data.value.contains("response")) { - const auto & response = data.value.at("response"); - builder.add_content(response.is_string() ? response.template get() : response.dump(2)); - if (data.is_partial) { - throw common_chat_msg_partial_exception("incomplete response"); - } - } else { - throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON"); - } -} - -static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex prefix(regex_escape("[TOOL_CALLS]")); - parse_prefixed_json_tool_call_array(builder, prefix); -} - -static void common_chat_parse_magistral(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("[THINK]", "[/THINK]"); - - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex prefix(regex_escape("[TOOL_CALLS]")); - parse_prefixed_json_tool_call_array(builder, prefix); -} - -static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>"); - - static const common_regex start_action_regex("<\\|START_ACTION\\|>"); - static const common_regex end_action_regex("<\\|END_ACTION\\|>"); - static const common_regex start_response_regex("<\\|START_RESPONSE\\|>"); - static const common_regex end_response_regex("<\\|END_RESPONSE\\|>"); - - if (auto res = builder.try_find_regex(start_action_regex)) { - // If we didn't extract thoughts, prelude includes them. - auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}}); - for (const auto & tool_call : tool_calls.value) { - std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : ""; - std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : ""; - std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : ""; - if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - if (tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_regex(end_action_regex); - } else if (auto res = builder.try_find_regex(start_response_regex)) { - if (!builder.try_find_regex(end_response_regex)) { - builder.add_content(builder.consume_rest()); - throw common_chat_msg_partial_exception(end_response_regex.str()); - } - } else { - builder.add_content(builder.consume_rest()); - } -} - -static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) { - builder.try_parse_reasoning("", ""); - - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex function_regex( - "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: "); - static const common_regex close_regex("\\}\\s*"); - - static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\("); - static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*"); - - if (with_builtin_tools) { - static const common_regex builtin_call_regex("<\\|python_tag\\|>"); - if (auto res = builder.try_find_regex(builtin_call_regex)) { - auto fun_res = builder.consume_regex(function_name_regex); - auto function_name = builder.str(fun_res.groups[1]); - - common_healing_marker healing_marker; - json args = json::object(); - while (true) { - if (auto arg_res = builder.try_consume_regex(arg_name_regex)) { - auto arg_name = builder.str(arg_res->groups[1]); - auto partial = builder.consume_json(); - args[arg_name] = partial.json; - healing_marker.marker = partial.healing_marker.marker; - healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker; - builder.consume_spaces(); - if (!builder.try_consume_literal(",")) { - break; - } - } else { - break; - } - } - builder.consume_literal(")"); - builder.consume_spaces(); - - auto arguments = args.dump(); - if (!builder.add_tool_call(function_name, "", arguments)) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - return; - } - } - parse_json_tool_calls( - builder, - /* block_open= */ std::nullopt, - /* function_regex_start_only= */ function_regex, - /* function_regex= */ std::nullopt, - close_regex, - std::nullopt); - -} - -static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("", ""); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)"); - static const common_regex tool_calls_end("<|tool▁calls▁end|>"); - static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n"); - static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>"); - - parse_json_tool_calls( - builder, - /* block_open= */ tool_calls_begin, - /* function_regex_start_only= */ std::nullopt, - function_regex, - close_regex, - tool_calls_end); -} - -static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) { - static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)"); - - static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>"); - static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)"); - static const common_regex tool_calls_end("<|tool▁calls▁end|>"); - - if (!builder.syntax().parse_tool_calls) { - LOG_DBG("%s: not parse_tool_calls\n", __func__); - builder.add_content(builder.consume_rest()); - return; - } - - LOG_DBG("%s: parse_tool_calls\n", __func__); - - parse_json_tool_calls( - builder, - /* block_open= */ tool_calls_begin, - /* function_regex_start_only= */ std::nullopt, - function_regex, - close_regex, - tool_calls_end); -} - -static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { - // DeepSeek V3.1 outputs reasoning content between "" and "" tags, followed by regular content - // First try to parse using the standard reasoning parsing method - LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); - - auto start_pos = builder.pos(); - auto found_end_think = builder.try_find_literal(""); - builder.move_to(start_pos); - - if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { - LOG_DBG("%s: no end_think, not partial, adding content\n", __func__); - common_chat_parse_deepseek_v3_1_content(builder); - } else if (builder.try_parse_reasoning("", "")) { - // If reasoning was parsed successfully, the remaining content is regular content - LOG_DBG("%s: parsed reasoning, adding content\n", __func__); - // <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|> - common_chat_parse_deepseek_v3_1_content(builder); - } else { - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { - LOG_DBG("%s: reasoning_format none, adding content\n", __func__); - common_chat_parse_deepseek_v3_1_content(builder); - return; - } - // If no reasoning tags found, check if we should treat everything as reasoning - if (builder.syntax().thinking_forced_open) { - // If thinking is forced open but no tags found, treat everything as reasoning - LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__); - builder.add_reasoning_content(builder.consume_rest()); - } else { - LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__); - // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|> - common_chat_parse_deepseek_v3_1_content(builder); - } - } -} - -static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) { - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "", - /* form.key_start = */ "", - /* form.val_end = */ "", - /* form.tool_end = */ "", - /* form.scope_end = */ "", - }; - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) { - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = ""; - form.tool_start = "", ""); -} - -static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) { - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "["; - form.tool_start = "{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}, "; - form.scope_end = "]"; - form.raw_argval = false; - form.last_val_end = ""; - form.last_tool_end = "}"; - return form; - })(); - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) { - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = ""; - form.tool_start = "\n{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}\n"; - form.scope_end = ""; - form.raw_argval = false; - form.last_val_end = ""; - return form; - })(); - builder.consume_reasoning_with_xml_tool_calls(form); -} - -static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) { - static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))"; - static const std::string recipient("(?: to=functions\\.([^<\\s]+))"); - - static const common_regex start_regex("<\\|start\\|>assistant"); - static const common_regex analysis_regex("<\\|channel\\|>analysis"); - static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?"); - static const common_regex preamble_regex("<\\|channel\\|>commentary"); - static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?"); - static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?"); - - auto consume_end = [&](bool include_end = false) { - if (auto res = builder.try_find_literal("<|end|>")) { - return res->prelude + (include_end ? builder.str(res->groups[0]) : ""); - } - return builder.consume_rest(); - }; - - auto handle_tool_call = [&](const std::string & name) { - if (auto args = builder.try_consume_json_with_dumped_args({{}})) { - if (builder.syntax().parse_tool_calls) { - if (!builder.add_tool_call(name, "", args->value) || args->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } else if (args->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - }; - - auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional { - auto match = regex.search(input, 0, true); - if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) { - return match; - } - return std::nullopt; - }; - - do { - auto header_start_pos = builder.pos(); - auto content_start = builder.try_find_literal("<|message|>"); - if (!content_start) { - throw common_chat_msg_partial_exception("incomplete header"); - } - - auto header = content_start->prelude; - - if (auto match = regex_match(tool_call1_regex, header)) { - auto group = match->groups[1]; - auto name = header.substr(group.begin, group.end - group.begin); - handle_tool_call(name); - continue; - } - - if (auto match = regex_match(tool_call2_regex, header)) { - auto group = match->groups[2]; - auto name = header.substr(group.begin, group.end - group.begin); - handle_tool_call(name); - continue; - } - - if (regex_match(analysis_regex, header)) { - builder.move_to(header_start_pos); - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) { - builder.add_content(consume_end(true)); - } else { - builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>"); - } - continue; - } - - if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) { - builder.add_content(consume_end()); - continue; - } - - // Possibly a malformed message, attempt to recover by rolling - // back to pick up the next <|start|> - LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str()); - builder.move_to(header_start_pos); - } while (builder.try_find_regex(start_regex, std::string::npos, false)); - - auto remaining = builder.consume_rest(); - if (!remaining.empty()) { - LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str()); - } -} - -static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) { - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "", - /* form.tool_sep = */ "", - /* form.key_start = */ "", - /* form.key_val_sep = */ "", - /* form.val_end = */ "", - /* form.tool_end = */ "", - /* form.scope_end = */ "", - /* form.key_val_sep2 = */ "", - }; - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - static const common_regex prefix(regex_escape(" functools[")); - parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1); -} - -static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) { - static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))"); - static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))"); - static const common_regex close_regex(R"(\s*)"); - - parse_json_tool_calls( - builder, - std::nullopt, - function_regex_start_only, - function_regex, - close_regex, - std::nullopt, - /* allow_raw_python= */ true, - /* get_function_name= */ [&](const auto & res) -> std::string { - auto at_start = res.groups[0].begin == 0; - auto name = builder.str(res.groups[1]); - if (!name.empty() && name.back() == '{') { - // Unconsume the opening brace '{' to ensure the JSON parsing goes well. - builder.move_back(1); - } - auto idx = name.find_last_not_of("\n{"); - name = name.substr(0, idx + 1); - if (at_start && name == "all") { - return ""; - } - return name; - }); -} - -static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - // This version of Functionary still supports the llama 3.1 tool call format for the python tool. - static const common_regex python_tag_regex(regex_escape("<|python_tag|>")); - - static const common_regex function_regex(R"()"); - static const common_regex close_regex(R"()"); - - parse_json_tool_calls( - builder, - /* block_open= */ std::nullopt, - /* function_regex_start_only= */ std::nullopt, - function_regex, - close_regex, - std::nullopt); - - if (auto res = builder.try_find_regex(python_tag_regex)) { - auto arguments = wrap_code_as_arguments(builder, builder.consume_rest()); - builder.add_tool_call("python", "", arguments); - return; - } -} - -static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("", ""); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex open_regex( - "(?:" - "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start) - "(" // match 2 (open_tag) - "" - "|" - "|" - "|" - "|" - "|" - "|" - "|" - ")?" - "(\\s*\\{\\s*\"name\")" // match 3 (named tool call) - ")" - "|]+)>" // match 4 (function name) - "|" // match 5 (function name again) - ); - - while (auto res = builder.try_find_regex(open_regex)) { - const auto & block_start = res->groups[1]; - std::string block_end = block_start.empty() ? "" : "```"; - - const auto & open_tag = res->groups[2]; - std::string close_tag; - - if (!res->groups[3].empty()) { - builder.move_to(res->groups[3].begin); - close_tag = open_tag.empty() ? "" : "value) || tool_call->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_spaces(); - builder.consume_literal(close_tag); - builder.consume_spaces(); - if (!block_end.empty()) { - builder.consume_literal(block_end); - builder.consume_spaces(); - } - } else { - throw common_chat_msg_partial_exception("failed to parse tool call"); - } - } else { - auto function_name = builder.str(res->groups[4]); - if (function_name.empty()) { - function_name = builder.str(res->groups[5]); - } - GGML_ASSERT(!function_name.empty()); - - close_tag = ""; - - if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) { - if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_spaces(); - builder.consume_literal(close_tag); - builder.consume_spaces(); - if (!block_end.empty()) { - builder.consume_literal(block_end); - builder.consume_spaces(); - } - } - } - } - - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_granite(common_chat_msg_parser & builder) { - // Parse thinking tags - static const common_regex start_think_regex(regex_escape("")); - static const common_regex end_think_regex(regex_escape("")); - // Granite models output partial tokens such as "<" and "groups[0].begin); - builder.try_find_regex(end_think_regex, std::string::npos, false); - // Restore position for try_parse_reasoning() - builder.move_to(res->groups[0].begin); - } - builder.try_parse_reasoning("", ""); - - // Parse response tags - static const common_regex start_response_regex(regex_escape("")); - static const common_regex end_response_regex(regex_escape("")); - // Granite models output partial tokens such as "<" and "")); - if (auto res = builder.try_find_regex(tool_call_regex)) { - builder.move_to(res->groups[0].end); - - // Expect JSON array of tool calls - if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) { - if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - } else { - builder.add_content(builder.consume_rest()); - } -} - -static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) { - // Parse thinking tags - builder.try_parse_reasoning("", ""); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - // Look for tool calls - static const common_regex tool_call_regex(regex_escape("")); - if (auto res = builder.try_find_regex(tool_call_regex)) { - builder.move_to(res->groups[0].end); - - // Expect JSON array of tool calls - auto tool_calls_data = builder.consume_json(); - if (tool_calls_data.json.is_array()) { - if (!builder.try_consume_literal("")) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - builder.add_tool_calls(tool_calls_data.json); - } else { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - } - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_apertus(common_chat_msg_parser & builder) { - // Parse thinking tags - builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>"); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - // Look for tool calls - static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>")); - if (auto res = builder.try_find_regex(tool_call_regex)) { - builder.move_to(res->groups[0].end); - - auto tool_calls_data = builder.consume_json(); - if (tool_calls_data.json.is_array()) { - builder.consume_spaces(); - if (!builder.try_consume_literal("<|tools_suffix|>")) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - for (const auto & value : tool_calls_data.json) { - if (value.is_object()) { - builder.add_tool_call_short_form(value); - } - } - } else { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - } - builder.add_content(builder.consume_rest()); -} - - -static void common_chat_parse_lfm2(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|> - static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>")); - static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>")); - - // Loop through all tool calls - while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) { - builder.move_to(res->groups[0].end); - - // Parse JSON array format: [{"name": "...", "arguments": {...}}] - auto tool_calls_data = builder.consume_json(); - - // Consume end marker - builder.consume_spaces(); - if (!builder.try_consume_regex(tool_call_end_regex)) { - throw common_chat_msg_partial_exception("Expected <|tool_call_end|>"); - } - - // Process each tool call in the array - if (tool_calls_data.json.is_array()) { - for (const auto & tool_call : tool_calls_data.json) { - if (!tool_call.is_object()) { - throw common_chat_msg_partial_exception("Tool call must be an object"); - } - - if (!tool_call.contains("name")) { - throw common_chat_msg_partial_exception("Tool call missing 'name' field"); - } - - std::string function_name = tool_call.at("name"); - std::string arguments = "{}"; - - if (tool_call.contains("arguments")) { - if (tool_call.at("arguments").is_object()) { - arguments = tool_call.at("arguments").dump(); - } else if (tool_call.at("arguments").is_string()) { - arguments = tool_call.at("arguments"); - } - } - - if (!builder.add_tool_call(function_name, "", arguments)) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - } - } else { - throw common_chat_msg_partial_exception("Expected JSON array for tool calls"); - } - - // Consume any trailing whitespace after this tool call - builder.consume_spaces(); - } - - // Consume any remaining content after all tool calls - auto remaining = builder.consume_rest(); - if (!string_strip(remaining).empty()) { - builder.add_content(remaining); - } -} - -static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) { - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "", - /* form.key_start = */ "", - /* form.val_end = */ "", - /* form.tool_end = */ "", - /* form.scope_end = */ "", - }; - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_solar_open(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>"); - - // TODO: Tool calling - - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) { - // 1) { "name": "...", "arguments": {...} } - // 2) { "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } } - static const common_regex tool_call_open(R"(]*>)"); - - if (!builder.syntax().parse_tool_calls) { - LOG_DBG("%s: not parse_tool_calls\n", __func__); - builder.add_content(builder.consume_rest()); - return; - } - - LOG_DBG("%s: parse_tool_calls\n", __func__); - - // Find all blocks - while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) { - builder.move_to(first->groups[0].end); - builder.consume_spaces(); - - builder.try_consume_literal("```json"); - builder.try_consume_literal("```"); - builder.consume_spaces(); - - // Consume JSON object - auto data = builder.consume_json(); - - builder.consume_spaces(); - builder.try_consume_literal("```"); - builder.consume_spaces(); - - if (!builder.try_consume_literal("")) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_spaces(); - - // Extract name and arguments - std::string name; - std::string id; - nlohmann::ordered_json arguments; - - const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool { - if (!obj.contains("name") || !obj.contains("arguments")) { - return false; - } - name = obj.at("name").get(); - arguments = obj.at("arguments"); - if (obj.contains("id") && obj.at("id").is_string()) { - id = obj.at("id").get(); - } - return true; - }; - - if (!extract_args(data.json)) { - if (data.json.contains("function") && data.json.at("function").is_object()) { - auto fn = data.json.at("function"); - extract_args(fn); - if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) { - id = data.json.at("id").get(); - } - } - } - - // If name is empty, treat the JSON object as content - if (name.empty()) { - LOG_DBG("%s: tool call missing name, treating as content\n", __func__); - builder.add_content(data.json.dump()); - continue; - } - - std::string args_str = arguments.dump(); - if (!builder.add_tool_call(name, id, args_str)) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) { - LOG_DBG("%s: parsing exaone_moe\n", __func__); - // EXAONE MoE outputs reasoning content between "" and "" tags, followed by regular content - // First try to parse using the standard reasoning parsing method - LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); - - auto start_pos = builder.pos(); - auto found_end_think = builder.try_find_literal(""); - builder.move_to(start_pos); - - if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { - LOG_DBG("%s: no end_think, not partial, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - } else if (builder.try_parse_reasoning("", "")) { - // If reasoning was parsed successfully, the remaining content is regular content - LOG_DBG("%s: parsed reasoning, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - } else { - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { - LOG_DBG("%s: reasoning_format none, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - return; - } - // If no reasoning tags found, check if we should treat everything as reasoning - if (builder.syntax().thinking_forced_open) { - // If thinking is forced open but no tags found, treat everything as reasoning - LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__); - builder.add_reasoning_content(builder.consume_rest()); - } else { - LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - } - } -} - -static void common_chat_parse_content_only(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("", ""); - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse(common_chat_msg_parser & builder) { - LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str()); - - switch (builder.syntax().format) { - case COMMON_CHAT_FORMAT_CONTENT_ONLY: - common_chat_parse_content_only(builder); - break; - case COMMON_CHAT_FORMAT_GENERIC: - common_chat_parse_generic(builder); - break; - case COMMON_CHAT_FORMAT_MISTRAL_NEMO: - common_chat_parse_mistral_nemo(builder); - break; - case COMMON_CHAT_FORMAT_MAGISTRAL: - common_chat_parse_magistral(builder); - break; - case COMMON_CHAT_FORMAT_LLAMA_3_X: - common_chat_parse_llama_3_1(builder); - break; - case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: - common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true); - break; - case COMMON_CHAT_FORMAT_DEEPSEEK_R1: - common_chat_parse_deepseek_r1(builder); - break; - case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: - common_chat_parse_deepseek_v3_1(builder); - break; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: - common_chat_parse_functionary_v3_2(builder); - break; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: - common_chat_parse_functionary_v3_1_llama_3_1(builder); - break; - case COMMON_CHAT_FORMAT_HERMES_2_PRO: - common_chat_parse_hermes_2_pro(builder); - break; - case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: - common_chat_parse_firefunction_v2(builder); - break; - case COMMON_CHAT_FORMAT_COMMAND_R7B: - common_chat_parse_command_r7b(builder); - break; - case COMMON_CHAT_FORMAT_GRANITE: - common_chat_parse_granite(builder); - break; - case COMMON_CHAT_FORMAT_GPT_OSS: - common_chat_parse_gpt_oss(builder); - break; - case COMMON_CHAT_FORMAT_SEED_OSS: - common_chat_parse_seed_oss(builder); - break; - case COMMON_CHAT_FORMAT_NEMOTRON_V2: - common_chat_parse_nemotron_v2(builder); - break; - case COMMON_CHAT_FORMAT_APERTUS: - common_chat_parse_apertus(builder); - break; - case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: - common_chat_parse_lfm2(builder); - break; - case COMMON_CHAT_FORMAT_MINIMAX_M2: - common_chat_parse_minimax_m2(builder); - break; - case COMMON_CHAT_FORMAT_GLM_4_5: - common_chat_parse_glm_4_5(builder); - break; - case COMMON_CHAT_FORMAT_KIMI_K2: - common_chat_parse_kimi_k2(builder); - break; - case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: - common_chat_parse_qwen3_coder_xml(builder); - break; - case COMMON_CHAT_FORMAT_APRIEL_1_5: - common_chat_parse_apriel_1_5(builder); - break; - case COMMON_CHAT_FORMAT_XIAOMI_MIMO: - common_chat_parse_xiaomi_mimo(builder); - break; - case COMMON_CHAT_FORMAT_SOLAR_OPEN: - common_chat_parse_solar_open(builder); - break; - case COMMON_CHAT_FORMAT_EXAONE_MOE: - common_chat_parse_exaone_moe(builder); - break; - default: - throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format)); - } - builder.finish(); -} - -common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) { - if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE || - syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE || - syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) { - return common_chat_peg_parse(syntax.parser, input, is_partial, syntax); - } - common_chat_msg_parser builder(input, is_partial, syntax); - try { - common_chat_parse(builder); - } catch (const common_chat_msg_partial_exception & ex) { - LOG_DBG("Partial parse: %s\n", ex.what()); - if (!is_partial) { - builder.clear_tools(); - builder.move_to(0); - common_chat_parse_content_only(builder); - } - } - auto msg = builder.result(); - if (!is_partial) { - LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str()); - } - return msg; -} - -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) { - if (parser.empty()) { - throw std::runtime_error("Failed to parse due to missing parser definition."); - } - - LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str()); - - common_peg_parse_context ctx(input, is_partial); - auto result = parser.parse(ctx); - if (result.fail()) { - throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end)); - } - - common_chat_msg msg; - msg.role = "assistant"; - - if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) { - auto mapper = common_chat_peg_native_mapper(msg); - mapper.from_ast(ctx.ast, result); - } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) { - auto mapper = common_chat_peg_constructed_mapper(msg); - mapper.from_ast(ctx.ast, result); - } else { - // Generic mapper - auto mapper = common_chat_peg_mapper(msg); - mapper.from_ast(ctx.ast, result); - } - if (!is_partial) { - LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str()); - } - return msg; -} diff --git a/common/chat-parser.h b/common/chat-parser.h deleted file mode 100644 index 3ed9c30a2b..0000000000 --- a/common/chat-parser.h +++ /dev/null @@ -1,133 +0,0 @@ -#pragma once - -#include "chat.h" -#include "chat-parser-xml-toolcall.h" -#include "json-partial.h" -#include "regex-partial.h" - -#include - -#include -#include -#include - -class common_chat_msg_partial_exception : public std::runtime_error { - public: - common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {} -}; - -class common_chat_msg_parser { - std::string input_; - bool is_partial_; - common_chat_parser_params syntax_; // TODO: rename to params - std::string healing_marker_; - - size_t pos_ = 0; - common_chat_msg result_; - - public: - common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax); - const std::string & input() const { return input_; } - size_t pos() const { return pos_; } - const std::string & healing_marker() const { return healing_marker_; } - const bool & is_partial() const { return is_partial_; } - const common_chat_msg & result() const { return result_; } - const common_chat_parser_params & syntax() const { return syntax_; } - - void move_to(size_t pos) { - if (pos > input_.size()) { - throw std::runtime_error("Invalid position!"); - } - pos_ = pos; - } - void move_back(size_t n) { - if (pos_ < n) { - throw std::runtime_error("Can't move back that far!"); - } - pos_ -= n; - } - - // Get the substring of the input at the given range - std::string str(const common_string_range & rng) const; - - // Appends to the result.content field - void add_content(const std::string & content); - - // Appends to the result.reasoning_content field - void add_reasoning_content(const std::string & reasoning_content); - - // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything. - bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments); - - // Adds a tool call using the "name", "id" and "arguments" fields of the json object - bool add_tool_call(const nlohmann::ordered_json & tool_call); - - // Adds an array of tool calls using their "name", "id" and "arguments" fields. - bool add_tool_calls(const nlohmann::ordered_json & arr); - - // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } } - bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call); - - void finish(); - - bool consume_spaces(); - - void consume_literal(const std::string & literal); - - bool try_parse_reasoning(const std::string & start_think, const std::string & end_think); - - std::string consume_rest(); - - struct find_regex_result { - std::string prelude; - std::vector groups; - }; - - std::optional try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true); - - bool try_consume_literal(const std::string & literal); - - std::optional try_find_literal(const std::string & literal); - - find_regex_result consume_regex(const common_regex & regex); - - std::optional try_consume_regex(const common_regex & regex); - - std::optional try_consume_json(); - common_json consume_json(); - - struct consume_json_result { - nlohmann::ordered_json value; - bool is_partial; - }; - - /* - Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings. - - By default, object keys can't be truncated, nor can string values (their corresponding key is removed, - e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}` - - But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings - - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}` - - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}` - */ - consume_json_result consume_json_with_dumped_args( - const std::vector> & args_paths = {}, - const std::vector> & content_paths = {} - ); - std::optional try_consume_json_with_dumped_args( - const std::vector> & args_paths = {}, - const std::vector> & content_paths = {} - ); - - /** - * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched. - * form.scope_start, form.tool_sep and form.scope_end can be empty. - */ - bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form); - - // Parse content uses reasoning and XML-Style tool call - void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "", const std::string & end_think = ""); - - void clear_tools(); -}; diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 1bcba9cd86..039e52177c 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -1,13 +1,16 @@ #include "chat-peg-parser.h" +#include "chat-auto-parser.h" +#include "ggml.h" + #include -using json = nlohmann::json; +using json = nlohmann::ordered_json; static std::string_view trim_trailing_space(std::string_view sv, int max = -1) { int count = 0; while (!sv.empty() && std::isspace(static_cast(sv.back()))) { - if (max != -1 && count <= max) { + if (max != -1 && count >= max) { break; } sv.remove_suffix(1); @@ -16,109 +19,746 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) { return sv; } +static std::string_view trim_leading_space(std::string_view sv, int max = -1) { + int count = 0; + while (!sv.empty() && std::isspace(static_cast(sv.front()))) { + if (max != -1 && count >= max) { + break; + } + sv.remove_prefix(1); + count++; + } + return sv; +} + +static std::string_view trim(std::string_view sv) { + return trim_trailing_space(trim_leading_space(sv, 1)); +} + +// Count the number of unclosed '{' braces in a JSON-like string, +// properly skipping braces inside quoted strings. +static int json_brace_depth(const std::string & s) { + int depth = 0; + bool in_string = false; + bool escaped = false; + for (char c : s) { + if (escaped) { + escaped = false; + continue; + } + if (c == '\\' && in_string) { + escaped = true; + continue; + } + if (c == '"') { + in_string = !in_string; + continue; + } + if (!in_string) { + if (c == '{') { + depth++; + } else if (c == '}') { + depth--; + } + } + } + return depth; +} + +// JSON-escape a string and return the inner content (without surrounding quotes). +static std::string escape_json_string_inner(const std::string & s) { + std::string escaped = json(s).dump(); + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + return escaped.substr(1, escaped.size() - 2); + } + return escaped; +} + +// Convert Python-style single-quoted strings to JSON double-quoted strings +// Only converts outer string delimiters, properly handling escape sequences: +// - {'key': 'value'} -> {"key": "value"} +// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"} +// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""} +static std::string normalize_quotes_to_json(const std::string & input) { + std::string result; + result.reserve(input.size() + 16); // May need extra space for escaping + + bool in_single_quoted = false; + bool in_double_quoted = false; + + for (size_t i = 0; i < input.size(); ++i) { + char c = input[i]; + + // Handle escape sequences + if (c == '\\' && i + 1 < input.size()) { + char next = input[i + 1]; + + if (in_single_quoted) { + // Inside a single-quoted string being converted to double quotes + if (next == '\'') { + // \' -> ' (escaped single quote becomes unescaped in double-quoted string) + result += '\''; + ++i; + continue; + } + if (next == '"') { + // \" stays as \" (already escaped, works in double-quoted string) + result += "\\\""; + ++i; + continue; + } + // Other escapes (\n, \\, etc.): pass through both characters + result += c; + result += next; + ++i; + continue; + } + + if (in_double_quoted) { + // Inside a double-quoted string - pass through escape sequences as-is + result += c; + result += next; + ++i; + continue; + } + + // Outside any string - just pass through the backslash + result += c; + continue; + } + + // Handle quote characters + if (c == '"') { + if (in_single_quoted) { + // Unescaped double quote inside single-quoted string -> must escape for JSON + result += "\\\""; + } else { + // Double quote as string delimiter or outside strings + in_double_quoted = !in_double_quoted; + result += c; + } + } else if (c == '\'') { + if (in_double_quoted) { + // Single quote inside double-quoted string -> pass through + result += c; + } else if (in_single_quoted) { + // Closing single quote -> convert to double quote + in_single_quoted = false; + result += '"'; + } else { + // Opening single quote -> convert to double quote + in_single_quoted = true; + result += '"'; + } + } else { + result += c; + } + } + + return result; +} + void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) { - arena.visit(result, [this](const common_peg_ast_node & node) { - map(node); - }); + arena.visit(result, [this](const common_peg_ast_node & node) { map(node); }); } void common_chat_peg_mapper::map(const common_peg_ast_node & node) { bool is_reasoning = node.tag == common_chat_peg_builder::REASONING; - bool is_content = node.tag == common_chat_peg_builder::CONTENT; + bool is_content = node.tag == common_chat_peg_builder::CONTENT; - if (is_reasoning) { - result.reasoning_content = std::string(trim_trailing_space(node.text)); + if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here + result.reasoning_content += std::string(node.text); } if (is_content) { - result.content = std::string(trim_trailing_space(node.text)); + // Concatenate content from multiple content nodes (e.g., when reasoning markers + // are preserved before content markers in reasoning_format=NONE mode) + result.content += std::string(node.text); } } -void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) { +void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) { + arena.visit(result, [this](const common_peg_ast_node & node) { + if (!node.tag.empty()) { + tags[node.tag] = std::string(node.text); + } + }); +} + +tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const { + common_peg_parse_context ctx(input, is_partial); + auto parse_result = arena.parse(ctx); + + tag_based_peg_mapper mapper; + mapper.from_ast(ctx.ast, parse_result); + + return { std::move(parse_result), std::move(mapper.tags) }; +} + +tagged_peg_parser build_tagged_peg_parser( + const std::function & fn) { + common_peg_parser_builder builder; + builder.set_root(fn(builder)); + return { builder.build() }; +} + +common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string & tag_name, + const std::string & marker, + const common_peg_parser & p) { + if (marker.empty()) { + return zero_or_more(choice({ p, rule(tag_name, content(any())) })); + } + auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker))); + return zero_or_more(choice({ p, content_chunk })); +} + +std::string & common_chat_peg_unified_mapper::args_target() { + return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer; +} + +void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena, + const common_peg_parse_result & parse_result_arg) { + // Call base class to visit all nodes + common_chat_peg_mapper::from_ast(arena, parse_result_arg); + + // Flush any pending tool call that was started but never got a name + // This happens during partial parsing when the tool call is incomplete + if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) { + if (!args_buffer.empty()) { + pending_tool_call->arguments = args_buffer; + } + if (closing_quote_pending && !pending_tool_call->arguments.empty()) { + pending_tool_call->arguments += "\""; + } + result.tool_calls.push_back(pending_tool_call.value()); + pending_tool_call.reset(); + } +} + +void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { + // First call base class for reasoning/content handling common_chat_peg_mapper::map(node); - bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN; - bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME; - bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID; - bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS; + // Handle tool-related tags (unified version supporting both JSON and tagged formats) + bool is_tool_open = node.tag == common_chat_peg_unified_builder::TOOL_OPEN; + bool is_tool_close = node.tag == common_chat_peg_unified_builder::TOOL_CLOSE; + bool is_tool_name = node.tag == common_chat_peg_unified_builder::TOOL_NAME; + bool is_tool_id = node.tag == common_chat_peg_unified_builder::TOOL_ID; + bool is_tool_args = node.tag == common_chat_peg_unified_builder::TOOL_ARGS; + bool is_arg_open = node.tag == common_chat_peg_unified_builder::TOOL_ARG_OPEN; + bool is_arg_close = node.tag == common_chat_peg_unified_builder::TOOL_ARG_CLOSE; + bool is_arg_name = node.tag == common_chat_peg_unified_builder::TOOL_ARG_NAME; + bool is_arg_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_VALUE; + bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE; if (is_tool_open) { - result.tool_calls.emplace_back(); - current_tool = &result.tool_calls.back(); + pending_tool_call = common_chat_tool_call(); + current_tool = &pending_tool_call.value(); + arg_count = 0; + args_buffer.clear(); + closing_quote_pending = false; } if (is_tool_id && current_tool) { - current_tool->id = std::string(trim_trailing_space(node.text)); + auto text = trim_trailing_space(node.text); + if (text.size() >= 2 && text.front() == '"' && text.back() == '"') { + text = text.substr(1, text.size() - 2); + } + current_tool->id = std::string(text); } if (is_tool_name && current_tool) { current_tool->name = std::string(trim_trailing_space(node.text)); + // Now that we have the name, populate the arguments from the buffer + if (!args_buffer.empty()) { + current_tool->arguments = args_buffer; + args_buffer.clear(); + } else if (current_tool->arguments.empty()) { + current_tool->arguments = "{"; + } + // Add the tool call to results so streaming can see it + if (pending_tool_call.has_value()) { + result.tool_calls.push_back(pending_tool_call.value()); + pending_tool_call.reset(); + current_tool = &result.tool_calls.back(); + } } if (is_tool_args && current_tool) { - current_tool->arguments = std::string(trim_trailing_space(node.text)); - } -} - -void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) { - common_chat_peg_mapper::map(node); - - bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN; - bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME; - bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE; - bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN; - bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE; - bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME; - bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE; - bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE; - - if (is_tool_open) { - result.tool_calls.emplace_back(); - current_tool = &result.tool_calls.back(); - arg_count = 0; - } - - if (is_tool_name) { - current_tool->name = std::string(node.text); - current_tool->arguments = "{"; + // For JSON format: arguments come as a complete JSON object + // For tagged format: built up from individual arg_name/arg_value nodes + auto text = trim_trailing_space(node.text); + if (!text.empty() && text.front() == '{') { + args_target() = std::string(text); + } } if (is_arg_open) { - needs_closing_quote = false; + closing_quote_pending = false; } if (is_arg_name && current_tool) { + std::string arg_entry; if (arg_count > 0) { - current_tool->arguments += ","; + arg_entry = ","; } - current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":"; + arg_entry += json(trim(node.text)).dump() + ":"; ++arg_count; + + auto & target = args_target(); + if (target.empty()) { + target = "{"; + } + target += arg_entry; } - if (is_arg_string && current_tool) { - // Serialize to JSON, but exclude the end quote - std::string dumped = json(trim_trailing_space(node.text)).dump(); - current_tool->arguments += dumped.substr(0, dumped.size() - 1); - needs_closing_quote = true; + if ((is_arg_value || is_arg_string_value) && current_tool) { + std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1)); + + std::string value_to_add; + if (value_content.empty() && is_arg_string_value) { + // Empty string value - arg_close will add the closing quote + value_to_add = "\""; + closing_quote_pending = true; + } else if (!value_content.empty() && is_arg_string_value) { + // Schema declares this as string type - always treat as literal string value + if (!closing_quote_pending) { + value_to_add = "\""; + closing_quote_pending = true; + } + value_to_add += escape_json_string_inner(value_content); + } else if (!value_content.empty()) { + // For potential containers, normalize Python-style single quotes to JSON double quotes + bool is_potential_container = value_content[0] == '[' || value_content[0] == '{'; + if (is_potential_container) { + value_content = normalize_quotes_to_json(value_content); + } + + // Try to parse as JSON value (number, bool, null, object, array) + try { + json parsed = json::parse(value_content); + if (parsed.is_string()) { + // Don't add closing quote yet (added by arg_close) for monotonic streaming + std::string escaped = parsed.dump(); + if (!escaped.empty() && escaped.back() == '"') { + escaped.pop_back(); + } + value_to_add = escaped; + closing_quote_pending = true; + } else { + // Non-string values: use raw content to preserve whitespace for monotonicity + value_to_add = value_content; + } + } catch (...) { + if (node.is_partial && is_potential_container) { + // Partial container: pass through the already-normalized content + value_to_add = value_content; + } else { + // Not valid JSON - treat as string value + if (!closing_quote_pending) { + value_to_add = "\""; + closing_quote_pending = true; + } + value_to_add += escape_json_string_inner(value_content); + } + } + } + + args_target() += value_to_add; } if (is_arg_close && current_tool) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; + if (closing_quote_pending) { + args_target() += "\""; + closing_quote_pending = false; } } - if (is_arg_json && current_tool) { - current_tool->arguments += std::string(trim_trailing_space(node.text)); - } - if (is_tool_close && current_tool) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; + // Flush buffer to arguments if tool name was never seen + if (current_tool->name.empty() && !args_buffer.empty()) { + current_tool->arguments = args_buffer; + args_buffer.clear(); + } + // Close any pending string quote + if (closing_quote_pending) { + current_tool->arguments += "\""; + closing_quote_pending = false; + } + // Close any unclosed braces (accounts for nested objects) + for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) { + current_tool->arguments += "}"; + } + // Add tool call to results if named; otherwise discard + if (pending_tool_call.has_value()) { + if (!current_tool->name.empty()) { + result.tool_calls.push_back(pending_tool_call.value()); + } + pending_tool_call.reset(); } - current_tool->arguments += "}"; } } + +common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools( + const std::map & markers, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls) { + if (!tools.is_array() || tools.empty()) { + return eps(); + } + + // Extract markers with defaults + auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string { + auto it = markers.find(key); + return it != markers.end() ? it->second : default_val; + }; + + std::string section_start = get_marker("tool_call_start_marker", ""); + std::string section_end = get_marker("tool_call_end_marker", ""); + std::string func_opener = get_marker("function_opener", ""); + std::string func_closer = get_marker("function_closer", ""); + std::string param_key_prefix = get_marker("parameter_key_prefix", ""); + std::string param_closer = get_marker("parameter_closer", ""); + + // Build tool choices for tagged format + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build argument parsers + auto args = eps(); + if (params.contains("properties") && !params["properties"].empty()) { + auto arg_choice = choice(); + for (const auto & el : params["properties"].items()) { + const std::string & prop_name = el.key(); + + auto arg_name_parser = + choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") }); + + auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) + + literal(param_key_suffix) + tool_arg_value(until(param_closer)) + + tool_arg_close(literal(param_closer))); + arg_choice |= arg_rule; + } + args = zero_or_more(arg_choice + space()); + } + + // Build function parser: args + auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) + + space() + tool_args(args) + space() + tool_close(literal(func_closer))); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + // Build the section with markers + auto section = + parallel_tool_calls ? + trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) + + literal(section_end)) : + trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end)); + + return force_tool_calls ? section : optional(section); +} + +// Helper: Parse dot notation key into prefix and field name +static std::pair parse_key_spec(const std::string & key) { + auto dot_pos = key.find('.'); + if (dot_pos == std::string::npos) { + return {"", key}; // Top-level field + } + return {key.substr(0, dot_pos), key.substr(dot_pos + 1)}; +} + +// Mode 1: function_is_key — parse {"function_name": {...}} +common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key( + const nlohmann::json & tools, + const std::string & args_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key) { + + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build inner object fields + std::vector inner_fields; + + if (!call_id_key.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); + } + + if (!gen_call_id_key.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space()))); + } + + // Arguments — either wrapped in args_key or parsed directly + common_peg_parser args_parser = eps(); + if (args_key.empty()) { + args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params)); + } else { + args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + } + inner_fields.push_back(args_parser); + + // Build inner object parser + common_peg_parser inner_object = eps(); + if (args_key.empty() && inner_fields.size() == 1) { + inner_object = inner_fields[0]; + } else { + inner_object = literal("{") + space(); + for (size_t i = 0; i < inner_fields.size(); i++) { + inner_object = inner_object + inner_fields[i]; + if (i < inner_fields.size() - 1) { + inner_object = inner_object + space(); + } + } + inner_object = inner_object + space() + literal("}"); + } + + auto tool_parser = tool( + tool_open(literal("{")) + space() + + literal("\"") + tool_name(literal(name)) + literal("\"") + + space() + literal(":") + space() + + inner_object + + space() + tool_close(literal("}")) + ); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + return tool_choices; +} + +// Mode 2: Nested keys (dot notation like "function.name") +common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys( + const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key) { + + auto tool_choices = choice(); + + auto name_spec = parse_key_spec(effective_name_key); + auto args_spec = parse_key_spec(effective_args_key); + + std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; + std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key; + std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key; + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + auto nested_object = literal("{") + space() + + nested_name + space() + literal(",") + space() + + nested_args + + space() + literal("}"); + + // Format: { id?, "function": {...} } + auto tool_parser_body = tool_open(literal("{")) + space(); + + if (!call_id_key.empty()) { + auto id_spec = parse_key_spec(call_id_key); + if (id_spec.first.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); + } + } + + if (!gen_call_id_key.empty()) { + auto gen_id_spec = parse_key_spec(gen_call_id_key); + if (gen_id_spec.first.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space()); + } + } + + auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object; + tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}")); + + tool_choices |= rule("tool-" + name, tool(tool_parser_body)); + } + + return tool_choices; +} + +// Mode 3: Flat keys with optional ID fields and parameter ordering +common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys( + const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order) { + + auto tool_choices = choice(); + auto name_key_parser = literal("\"" + effective_name_key + "\""); + auto args_key_parser = literal("\"" + effective_args_key + "\""); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + auto tool_name_ = name_key_parser + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto tool_args_ = args_key_parser + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + // Build ID parsers if keys are provided + common_peg_parser id_parser = eps(); + if (!call_id_key.empty()) { + id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + } + + common_peg_parser gen_id_parser = eps(); + if (!gen_call_id_key.empty()) { + gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + } + + // Create (parser, key) pairs for all fields, then sort by parameters_order + std::vector> parser_pairs; + parser_pairs.emplace_back(tool_name_, effective_name_key); + parser_pairs.emplace_back(tool_args_, effective_args_key); + if (!call_id_key.empty()) { + parser_pairs.emplace_back(optional(id_parser), call_id_key); + } + if (!gen_call_id_key.empty()) { + parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key); + } + + std::sort(parser_pairs.begin(), parser_pairs.end(), + [¶meters_order](const auto & a, const auto & b) { + auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second); + auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second); + size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a); + size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b); + return idx_a < idx_b; + }); + + auto ordered_body = tool_open(literal("{")) + space(); + for (size_t i = 0; i < parser_pairs.size(); i++) { + ordered_body = ordered_body + parser_pairs[i].first; + if (i < parser_pairs.size() - 1) { + ordered_body = ordered_body + space() + literal(",") + space(); + } + } + ordered_body = ordered_body + space() + tool_close(literal("}")); + + tool_choices |= rule("tool-" + name, tool(ordered_body)); + } + + return tool_choices; +} + +common_peg_parser common_chat_peg_unified_builder::standard_json_tools( + const std::string & section_start, + const std::string & section_end, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls, + const std::string & name_key, + const std::string & args_key, + bool array_wrapped, + bool function_is_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order) { + if (!tools.is_array() || tools.empty()) { + return eps(); + } + + std::string effective_name_key = name_key.empty() ? "name" : name_key; + std::string effective_args_key = args_key.empty() ? "arguments" : args_key; + + // Dispatch to the appropriate builder based on the JSON layout mode + common_peg_parser tool_choices = eps(); + if (function_is_key) { + tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key); + } else { + auto name_spec = parse_key_spec(effective_name_key); + auto args_spec = parse_key_spec(effective_args_key); + if (!name_spec.first.empty() || !args_spec.first.empty()) { + tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key); + } else { + tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order); + } + } + + // Build the section with markers + auto tool_calls = tool_choices; + if (parallel_tool_calls) { + tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices); + } + + if (array_wrapped) { + tool_calls = literal("[") + space() + tool_calls + space() + literal("]"); + } + + auto section = + trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end)); + + return force_tool_calls ? section : optional(section); +} diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index b84cbed206..6219c819d6 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -3,18 +3,29 @@ #include "chat.h" #include "peg-parser.h" +#include +#include +#include + class common_chat_peg_builder : public common_peg_parser_builder { public: static constexpr const char * REASONING_BLOCK = "reasoning-block"; - static constexpr const char * REASONING = "reasoning"; - static constexpr const char * CONTENT = "content"; + static constexpr const char * REASONING = "reasoning"; + static constexpr const char * CONTENT = "content"; common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); } + common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); } + common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); } + + common_peg_parser tag_with_safe_content(const std::string & tag_name, + const std::string & marker, + const common_peg_parser & p); }; -inline common_peg_arena build_chat_peg_parser(const std::function & fn) { +inline common_peg_arena build_chat_peg_parser( + const std::function & fn) { common_chat_peg_builder builder; builder.set_root(fn(builder)); return builder.build(); @@ -26,80 +37,142 @@ class common_chat_peg_mapper { common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {} + virtual ~common_chat_peg_mapper() = default; + virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result); virtual void map(const common_peg_ast_node & node); }; -class common_chat_peg_native_builder : public common_chat_peg_builder { - public: - static constexpr const char * TOOL = "tool"; - static constexpr const char * TOOL_OPEN = "tool-open"; - static constexpr const char * TOOL_CLOSE = "tool-close"; - static constexpr const char * TOOL_ID = "tool-id"; - static constexpr const char * TOOL_NAME = "tool-name"; - static constexpr const char * TOOL_ARGS = "tool-args"; +struct content_structure; +struct tool_call_structure; +class common_chat_peg_unified_builder : public common_chat_peg_builder { + public: + // Tag constants + static constexpr const char * TOOL = "tool"; + static constexpr const char * TOOL_OPEN = "tool-open"; + static constexpr const char * TOOL_CLOSE = "tool-close"; + static constexpr const char * TOOL_ID = "tool-id"; + static constexpr const char * TOOL_NAME = "tool-name"; + static constexpr const char * TOOL_ARGS = "tool-args"; + static constexpr const char * TOOL_ARG = "tool-arg"; + static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open"; + static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close"; + static constexpr const char * TOOL_ARG_NAME = "tool-arg-name"; + static constexpr const char * TOOL_ARG_VALUE = "tool-arg-value"; + static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value"; // For schema-declared string types + + // Low-level tag methods common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); } common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); } common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); } common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); } -}; - -class common_chat_peg_native_mapper : public common_chat_peg_mapper { - common_chat_tool_call * current_tool; - - public: - common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} - - void map(const common_peg_ast_node & node) override; -}; - -inline common_peg_arena build_chat_peg_native_parser(const std::function & fn) { - common_chat_peg_native_builder builder; - builder.set_root(fn(builder)); - return builder.build(); -} - -class common_chat_peg_constructed_builder : public common_chat_peg_builder { - public: - static constexpr const char * TOOL = "tool"; - static constexpr const char * TOOL_OPEN = "tool-open"; - static constexpr const char * TOOL_CLOSE = "tool-close"; - static constexpr const char * TOOL_NAME = "tool-name"; - static constexpr const char * TOOL_ARG = "tool-arg"; - static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open"; - static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close"; - static constexpr const char * TOOL_ARG_NAME = "tool-arg-name"; - static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value"; - static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value"; - - common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } - common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } - common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); } - common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); } common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); } common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); } common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); } common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); } + common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); } + + // Use for schema-declared string types - won't be treated as potential JSON container common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); } - common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); } + common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); } + + // Legacy-compatible helper for building standard JSON tool calls + // Used by tests and manual parsers + // name_key/args_key: JSON key names for function name and arguments + // Empty or "name"/"arguments" will accept both common variations + // Supports dot notation for nested objects (e.g., "function.name") + // array_wrapped: if true, tool calls are wrapped in JSON array [...] + // function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}}) + // call_id_key: JSON key for string call ID (e.g., "id") + // gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id") + // parameters_order: order in which JSON fields should be parsed + common_peg_parser standard_json_tools(const std::string & section_start, + const std::string & section_end, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls, + const std::string & name_key = "", + const std::string & args_key = "", + bool array_wrapped = false, + bool function_is_key = false, + const std::string & call_id_key = "", + const std::string & gen_call_id_key = "", + const std::vector & parameters_order = {}); + + // Legacy-compatible helper for building XML/tagged style tool calls + // Used by tests and manual parsers + common_peg_parser standard_constructed_tools(const std::map & markers, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls); + + private: + // Implementation helpers for standard_json_tools — one per JSON tool call layout mode + common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools, + const std::string & args_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key); + + common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key); + + common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order); }; -class common_chat_peg_constructed_mapper : public common_chat_peg_mapper { - common_chat_tool_call * current_tool; - int arg_count = 0; - bool needs_closing_quote = false; - - public: - common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} - - void map(const common_peg_ast_node & node) override; -}; - -inline common_peg_arena build_chat_peg_constructed_parser(const std::function & fn) { - common_chat_peg_constructed_builder builder; +inline common_peg_arena build_chat_peg_unified_parser( + const std::function & fn) { + common_chat_peg_unified_builder builder; builder.set_root(fn(builder)); return builder.build(); } + +class tag_based_peg_mapper { + public: + std::map tags; + + void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result); +}; + +struct tagged_parse_result { + common_peg_parse_result result; + std::map tags; +}; + +struct tagged_peg_parser { + common_peg_arena arena; + + tagged_parse_result parse_and_extract(const std::string & input, bool is_partial = false) const; +}; + +tagged_peg_parser build_tagged_peg_parser( + const std::function & fn); + +class common_chat_peg_unified_mapper : public common_chat_peg_mapper { + std::optional pending_tool_call; // Tool call waiting for name + common_chat_tool_call * current_tool = nullptr; + int arg_count = 0; + bool closing_quote_pending = false; + std::string args_buffer; // Buffer to delay arguments until tool name is known + + // Returns a reference to the active argument destination string. + // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments. + std::string & args_target(); + + public: + common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} + + void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) override; + void map(const common_peg_ast_node & node) override; +}; diff --git a/common/chat.cpp b/common/chat.cpp index 47a34d5822..4c03d030ff 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1,24 +1,25 @@ #include "chat.h" -#include "chat-parser.h" + +#include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" #include "chat-peg-parser.h" #include "common.h" -#include "json-partial.h" +#include "ggml.h" #include "json-schema-to-grammar.h" #include "log.h" -#include "regex-partial.h" -#include "jinja/parser.h" #include "jinja/value.h" #include "jinja/runtime.h" #include "jinja/caps.h" +#include "peg-parser.h" -#include #include -#include +#include #include #include -#include + #include +#include #include #include #include @@ -26,14 +27,26 @@ using json = nlohmann::ordered_json; static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) { - auto time = std::chrono::system_clock::to_time_t(now); - auto local_time = *std::localtime(&time); + auto time = std::chrono::system_clock::to_time_t(now); + auto local_time = *std::localtime(&time); std::ostringstream ss; ss << std::put_time(&local_time, format.c_str()); auto res = ss.str(); return res; } +static json safe_args_parse(const std::string & to_parse) { + std::string stripped = to_parse; + if (to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') { + stripped = to_parse.substr(1, to_parse.length() - 1); + } + try { + return json::parse(stripped); + } catch (json::exception & e) { + return stripped; + } +} + static std::string string_diff(const std::string & last, const std::string & current) { if (last.empty()) { return current; @@ -105,7 +118,7 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const { {"type", "function"}, {"function", { {"name", tool_call.name}, - {"arguments", tool_call.arguments}, + {"arguments", json::parse(tool_call.arguments)}, }}, }; if (!tool_call.id.empty()) { @@ -122,7 +135,8 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const { return jmsg; } -std::vector common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) { +std::vector common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, + const common_chat_msg & msg_new) { std::vector diffs; if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) { diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3); @@ -132,38 +146,56 @@ std::vector common_chat_msg_diff::compute_diffs(const comm // TODO: these can become expensive for long messages - how to optimize? if (msg_prv.reasoning_content != msg_new.reasoning_content) { - auto & diff = diffs.emplace_back(); + auto & diff = diffs.emplace_back(); diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content); } if (msg_prv.content != msg_new.content) { - auto & diff = diffs.emplace_back(); + auto & diff = diffs.emplace_back(); diff.content_delta = string_diff(msg_prv.content, msg_new.content); } if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) { - throw std::runtime_error("Invalid diff: now finding less tool calls!"); + std::string err = "Invalid diff: now finding less tool calls!\n"; + err += " Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n"; + for (const auto & tc : msg_prv.tool_calls) { + err += " - name: '" + tc.name + "', args: '" + tc.arguments + "'\n"; + } + err += " Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n"; + for (const auto & tc : msg_new.tool_calls) { + err += " - name: '" + tc.name + "', args: '" + tc.arguments + "'\n"; + } + err += " Current msg text content:\n" + msg_new.content + "\n"; + throw std::runtime_error(err); } if (!msg_prv.tool_calls.empty()) { - const auto idx = msg_prv.tool_calls.size() - 1; + const auto idx = msg_prv.tool_calls.size() - 1; const auto & pref = msg_prv.tool_calls[idx]; const auto & newf = msg_new.tool_calls[idx]; - if (pref.name != newf.name) { - throw std::runtime_error("Invalid diff: tool call mismatch!"); + // Allow tool name to change during incremental parsing: + // - empty -> non-empty (initial discovery) + // - prefix -> longer string (name grows as more input is parsed) + if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) { + // Check if one is a prefix of the other (for incremental parsing where names grow or shrink) + bool is_prefix = (newf.name.rfind(pref.name, 0) == 0); + if (!is_prefix) { + LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str()); + throw std::runtime_error("Invalid diff: tool call mismatch!"); + } } const auto args_diff = string_diff(pref.arguments, newf.arguments); - if (!args_diff.empty() || pref.id != newf.id) { - auto & diff = diffs.emplace_back(); + if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) { + auto & diff = diffs.emplace_back(); diff.tool_call_index = idx; - if (pref.id != newf.id) { - diff.tool_call_delta.id = newf.id; + if (pref.id != newf.id || pref.name != newf.name) { + diff.tool_call_delta.id = newf.id; diff.tool_call_delta.name = newf.name; } diff.tool_call_delta.arguments = args_diff; } } for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) { - auto & diff = diffs.emplace_back(); + auto & diff = diffs.emplace_back(); diff.tool_call_index = idx; diff.tool_call_delta = msg_new.tool_calls[idx]; } @@ -173,94 +205,14 @@ std::vector common_chat_msg_diff::compute_diffs(const comm using chat_template_caps = jinja::caps; -struct common_chat_template { - jinja::program prog; - std::string bos_tok; - std::string eos_tok; - std::string src; - chat_template_caps caps; - - common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) { - jinja::lexer lexer; - auto lexer_res = lexer.tokenize(src); - this->prog = jinja::parse_from_tokens(lexer_res); - - this->src = lexer_res.source; - this->bos_tok = bos_token; - this->eos_tok = eos_token; - - this->caps = jinja::caps_get(prog); - // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str()); - } - - const std::string & source() const { return src; } - const std::string & bos_token() const { return bos_tok; } - const std::string & eos_token() const { return eos_tok; } - - // TODO: this is ugly, refactor it somehow - json add_system(const json & messages, const std::string & system_prompt) const { - GGML_ASSERT(messages.is_array()); - auto msgs_copy = messages; - if (!caps.supports_system_role) { - if (msgs_copy.empty()) { - msgs_copy.insert(msgs_copy.begin(), json{ - {"role", "user"}, - {"content", system_prompt} - }); - } else { - auto & first_msg = msgs_copy[0]; - if (!first_msg.contains("content")) { - first_msg["content"] = ""; - } - first_msg["content"] = system_prompt + "\n\n" - + first_msg["content"].get(); - } - } else { - if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") { - msgs_copy.insert(msgs_copy.begin(), json{ - {"role", "system"}, - {"content", system_prompt} - }); - } else if (msgs_copy[0].at("role") == "system") { - msgs_copy[0]["content"] = system_prompt; - } - } - return msgs_copy; - } - - chat_template_caps original_caps() const { - return caps; - } - -}; - struct common_chat_templates { bool add_bos; bool add_eos; - bool has_explicit_template; // Model had builtin template or template overridde was specified. - std::unique_ptr template_default; // always set (defaults to chatml) + bool has_explicit_template; // Model had builtin template or template overridde was specified. + std::unique_ptr template_default; // always set (defaults to chatml) std::unique_ptr template_tool_use; }; -struct templates_params { - json messages; - json tools; - common_chat_tool_choice tool_choice; - json json_schema; - bool parallel_tool_calls; - common_reasoning_format reasoning_format; - bool stream; - std::string grammar; - bool add_generation_prompt = true; - bool enable_thinking = true; - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - json extra_context; - bool add_bos; - bool add_eos; - bool is_inference = true; - bool mark_input = true; // whether to mark input strings in the jinja context -}; - common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) { if (tool_choice == "auto") { return COMMON_CHAT_TOOL_CHOICE_AUTO; @@ -276,22 +228,27 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) { common_chat_templates_inputs dummy_inputs; - common_chat_msg msg; - msg.role = "user"; - msg.content = "test"; - dummy_inputs.messages = {msg}; - dummy_inputs.enable_thinking = false; - const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); - dummy_inputs.enable_thinking = true; + common_chat_msg msg; + msg.role = "user"; + msg.content = "test"; + dummy_inputs.messages = { msg }; + dummy_inputs.enable_thinking = false; + const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); + dummy_inputs.enable_thinking = true; const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); - return rendered_no_thinking.prompt != rendered_with_thinking.prompt; + bool detect = rendered_no_thinking.prompt != rendered_with_thinking.prompt; + const auto & tmpl = chat_templates->template_tool_use + ? *chat_templates->template_tool_use + : *chat_templates->template_default; + autoparser::analyze_template result(tmpl); + detect |= result.reasoning.mode != autoparser::reasoning_mode::NONE; + return detect; } std::vector common_chat_msgs_parse_oaicompat(const json & messages) { std::vector msgs; try { - if (!messages.is_array()) { throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump()); } @@ -307,7 +264,7 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa } msg.role = message.at("role"); - auto has_content = message.contains("content"); + auto has_content = message.contains("content"); auto has_tool_calls = message.contains("tool_calls"); if (has_content) { const auto & content = message.at("content"); @@ -328,7 +285,9 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa msg.content_parts.push_back(msg_part); } } else if (!content.is_null()) { - throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); + throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + + content.dump() + + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); } } if (has_tool_calls) { @@ -348,8 +307,13 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa if (!fc.contains("name")) { throw std::invalid_argument("Missing tool call name: " + tool_call.dump()); } - tc.name = fc.at("name"); - tc.arguments = fc.at("arguments"); + tc.name = fc.at("name"); + const auto & args = fc.at("arguments"); + if (args.is_string()) { + tc.arguments = args; + } else { + tc.arguments = args.dump(); + } if (tool_call.contains("id")) { tc.id = tool_call.at("id"); } @@ -357,7 +321,9 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa } } if (!has_content && !has_tool_calls) { - throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); + throw std::invalid_argument( + "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & " + "https://github.com/ggml-org/llama.cpp/issues/12279)"); } if (message.contains("reasoning_content")) { msg.reasoning_content = message.at("reasoning_content"); @@ -463,12 +429,13 @@ json common_chat_tools_to_json_oaicompat(const std::vector & t auto result = json::array(); for (const auto & tool : tools) { result.push_back({ - {"type", "function"}, - {"function", { - {"name", tool.name}, - {"description", tool.description}, - {"parameters", json::parse(tool.parameters)}, - }}, + { "type", "function" }, + { "function", + { + { "name", tool.name }, + { "description", tool.description }, + { "parameters", json::parse(tool.parameters) }, + } }, }); } return result; @@ -486,16 +453,20 @@ json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) { json tool_call; tool_call["index"] = diff.tool_call_index; if (!diff.tool_call_delta.id.empty()) { - tool_call["id"] = diff.tool_call_delta.id; + tool_call["id"] = diff.tool_call_delta.id; tool_call["type"] = "function"; } - json function = json::object(); - if (!diff.tool_call_delta.name.empty()) { - function["name"] = diff.tool_call_delta.name; + if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) { + json function = json::object(); + if (!diff.tool_call_delta.name.empty()) { + function["name"] = diff.tool_call_delta.name; + } + if (!diff.tool_call_delta.arguments.empty()) { + function["arguments"] = diff.tool_call_delta.arguments; + } + tool_call["function"] = function; } - function["arguments"] = diff.tool_call_delta.arguments; - tool_call["function"] = function; - delta["tool_calls"] = json::array({tool_call}); + delta["tool_calls"] = json::array({ tool_call }); } return delta; } @@ -504,13 +475,13 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { if (use_jinja) { try { common_chat_msg msg; - msg.role = "user"; + msg.role = "user"; msg.content = "test"; auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl); common_chat_templates_inputs inputs; - inputs.messages = {msg}; + inputs.messages = { msg }; common_chat_templates_apply(tmpls.get(), inputs); return true; @@ -519,28 +490,28 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { return false; } } - llama_chat_message chat[] = {{"user", "test"}}; + llama_chat_message chat[] = { + { "user", "test" } + }; const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0); return res >= 0; } -std::string common_chat_format_single( - const struct common_chat_templates * tmpls, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja) { - +std::string common_chat_format_single(const struct common_chat_templates * tmpls, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja) { common_chat_templates_inputs inputs; inputs.use_jinja = use_jinja; - inputs.add_bos = tmpls->add_bos; - inputs.add_eos = tmpls->add_eos; + inputs.add_bos = tmpls->add_bos; + inputs.add_eos = tmpls->add_eos; std::string fmt_past_msg; if (!past_msg.empty()) { - inputs.messages = past_msg; + inputs.messages = past_msg; inputs.add_generation_prompt = false; - fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt; + fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt; } std::ostringstream ss; // if the past_msg ends with a newline, we must preserve it in the formatted version @@ -550,37 +521,39 @@ std::string common_chat_format_single( // format chat with new_msg inputs.messages.push_back(new_msg); inputs.add_generation_prompt = add_ass; - auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt; + auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt; // get the diff part ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); return ss.str(); } -std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map & chat_template_kwargs) { +std::string common_chat_format_example(const struct common_chat_templates * tmpls, + bool use_jinja, + const std::map & chat_template_kwargs) { common_chat_templates_inputs inputs; - inputs.use_jinja = use_jinja; - inputs.add_bos = tmpls->add_bos; - inputs.add_eos = tmpls->add_eos; + inputs.use_jinja = use_jinja; + inputs.add_bos = tmpls->add_bos; + inputs.add_eos = tmpls->add_eos; inputs.chat_template_kwargs = chat_template_kwargs; - auto add_simple_msg = [&](auto role, auto content) { + auto add_simple_msg = [&](auto role, auto content) { common_chat_msg msg; - msg.role = role; + msg.role = role; msg.content = content; inputs.messages.push_back(msg); }; - add_simple_msg("system", "You are a helpful assistant"); - add_simple_msg("user", "Hello"); + add_simple_msg("system", "You are a helpful assistant"); + add_simple_msg("user", "Hello"); add_simple_msg("assistant", "Hi there"); - add_simple_msg("user", "How are you?"); + add_simple_msg("user", "How are you?"); return common_chat_templates_apply(tmpls, inputs).prompt; } -#define CHATML_TEMPLATE_SRC \ - "{%- for message in messages -%}\n" \ +#define CHATML_TEMPLATE_SRC \ + "{%- for message in messages -%}\n" \ " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \ - "{%- endfor -%}\n" \ - "{%- if add_generation_prompt -%}\n" \ - " {{- '<|im_start|>assistant\n' -}}\n" \ + "{%- endfor -%}\n" \ + "{%- if add_generation_prompt -%}\n" \ + " {{- '<|im_start|>assistant\n' -}}\n" \ "{%- endif -%}" void common_chat_templates_free(struct common_chat_templates * tmpls) { @@ -598,19 +571,16 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm return tmpls->template_tool_use->source(); } return ""; - } else { - LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str()); } + LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str()); } return tmpls->template_default->source(); } -common_chat_templates_ptr common_chat_templates_init( - const struct llama_model * model, - const std::string & chat_template_override, - const std::string & bos_token_override, - const std::string & eos_token_override) -{ +common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model, + const std::string & chat_template_override, + const std::string & bos_token_override, + const std::string & eos_token_override) { std::string default_template_src; std::string template_tool_use_src; @@ -619,7 +589,7 @@ common_chat_templates_ptr common_chat_templates_init( GGML_ASSERT(model != nullptr); const auto * str = llama_model_chat_template(model, /* name */ nullptr); if (str) { - default_template_src = str; + default_template_src = str; has_explicit_template = true; } str = llama_model_chat_template(model, /* name */ "tool_use"); @@ -641,34 +611,40 @@ common_chat_templates_ptr common_chat_templates_init( // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633 if (default_template_src.find("<|channel|>") != std::string::npos - // search for the error message and patch it - && default_template_src.find("in message.content or") != std::string::npos) { + // search for the error message and patch it + && default_template_src.find("in message.content or") != std::string::npos) { string_replace_all(default_template_src, - "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}", - "{%- if false %}"); + "{%- if \"<|channel|>analysis<|message|>\" in message.content or " + "\"<|channel|>final<|message|>\" in message.content %}", + "{%- if false %}"); } // TODO @aldehir : this is a temporary fix, pending Minja changes // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664 if (default_template_src.find("[TOOL_CALLS]") != std::string::npos - // search for the error message and patch it - && default_template_src.find("if (message['content'] is none or") != std::string::npos) { + // search for the error message and patch it + && default_template_src.find("if (message['content'] is none or") != std::string::npos) { string_replace_all(default_template_src, - "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}", - "{%- if false %}"); + "{%- if (message['content'] is none or message['content'] == '' or " + "message['content']|length == 0) and (message['tool_calls'] is not defined or " + "message['tool_calls'] is none or message['tool_calls']|length == 0) %}", + "{%- if false %}"); } std::string token_bos = bos_token_override; std::string token_eos = eos_token_override; - bool add_bos = false; - bool add_eos = false; + bool add_bos = false; + bool add_eos = false; if (model) { - const auto * vocab = llama_model_get_vocab(model); - const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { + const auto * vocab = llama_model_get_vocab(model); + const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { if (token == LLAMA_TOKEN_NULL) { - if (default_template_src.find(jinja_variable_name) != std::string::npos - || template_tool_use_src.find(jinja_variable_name) != std::string::npos) { - LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name); + if (default_template_src.find(jinja_variable_name) != std::string::npos || + template_tool_use_src.find(jinja_variable_name) != std::string::npos) { + LOG_WRN( + "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't " + "work as intended.\n", + name); } return std::string(); } @@ -676,13 +652,13 @@ common_chat_templates_ptr common_chat_templates_init( }; token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token"); token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token"); - add_bos = llama_vocab_get_add_bos(vocab); - add_eos = llama_vocab_get_add_eos(vocab); + add_bos = llama_vocab_get_add_bos(vocab); + add_eos = llama_vocab_get_add_eos(vocab); } common_chat_templates_ptr tmpls(new common_chat_templates()); tmpls->has_explicit_template = has_explicit_template; - tmpls->add_bos = add_bos; - tmpls->add_eos = add_eos; + tmpls->add_bos = add_bos; + tmpls->add_eos = add_eos; try { tmpls->template_default = std::make_unique(default_template_src, token_bos, token_eos); } catch (const std::exception & e) { @@ -703,36 +679,12 @@ common_chat_templates_ptr common_chat_templates_init( const char * common_chat_format_name(common_chat_format format) { switch (format) { - case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only"; - case COMMON_CHAT_FORMAT_GENERIC: return "Generic"; - case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo"; - case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral"; - case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x"; - case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools"; - case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1"; - case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; - case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1"; - case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; - case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; - case COMMON_CHAT_FORMAT_GRANITE: return "Granite"; - case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS"; - case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS"; - case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2"; - case COMMON_CHAT_FORMAT_APERTUS: return "Apertus"; - case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools"; - case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2"; - case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5"; - case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2"; - case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder"; - case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5"; - case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo"; - case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open"; - case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE"; - case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple"; - case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native"; - case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed"; + case COMMON_CHAT_FORMAT_CONTENT_ONLY: + return "Content-only"; + case COMMON_CHAT_FORMAT_PEG_SIMPLE: + return "peg-simple"; + case COMMON_CHAT_FORMAT_PEG_NATIVE: + return "peg-native"; default: throw std::runtime_error("Unknown chat format"); } @@ -740,10 +692,14 @@ const char * common_chat_format_name(common_chat_format format) { const char * common_reasoning_format_name(common_reasoning_format format) { switch (format) { - case COMMON_REASONING_FORMAT_NONE: return "none"; - case COMMON_REASONING_FORMAT_AUTO: return "auto"; - case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek"; - case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy"; + case COMMON_REASONING_FORMAT_NONE: + return "none"; + case COMMON_REASONING_FORMAT_AUTO: + return "auto"; + case COMMON_REASONING_FORMAT_DEEPSEEK: + return "deepseek"; + case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: + return "deepseek-legacy"; default: throw std::runtime_error("Unknown reasoning format"); } @@ -752,11 +708,14 @@ const char * common_reasoning_format_name(common_reasoning_format format) { common_reasoning_format common_reasoning_format_from_name(const std::string & format) { if (format == "none") { return COMMON_REASONING_FORMAT_NONE; - } else if (format == "auto") { + } + if (format == "auto") { return COMMON_REASONING_FORMAT_AUTO; - } else if (format == "deepseek") { + } + if (format == "deepseek") { return COMMON_REASONING_FORMAT_DEEPSEEK; - } else if (format == "deepseek-legacy") { + } + if (format == "deepseek-legacy") { return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; } throw std::runtime_error("Unknown reasoning format: " + format); @@ -772,7 +731,8 @@ static void foreach_function(const json & tools, const std::function & fn) { +static void foreach_parameter(const json & function, + const std::function & fn) { if (!function.contains("parameters") || !function.at("parameters").is_object()) { return; } @@ -780,7 +740,7 @@ static void foreach_parameter(const json & function, const std::function required; if (params.contains("required") && params.at("required").is_array()) { params.at("required").get_to(required); @@ -791,19 +751,19 @@ static void foreach_parameter(const json & function, const std::function & messages_override = std::nullopt, - const std::optional & tools_override = std::nullopt, - const std::optional & additional_context = std::nullopt) -{ + const autoparser::templates_params & inputs, + const std::optional & messages_override, + const std::optional & tools_override, + const std::optional & additional_context) { jinja::context ctx(tmpl.source()); nlohmann::ordered_json inp = nlohmann::ordered_json{ {"messages", messages_override.has_value() ? *messages_override : inputs.messages}, {"bos_token", tmpl.bos_token()}, {"eos_token", tmpl.eos_token()}, + {"enable_thinking", inputs.enable_thinking}, }; if (tools_override.has_value() || !inputs.tools.empty()) { inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools; @@ -829,7 +789,7 @@ static std::string apply( // render jinja::runtime runtime(ctx); const jinja::value results = runtime.execute(tmpl.prog); - auto parts = runtime.gather_string_parts(results); + auto parts = jinja::runtime::gather_string_parts(results); std::string result = parts->as_string().str(); @@ -843,265 +803,8 @@ static std::string apply( return result; } -static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - auto tool_call_schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - auto tool_schema = json { - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - }}, - {"required", json::array({"name", "arguments"})}, - }; - if (function.contains("description")) { - tool_schema["description"] = function.at("description"); - } - if (inputs.parallel_tool_calls) { - tool_schema.at("properties")["id"] = { - {"type", "string"}, - {"minLength", 4}, - }; - tool_schema.at("required").push_back("id"); - } - tool_call_schemas.emplace_back(tool_schema); - }); - const auto tool_call = - inputs.parallel_tool_calls - ? json { - {"type", "object"}, - {"properties", { - {"tool_calls", { - {"type", "array"}, - {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { - {"anyOf", tool_call_schemas}, - }}, - {"minItems", 1}, - }}, - }}, - {"required", json::array({"tool_calls"})}, - } - : json { - {"type", "object"}, - {"properties", { - {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { - {"anyOf", tool_call_schemas}, - }}, - }}, - {"required", json::array({"tool_call"})}, - }; - const auto schema = - inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED - ? json { - {"anyOf", json::array({ - tool_call, - { - {"type", "object"}, - {"properties", { - {"response", inputs.json_schema.is_null() - ? json {{"type", "string"}} - : inputs.json_schema - }, - }}, - {"required", json::array({"response"})}, - }, - })} - } - : tool_call; - - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - builder.add_schema("root", schema); - }); - - auto tweaked_messages = tmpl.add_system( - inputs.messages, - "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); - - // ensure all messages has "content" field - for (auto & message : tweaked_messages) { - if (!message.contains("content") || message["content"].is_null()) { - message["content"] = ""; - } - } - - data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages); - data.format = COMMON_CHAT_FORMAT_GENERIC; - return data; -} - -static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - // Important note: the model is probably trained to take a JSON stringified arguments value. - // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object. - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - {"id", { - {"type", "string"}, - // Nemo's template expects a 9-character alphanumeric ID. - {"pattern", "^[a-zA-Z0-9]{9}$"}, - }}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); - }); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}); - data.preserved_tokens = { - "[TOOL_CALLS]", - }; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; - return data; -} - - -// Case-insensitive find -static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) { - auto it = std::search( - haystack.begin() + pos, haystack.end(), - needle.begin(), needle.end(), - [](char a, char b) { return std::tolower(a) == std::tolower(b); } - ); - return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it); -} - -static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - const auto is_json_schema_provided = !inputs.json_schema.is_null(); - const auto is_grammar_provided = !inputs.grammar.empty(); - const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty(); - - // the logic requires potentially modifying the messages - auto tweaked_messages = inputs.messages; - - auto replace_json_schema_marker = [](json & messages) -> bool { - static std::string marker1 = "force json schema.\n"; - static std::string marker2 = "force json schema."; - - if (messages.empty() || messages.at(0).at("role") != "system") { - return false; - } - - std::string content = messages.at(0).at("content"); - - for (const auto & marker : {marker1, marker2}) { - const auto pos = ifind_string(content, marker); - if (pos != std::string::npos) { - content.replace(pos, marker.length(), ""); - // inject modified content back into the messages - messages.at(0).at("content") = content; - return true; - } - } - - return false; - }; - - // Lfm2 model does not natively work with json, but can generally understand the tools structure - // - // Example of the pytorch dialog structure: - // <|startoftext|><|im_start|>system - // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|> - // <|im_start|>user - // What is the current status of candidate ID 12345?<|im_end|> - // <|im_start|>assistant - // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|> - // <|im_start|>tool - // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|> - // <|im_start|>assistant - // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|> - // - // For the llama server compatibility with json tools semantic, - // the client can add "Follow json schema." line into the system message prompt to force the json output. - // - if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) { - // server/utils.hpp prohibits that branch for the custom grammar anyways - throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar"); - } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) { - LOG_INF("%s: Using tools to build a grammar\n", __func__); - - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - - builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\""); - }); - // model has no concept of tool selection mode choice, - // if the system prompt rendered correctly it will produce a tool call - // the grammar goes inside the tool call body - data.grammar_lazy = true; - data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}}; - data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"}; - data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS; - } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) { - LOG_INF("%s: Using tools without json schema or grammar\n", __func__); - // output those tokens - data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"}; - } else if (is_json_schema_provided) { - LOG_INF("%s: Using provided json schema to build a grammar\n", __func__); - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else if (is_grammar_provided) { - LOG_INF("%s: Using provided grammar\n", __func__); - data.grammar = inputs.grammar; - } else { - LOG_INF("%s: Using content relying on the template\n", __func__); - } - - data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages); - LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str()); - - return data; -} - -static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) { +static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, + const autoparser::templates_params & inputs) { common_chat_params data; // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja @@ -1119,8 +822,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ // If message contains `reasoning_content`, add it as a block of type `thinking` if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) { content.push_back({ - {"type", "thinking"}, - {"thinking", msg.at("reasoning_content").get()}, + { "type", "thinking" }, + { "thinking", msg.at("reasoning_content").get() }, }); } @@ -1128,8 +831,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ if (msg.contains("content")) { if (msg.at("content").is_string()) { content.push_back({ - {"type", "text"}, - {"text", msg.at("content").get()}, + { "type", "text" }, + { "text", msg.at("content").get() }, }); } else if (msg.at("content").is_array()) { auto blocks = msg.at("content"); @@ -1137,18 +840,18 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ } } - auto adjusted = msg; + auto adjusted = msg; adjusted["content"] = content; adjusted.erase("reasoning_content"); adjusted_messages.push_back(adjusted); } - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; - auto include_grammar = true; + auto include_grammar = true; - data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages); - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override = */ adjusted_messages); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; data.preserved_tokens = { "[THINK]", "[/THINK]", @@ -1156,13 +859,15 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ "[ARGS]", }; - auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { - auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps(); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto reasoning = + extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps(); // Response format parser if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { // Ministral wants to emit json surrounded by code fences - return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```"; + return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + << "```"; } // Tool call parser @@ -1170,17 +875,16 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ auto tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); - tool_choice |= p.rule("tool-" + name, - p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") - + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) - ); + tool_choice |= + p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))); }); - auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; - auto max_calls = inputs.parallel_tool_calls ? -1 : 1; + auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; + auto max_calls = inputs.parallel_tool_calls ? -1 : 1; auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls)); return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls; @@ -1199,838 +903,32 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ data.grammar = build_grammar([&](const common_grammar_builder & builder) { foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - auto schema = function.at("parameters"); + auto schema = function.at("parameters"); builder.resolve_refs(schema); }); parser.build_grammar(builder, data.grammar_lazy); }); data.grammar_triggers = { - {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"} + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" } }; } return data; } -static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_MAGISTRAL; - data.preserved_tokens = { - "[THINK]", - "[/THINK]", - }; - - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - {"id", { - {"type", "string"}, - {"pattern", "^[a-zA-Z0-9]{9}$"}, - }}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); - }); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}); - data.preserved_tokens.push_back("[TOOL_CALLS]"); - } else { - data.grammar_lazy = false; - if (!inputs.json_schema.is_null()) { - if (!inputs.grammar.empty()) { - throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); - } - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else { - data.grammar = inputs.grammar; - } - } - - return data; -} - -static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - auto adjusted_messages = json::array(); - for (const auto & msg : inputs.messages) { - auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string(); - auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); - if (has_reasoning_content && has_tool_calls) { - auto adjusted_message = msg; - adjusted_message["tool_plan"] = msg.at("reasoning_content"); - adjusted_message.erase("reasoning_content"); - adjusted_messages.push_back(adjusted_message); - } else { - adjusted_messages.push_back(msg); - } - } - data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); - data.format = COMMON_CHAT_FORMAT_COMMAND_R7B; - if (string_ends_with(data.prompt, "<|START_THINKING|>")) { - if (!inputs.enable_thinking) { - data.prompt += "<|END_THINKING|>"; - } else { - data.thinking_forced_open = true; - } - } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) { - data.prompt += "<|START_THINKING|><|END_THINKING|>"; - } - - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"tool_call_id", { - {"type", "string"}, - // Command-R's template expects an integer string. - {"pattern", "^[0-9]{1,10}$"}, - }}, - {"tool_name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"parameters", function.at("parameters")}, - }}, - {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") + - "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\""); - }); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") + - "(<\\|START_ACTION\\|>)[\\s\\S]*" - }); - data.preserved_tokens = { - "<|START_ACTION|>", - "<|END_ACTION|>", - "<|START_RESPONSE|>", - "<|END_RESPONSE|>", - "<|START_THINKING|>", - "<|END_THINKING|>", - }; - return data; -} - -static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { - if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) { - throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); - } - const auto & parameters_properties = parameters.at("properties"); - const auto & parameters_required = parameters.at("required"); - for (const auto & prop : expected_properties) { - if (!parameters_properties.contains(prop)) { - throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT - } - if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) { - throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT - } - } - if (parameters_properties.size() != expected_properties.size()) { - throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", ")); - } -} - -static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) { - auto builtin_tools = json::array(); - common_chat_params data; - if (!inputs.tools.is_null()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - - auto handle_builtin_tool = [&](const std::string & name, const json & parameters) { - if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py - expect_tool_parameters(name, parameters, {"query"}); - } else if (name == "python" || name == "code_interpreter") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py - expect_tool_parameters(name, parameters, {"code"}); - } else { - return false; - } - - std::vector kvs; - for (const auto & [key, value] : parameters.at("properties").items()) { - kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT - } - - tool_rules.push_back( - builder.add_rule( - name + "-call", - "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\"")); - builtin_tools.push_back(name); - - return true; - }; - - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - - // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime - if (allow_python_tag_builtin_tools) { - handle_builtin_tool(name, parameters); - } - tool_rules.push_back( - builder.add_rule( - name + "-call", - "\"{\" space " - "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? " - " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space " - " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " " - "\"}\" space")); - }); - // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name. - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*", - }); - if (!builtin_tools.empty()) { - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"}); - data.preserved_tokens.push_back("<|python_tag|>"); - } - // Allow a few empty lines on top of the usual constrained json schema space rule. - builder.add_rule("root", string_join(tool_rules, " | ")); - data.additional_stops.push_back("<|eom_id|>"); - }); - data.format = allow_python_tag_builtin_tools && !builtin_tools.empty() - ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS - : COMMON_CHAT_FORMAT_LLAMA_3_X; - } else { - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - } - data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json { - {"date_string", format_time(inputs.now, "%d %b %Y")}, - {"tools_in_user_message", false}, - {"builtin_tools", builtin_tools}, - }); - return data; -} - -static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Generate the prompt using the apply() function with the template - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2; - - // Handle thinking tags appropriately based on inputs.enable_thinking - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - // When tools are present, build grammar for the format, similar to CommandR, but without tool call ID - if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = true; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - { "type", "object" }, - { "properties", - { - { "name", - { - { "type", "string" }, - { "const", function.at("name") }, - } }, - { "arguments", function.at("parameters") }, - } }, - { "required", json::array({ "name", "arguments" }) }, - }); - }); - auto schema = json{ - { "type", "array" }, - { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } }, - { "minItems", 1 }, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - "\"\" " + builder.add_schema("tool_calls", schema) + - " \"\""); - }); - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? - "[\\s\\S]*?(\\s*)" : - "(?:[\\s\\S]*?\\s*)?") + - "()[\\s\\S]*" }); - } - return data; -} - -static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED; - - // Handle thinking tags appropriately based on inputs.enable_thinking - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - data.preserved_tokens = { - "", - "", - "", - "", - }; - - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); - auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; - auto include_grammar = true; - - auto parser = build_chat_peg_constructed_parser([&](auto & p) { - auto reasoning = p.eps(); - if (inputs.enable_thinking && extract_reasoning) { - auto reasoning_content = p.reasoning(p.until("")) + ("" | p.end()); - if (data.thinking_forced_open) { - reasoning = reasoning_content; - } - } - - // Response format parser - if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { - return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema)); - } - - // Tool call parser - if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) { - auto tool_choice = p.choice(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - - auto schema_info = common_schema_info(); - schema_info.resolve_refs(parameters); - - auto tool_open = "\n"; - auto tool_close = p.literal("\n"); - auto args = p.sequence(); - auto arg_string = p.rule("xml-arg-string", p.until_one_of({ - "\n", - "\n" - })); - - foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) { - auto rule_name = "tool-" + name + "-arg-" + param_name; - - auto arg_open = "\n"; - auto arg_close = p.literal("\n"); - auto arg_value = p.eps(); - - if (schema_info.resolves_to_string(param_schema)) { - arg_value = p.tool_arg_string_value(arg_string) + "\n"; - } else { - arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema)); - } - - // Model may or my not close with - auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close))); - args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1); - }); - - tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close)); - }); - - auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; - auto max_calls = inputs.parallel_tool_calls ? -1 : 1; - auto tool_call = p.rule("tool-call", "\n" + tool_choice + "" + p.space()); - auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls)); - - return reasoning << p.content(p.until("")) << tool_calls; - } - - // Content only parser - include_grammar = false; - return reasoning << p.content(p.rest()); - }); - - data.parser = parser.save(); - - if (include_grammar) { - data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; - - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - auto schema = function.at("parameters"); - builder.resolve_refs(schema); - }); - parser.build_grammar(builder, data.grammar_lazy); - }); - - data.grammar_triggers = { - {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ""} - }; - } - - return data; -} - - -static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Generate the prompt using the apply() function with the template - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_APERTUS; - - // Handle thinking tags appropriately based on inputs.enable_thinking - if (string_ends_with(data.prompt, "<|inner_prefix|>")) { - if (!inputs.enable_thinking) { - data.prompt += "<|inner_suffix|>"; - } else { - data.thinking_forced_open = true; - } - } - - // When tools are present, build grammar for the <|tools_prefix|> format - if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = true; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - { "type", "object" }, - { "properties", - { - { function.at("name"), function.at("parameters") } - } }, - { "required", json::array({ function.at("name") }) }, - }); - }); - auto schema = json{ - { "type", "array" }, - { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } }, - { "minItems", 1 }, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") + - "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\""); - }); - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? - "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" : - "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") + - "(<\\|tools_prefix\\|>)[\\s\\S]*" }); - data.preserved_tokens = { - "<|system_start|>", - "<|system_end|>", - "<|developer_start|>", - "<|developer_end|>", - "<|user_start|>", - "<|user_end|>", - "<|assistant_start|>", - "<|assistant_end|>", - "<|inner_prefix|>", - "<|inner_suffix|>", - "<|tools_prefix|>", - "<|tools_suffix|>", - }; - } - return data; -} - -static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - auto prompt = apply(tmpl, inputs); - - // Hacks to fix the official (broken) prompt. - // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead, - // until the official template is fixed. - if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) { - // Don't leave the chat dangling after tool results - if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) { - prompt += "<|end▁of▁sentence|>"; - if (inputs.add_generation_prompt) { - prompt += "<|Assistant|>"; - } - } - // Fix up tool call delta example added by Minja - prompt = std::regex_replace( - prompt, - std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"), - "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2"); - } - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_rule(name + "-call", - "( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n" - "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " " - "\"```<|tool▁call▁end|>\"")); - }); - // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, - // so we accept common variants (then it's all constrained) - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) " - "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " - "\"<|tool▁calls▁end|>\"" - " space"); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + - "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*" - }); - data.preserved_tokens = { - "", - "", - "<|tool▁calls▁begin|>", - "<|tool▁call▁begin|>", - "<|tool▁sep|>", - "<|tool▁call▁end|>", - "<|tool▁calls▁end|", - }; - }); - } - return data; -} - -static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Pass thinking context for DeepSeek V3.1 template - json additional_context = { - {"thinking", inputs.enable_thinking}, - }; - - auto prompt = apply(tmpl, inputs, - /* messages_override= */ inputs.messages, - /* tools_override= */ std::nullopt, - additional_context); - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - if (string_ends_with(data.prompt, "")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_rule(name + "-call", - "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>" - "\" " + builder.add_schema(name + "-args", parameters) + " " - "\"<|tool▁call▁end|>\"")); - }); - // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, - // so we accept common variants (then it's all constrained) - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) " - "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " - "\"<|tool▁calls▁end|>\"" - " space"); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + - "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*" - }); - data.preserved_tokens = { - "", - "", - "<|tool▁calls▁begin|>", - "<|tool▁call▁begin|>", - "<|tool▁sep|>", - "<|tool▁call▁end|>", - "<|tool▁calls▁end|>", - }; - }); - } - return data; -} - -static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_MINIMAX_M2; - - // Handle thinking tags based on prompt ending - if (string_ends_with(data.prompt, "\n")) { - if (!params.enable_thinking) { - // Close the thinking tag immediately if thinking is disabled - data.prompt += "\n\n"; - } else { - // Mark thinking as forced open (template started with ) - data.thinking_forced_open = true; - } - } - - // Preserve MiniMax-M2 special tokens - data.preserved_tokens = { - "", - "", - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "\n", - /* form.tool_start = */ "\n", - /* form.key_start = */ "", - /* form.val_end = */ "\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - }; - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML; - - data.preserved_tokens = { - "", - "", - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "\n", - /* form.tool_start = */ "\n", - /* form.key_start = */ "\n", - /* form.val_end = */ "\n\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - }; - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_KIMI_K2; - - data.preserved_tokens = { - "", - "", - "<|tool_calls_section_begin|>", - "<|tool_call_begin|>", - "<|tool_call_argument_begin|>", - "<|tool_call_end|>", - "<|tool_calls_section_end|>", - "<|im_end|>", - "<|im_system|>", - "<|im_middle|>", - }; - - data.additional_stops.insert(data.additional_stops.end(), { - "<|im_end|>", - "<|im_middle|>" - }); - // build grammar for tool call - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "<|tool_calls_section_begin|>"; - form.tool_start = "<|tool_call_begin|>"; - form.tool_sep = "<|tool_call_argument_begin|>{"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}<|tool_call_end|>"; - form.scope_end = "<|tool_calls_section_end|>"; - form.raw_argval = false; - form.last_val_end = ""; - return form; - })(); - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_APRIEL_1_5; - - data.preserved_tokens = { - "", - "", - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "["; - form.tool_start = "{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}, "; - form.scope_end = "]"; - form.raw_argval = false; - form.last_val_end = ""; - form.last_tool_end = "}"; - return form; - })(); - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO; - - data.preserved_tokens = { - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "\n"; - form.tool_start = "\n{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}\n"; - form.scope_end = ""; - form.raw_argval = false; - form.last_val_end = ""; - return form; - })(); - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { +static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, + const autoparser::templates_params & inputs) { common_chat_params data; // Copy reasoning to the "thinking" field as expected by the gpt-oss template auto adjusted_messages = json::array(); for (const auto & msg : inputs.messages) { auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string(); - auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); + auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); if (has_reasoning_content && has_tool_calls) { - auto adjusted_message = msg; + auto adjusted_message = msg; adjusted_message["thinking"] = msg.at("reasoning_content"); adjusted_messages.push_back(adjusted_message); } else { @@ -2038,7 +936,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp } } - auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); + auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages); // Check if we need to replace the return token with end token during // inference and without generation prompt. For more details see: @@ -2052,895 +950,207 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp } data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_GPT_OSS; + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; // These special tokens are required to parse properly, so we include them // even if parse_tool_calls is false. data.preserved_tokens = { - "<|channel|>", - "<|constrain|>", - "<|message|>", - "<|start|>", - "<|end|>", + "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>", }; - if (!inputs.json_schema.is_null()) { - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schema = inputs.json_schema; - builder.resolve_refs(schema); + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + auto include_grammar = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools; - auto not_end = builder.add_rule("not-end", - "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]"); - auto analysis = builder.add_rule("analysis", - "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\""); - auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+"); - auto final = builder.add_rule("final", - "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " + - builder.add_schema("response", schema) - ); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + const std::string END = "<|end|>"; + const std::string START = "<|start|>"; + const std::string MESSAGE = "<|message|>"; + const std::string CHANNEL = "<|channel|>"; + const std::string CONSTRAIN = "<|constrain|>"; + const std::string START_ASSISTANT = START + "assistant"; + const std::string CHANNEL_ANALYSIS = CHANNEL + "analysis"; + const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary"; + const std::string CHANNEL_FINAL = CHANNEL + "final"; - builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final); - }); - } + auto the_end = END | p.end(); - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - // tool calls can appear in commentary or analysis channels - auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )"); + const std::string analysis_header = CHANNEL_ANALYSIS + MESSAGE; + auto segment_content = p.until(END); + auto analysis_segment = extract_reasoning ? + p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end : + p.content(analysis_header + p.until(END) + the_end); - std::vector tool_rules_recipient_in_role; - std::vector tool_rules_recipient_in_channel; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); + auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE }); + auto content_header = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) }); + auto content_segment = p.rule("content-segment", content_header + channel_header_content + MESSAGE + + p.content(segment_content) + the_end); - tool_rules_recipient_in_role.push_back( - builder.add_rule(name + "-call", - "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " + - builder.add_schema(name + "-args", parameters) - ) - ); - - tool_rules_recipient_in_channel.push_back( - builder.add_rule(name + "-call", - "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " + - builder.add_schema(name + "-args", parameters) - ) - ); - }); - - auto recipient_in_channel = builder.add_rule("recipient_in_channel", - channel + " \" to=functions.\" ( " + - string_join(tool_rules_recipient_in_channel, " | ") + " )" - ); - - if (data.grammar_lazy) { - auto recipient_in_role = builder.add_rule("recipient_in_role", - "\"<|start|>assistant\"? \" to=functions.\" ( " + - string_join(tool_rules_recipient_in_role, " | ") + " )" - ); - - builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel); - } else { - auto not_end = builder.add_rule("not-end", - "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]"); - auto analysis = builder.add_rule("analysis", - "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\""); - auto commentary = builder.add_rule("commentary", - "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\""); - - auto recipient_in_role = builder.add_rule("recipient_in_role", - "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )" - ); - - builder.add_rule("root", - "( " + analysis + " \"<|start|>assistant\" )? " + - "( " + commentary + " \"<|start|>assistant\" )? " + - "( " + recipient_in_role + " | " + recipient_in_channel + " )" - ); - } - - // Trigger on tool calls that appear in the commentary channel - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - "<\\|channel\\|>(?:commentary|analysis) to" - }); - - // Trigger tool calls that appear in the role section, either at the - // start or in the middle. - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - "^ to" - }); - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - "<\\|start\\|>assistant to" - }); - }); - } - - return data; -} - -static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - std::string prompt = apply(tmpl, inputs); - - // match the existing trimming behavior - if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) { - prompt.erase(0, tmpl.bos_token().size()); - } - if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) { - prompt.erase(prompt.size() - tmpl.eos_token().size()); - } - if (string_ends_with(prompt, "")) { - if (!inputs.enable_thinking) { - prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - // add GLM preserved tokens - data.preserved_tokens = { - "<|endoftext|>", - "[MASK]", - "[gMASK]", - "[sMASK]", - "", - "", - "<|system|>", - "<|user|>", - "<|assistant|>", - "<|observation|>", - "<|begin_of_image|>", - "<|end_of_image|>", - "<|begin_of_video|>", - "<|end_of_video|>", - "<|begin_of_audio|>", - "<|end_of_audio|>", - "<|begin_of_transcription|>", - "<|end_of_transcription|>", - "<|code_prefix|>", - "<|code_middle|>", - "<|code_suffix|>", - "/nothink", - "", - "", - "", - "", - "", - "", - "", - "" - }; - - // extra GLM 4.5 stop word - data.additional_stops.insert(data.additional_stops.end(), { - "<|user|>", - "<|observation|>" - }); - - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "\n", - /* form.tool_sep = */ "\n", - /* form.key_start = */ "", - /* form.key_val_sep = */ "\n", - /* form.val_end = */ "\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - }; - build_grammar_xml_tool_call(data, inputs.tools, form); - - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_GLM_4_5; - return data; -} - -static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { - LOG_DBG("%s\n", __func__); - common_chat_params data; - const std::optional additional_context = json { - {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")}, - {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))}, - }; - data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context); - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema)); - }); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["}); - data.preserved_tokens = { - " functools[", - }; - data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; - } else { - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - } - return data; -} - -static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) { - // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... - // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar - // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code. - common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector first_tool_rules; - std::vector subsequent_tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - std::string args_pattern = "[\\s\\S]*"; - auto args_rule = builder.add_schema(name + "-args", parameters); - if (name == "python") { - args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*"); - } else { - args_pattern = "\\{" + args_pattern; - } - auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule); - first_tool_rules.push_back(call_rule); - if (inputs.parallel_tool_calls) { - subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule)); - } - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern, - }); - }); - data.preserved_tokens = { - "<|end_header_id|>", - }; - auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space"; - if (inputs.parallel_tool_calls) { - auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space"; - builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*"); - } else { - builder.add_rule("root", first_rule); - } - - }); - } - return data; -} - -static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { - // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt - common_chat_params data; - - if (!inputs.tools.is_null()) { - std::string python_code_argument_name; - auto has_raw_python = false; - - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - const auto & parameters = function.at("parameters"); - std::string name = function.at("name"); - if (name == "python" || name == "ipython") { - if (!parameters.contains("type")) { - throw std::runtime_error("Missing type in python tool"); - } - has_raw_python = true; - const auto & type = parameters.at("type"); - if (type == "object") { - auto properties = parameters.at("properties"); - for (auto it = properties.begin(); it != properties.end(); ++it) { - if (it.value().at("type") == "string") { - if (!python_code_argument_name.empty()) { - throw std::runtime_error("Multiple string arguments found in python tool"); - } - python_code_argument_name = it.key(); - } - } - if (python_code_argument_name.empty()) { - throw std::runtime_error("No string argument found in python tool"); - } - } else if (type != "string") { - throw std::runtime_error("Invalid type in python tool: " + type.dump()); - } - } - tool_rules.push_back(builder.add_rule(name + "-call", "\"\" " + builder.add_schema(name + "-args", parameters) + " \"\" space")); - }); - if (has_raw_python) { - tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*")); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"}); - data.preserved_tokens.push_back("<|python_tag|>"); - } - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space"; - builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "\n")) { - if (!extra_context["enable_thinking"]) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (!inputs.tools.is_null()) { - // (content)?({"name": "foo", "arguments": {"a": 1}})* - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - std::vector tool_call_alts; - std::vector escaped_names; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_schema(name + "-call", { - {"type", "object"}, - {"properties", json { - {"name", json {{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - })); - tool_call_alts.push_back(builder.add_rule( - name + "-function-tag", - "\"\" space " + - builder.add_schema(name + "-args", parameters) + " " - "\"\" space")); - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_WORD, - "", - }); - auto escaped_name = regex_escape(name); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - " alt_tags { - any_tool_call, - "\"\" space " + any_tool_call + " \"\"", - // The rest is just to accommodate common "good bad" outputs. - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - }; - auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space"); - tool_call_alts.push_back(wrappable_tool_call); - tool_call_alts.push_back( - "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); - auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); - // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "(\\s*)" : "") + ( - "\\s*(" - "(?:" - "||||)?" - "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\"" - ")" - ")" - ), - }); - data.preserved_tokens = { - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "```", - "```json", - "```xml", - }; - }); - } - - return data; -} - -static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Pass thinking context for Granite template - json additional_context = { - {"thinking", inputs.enable_thinking}, - }; - - data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context); - data.format = COMMON_CHAT_FORMAT_GRANITE; - - if (string_ends_with(data.prompt, "\n") || string_ends_with(data.prompt, "")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (!inputs.tools.is_null()) { - // Granite uses <|tool_call|> followed by JSON list - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name + -"-args", { - {"type", "object"}, - {"properties", { - {"name", {{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - }))); - }); - - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")); - auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\""); - - if (data.thinking_forced_open) { - builder.add_rule("root", "\"\" space \"\" space [^<]* \"\" space \"<|tool_call|>\" space " + tool_list); - } else { - builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list); - } - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_WORD, - "<|tool_call|>" - }); - - data.preserved_tokens = { - "", - "", - "", - "", - "<|tool_call|>", - }; - }); - } else { - // Handle thinking tags for non-tool responses - if (data.thinking_forced_open && inputs.enable_thinking) { - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - builder.add_rule("root", "\"\" space \"\" space .* \"\" space"); - }); - data.preserved_tokens = { - "", - "", - "", - "", - }; - } - } - - return data; -} - -static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Copy `reasoning_content` to `reasoning` - auto adjusted_messages = json::array(); - for (const auto & msg : inputs.messages) { - if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) { - auto adjusted_message = msg; - adjusted_message["reasoning"] = msg.at("reasoning_content"); - adjusted_message.erase("reasoning_content"); - adjusted_messages.push_back(adjusted_message); - } else { - adjusted_messages.push_back(msg); - } - } - - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); - auto include_grammar = true; - - auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); - - // Check if we need to replace the flush token with end token during inference and without generation prompt. - if (inputs.is_inference && !inputs.add_generation_prompt) { - static constexpr std::string_view return_token = "<|flush|>"; - static constexpr std::string_view end_token = "<|end|>"; - if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) { - prompt.replace(pos, return_token.length(), end_token); - } - } - - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - data.preserved_tokens = { - "<|think|>", - "<|content|>", - "<|begin|>", - "<|end|>", - "<|tool_calls|>", - "<|tool_call:begin|>", - "<|tool_call:end|>", - "<|tool_call:name|>", - "<|tool_call:args|>", - }; - - auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { - auto lit_think = p.atomic(p.literal("<|think|>")); - auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant")); - auto lit_content = p.atomic(p.literal("<|content|>")); - auto lit_end = p.atomic(p.literal("<|end|>")); - auto parser_until_end = p.until("<|end|>"); - - // reasoning <- "<|think|>" (!"<|end|>" .)* - auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end)); - - // content <- "<|content|>" (!"<|end|>" .)* - auto parser_content = p.rule("content", lit_content + p.content(parser_until_end)); - - // wrap_choice(items) <- item-choice wrapped* - // item-choice <- items[0] / ... / items[n] - // wrapped <- "<|end|><|begin|>assistant" item-choice - auto wrap_choice = [&](const std::vector & items) { - auto choice = p.choice(items); - return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice); - }; - - // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ... - auto wrap_seq = [&](const std::vector & items) { - auto seq = p.sequence(); - for (auto i = 0u; i < items.size(); i++) { - if (i == 0) { - seq += items[i]; - continue; - } - seq += lit_end + lit_assistant_begin + items[i]; - } - return seq; - }; - - // Response format parser - if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { - auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema)); - return p.choice({ - wrap_seq({parser_reasoning, parser_response_format}), - wrap_seq({parser_response_format}) - }); + if (!inputs.json_schema.is_null()) { + auto final_header = p.literal(CHANNEL_FINAL); + auto constraint = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content); + return p.optional(analysis_segment) + final_header + constraint + MESSAGE + + p.content(p.schema(p.json(), "response-format", inputs.json_schema)); } - auto lit_tool_call_begin = p.literal("<|tool_call:begin|>"); - auto lit_tool_call_name = p.literal("<|tool_call:name|>"); - auto lit_tool_call_args = p.literal("<|tool_call:args|>"); - auto lit_tool_call_end = p.literal("<|tool_call:end|>"); + auto segment = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment }); + auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end(); // Tool call parser if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) { - auto parser_tool_call = p.choice(); + auto tool_choice = p.choice(); + foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); - // tool(name, schema) <- name "<|tool_call:args|>" schema - parser_tool_call |= p.rule("tool-" + name, - p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args) - + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))); + // Tool call can appear as: + // 1. In role header: " to=functions.NAME<|channel|>..." + // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..." + auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name)); + + auto channel = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS); + auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content); + auto args = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params)); + + // Pattern 1: recipient in role header + // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS" + auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args); + + // Pattern 2: recipient in channel header + // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS" + + auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args); + + tool_choice |= p.trigger_rule("tool-" + name, tool_in_role | tool_in_channel); }); auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; auto max_calls = inputs.parallel_tool_calls ? -1 : 1; - // tool-calls <- "<|tool_calls|>" tool-call+ - // tool-call <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>" - // call-id <- [a-zA-Z0-9_-]+ - // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema) - auto parser_tool_calls = p.trigger_rule("tool-calls", - p.atomic(p.literal("<|tool_calls|>")) - + p.repeat( - p.tool_open( - lit_tool_call_begin - + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1)) - + lit_tool_call_name - + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args)) - + parser_tool_call - + p.tool_close(lit_tool_call_end), - /* min = */ 1, - /* max = */ max_calls)); + auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT)); + auto tool_call = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end()); - if (min_calls == 1) { - // If required, then try any combination of the reasoning, content, and tool call - return p.choice({ - wrap_seq({parser_reasoning, parser_content, parser_tool_calls}), - wrap_seq({parser_reasoning, parser_tool_calls}), - wrap_seq({parser_content, parser_tool_calls}), - wrap_seq({parser_tool_calls}) - }); - } - - return wrap_choice({parser_reasoning, parser_content, parser_tool_calls}); + return p.choice({ tool_call, p.one_or_more(segment) + tool_call }); } - // Content only parser - include_grammar = false; - return wrap_choice({parser_reasoning, parser_content}); + return contents; }); data.parser = parser.save(); if (include_grammar) { data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; - - data.grammar = build_grammar([&](const common_grammar_builder & builder) { + data.grammar = build_grammar([&](const common_grammar_builder & builder) { foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - auto schema = function.at("parameters"); + auto schema = function.at("parameters"); builder.resolve_refs(schema); }); parser.build_grammar(builder, data.grammar_lazy); }); data.grammar_triggers = { - {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"} + { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" }, + { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" }, + { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, + "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)" } }; } return data; } -static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) { +// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content} +static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, + const autoparser::templates_params & inputs) { common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_EXAONE_MOE; - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += "\n\n"; - } else { - data.thinking_forced_open = true; - } - } + data.prompt = common_chat_template_direct_apply(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.preserved_tokens = { + ">>>all", + }; + + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + // Functionary v3.2 format: + // - Normal content: >>>all\n{content} + // - Tool calls: >>>function_name\n{json_args} + // Generation prompt ends with ">>>" so model outputs recipient immediately + + // Build content parser for >>>all\n{content} + // When tools are present, content stops before the next ">>>" (tool call) + // When no tools, content goes until end + auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>")); + auto content_until_end = p.literal(">>>all\n") + p.content(p.rest()); + + // If no tools or tool_choice is NONE, just parse content + if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { + // When no tools, just match the prefix and capture everything after + return content_until_end + p.end(); + } + + // Build tool call parsers for each available function + auto tool_choice = p.choice(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); + + // Tool format: >>>function_name\n{json_args} + auto tool_parser = p.tool( + p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) + ); + + tool_choice |= p.rule("tool-" + name, tool_parser); + }); + + auto content_only = content_until_end; + auto content_and_tools = content_until_tool + p.one_or_more(tool_choice); + auto tools_only = p.one_or_more(tool_choice); + + if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) { + if (inputs.parallel_tool_calls) { + return p.choice({ content_and_tools, tools_only }) + p.end(); + } + return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end(); + } + if (inputs.parallel_tool_calls) { + return p.choice({ content_and_tools, content_only, tools_only }) + p.end(); + } + auto content_and_tool = content_until_tool + tool_choice; + return p.choice({ content_and_tool, content_only, tool_choice }) + p.end(); + }); + + data.parser = parser.save(); + + if (include_grammar) { + data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - // Expect: {"name": "", "arguments": {...}} - tool_rules.push_back(builder.add_rule( - name + "-call", - "\"\" space " + - builder.add_schema(name + "-obj", json{ - {"type", "object"}, - {"properties", { - {"name", json{{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - }) + - " space \"\" space")); + auto schema = function.at("parameters"); + builder.resolve_refs(schema); }); - - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")); - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)?" : "") + - "()[\\s\\S]*" - }); - data.preserved_tokens = { - "", - "", - "", - "", - }; + parser.build_grammar(builder, data.grammar_lazy); }); + + // Grammar trigger for when the model starts outputting a tool call + // (after the initial ">>>" in the generation prompt) + data.grammar_triggers = { + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ">>>" } + }; } return data; } -static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // This template does not support tools or reasoning - // we just need to transform the messages into the correct schema - - templates_params inputs_new = inputs; - json & messages = inputs_new.messages; - - // default to chat_template_kwargs, or en-GB if not specified - std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB"); - std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB"); - - GGML_ASSERT(messages.is_array()); - for (auto & message : messages) { - if (message.contains("role") && message["role"].get() != "user") { - continue; - } - if (!message.contains("content")) { - message["content"] = json::array(); - } - if (message.contains("content") && !message["content"].is_array()) { - auto content_str = message["content"].get(); - // default to en-GB if not specified (to make common_chat_format_example works) - auto src_lang = message.contains("source_lang_code") - ? message["source_lang_code"].get() : default_src_lang; - auto tgt_lang = message.contains("target_lang_code") - ? message["target_lang_code"].get() : default_tgt_lang; - message["content"] = json::array({ - json{ - {"type", "text"}, - {"text", content_str}, - {"source_lang_code", src_lang}, - {"target_lang_code", tgt_lang}, - } - }); - } - } - - data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt); - data.format = COMMON_CHAT_FORMAT_GENERIC; - - return data; -} - -static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - data.grammar_lazy = false; - if (!inputs.json_schema.is_null()) { - if (!inputs.grammar.empty()) { - throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); - } - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else { - data.grammar = inputs.grammar; - } - return data; -} - -static common_chat_params common_chat_params_init_seed_oss( - const common_chat_template & tmpl, - templates_params & params, - const common_chat_templates_inputs & inputs) -{ - common_chat_params data; - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_SEED_OSS; - if (string_ends_with(data.prompt, "")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (params.tools.is_array() && !params.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(params.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - - // Create rule for Seed-OSS function call format - std::string param_rules; - if (parameters.contains("properties")) { - for (const auto & [key, value] : parameters.at("properties").items()) { - param_rules += "\"\"" + builder.add_schema(name + "-arg-" + key, value) + - "\"\""; - } - } - - tool_rules.push_back(builder.add_rule(name + "-call", - "\"\" space \"\" space " + - param_rules + - " \"\" space \"\"")); - }); - - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "" }); - - data.preserved_tokens = { - "", "", "", "", - "", "", - }; - - builder.add_rule("root", string_join(tool_rules, " | ")); - }); - } - return data; -} - -// various workarounds for known issues with certain templates or model behaviors -// TODO @ngxson : improve this (how?) namespace workaround { // if first message is system and template does not support it, merge it with next message @@ -2960,6 +1170,15 @@ static void system_message_not_supported(json & messages) { } } +static void requires_non_null_content(json & messages) { + GGML_ASSERT(messages.is_array()); + for (auto & message : messages) { + if (message.contains("tool_calls") && !message.contains("content")) { + message["content"] = ""; + } + } +} + static void func_args_not_string(json & messages) { GGML_ASSERT(messages.is_array()); for (auto & message : messages) { @@ -2980,71 +1199,11 @@ static void func_args_not_string(json & messages) { } } -static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) { - GGML_ASSERT(messages.is_array()); - for (auto & message : messages) { - if (message.contains("tool_calls")) { - auto tool_calls_new = json{ - {"tool_calls", message.at("tool_calls")} - }; - message.erase("tool_calls"); - auto content = message.at("content"); - std::string content_new = content.is_null() ? "" : content.get(); - message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace); - } - } } -// TODO @ngxson : we may remove support for generic schema in the future -static void use_generic_schema(json & messages) { - GGML_ASSERT(messages.is_array()); - for (auto & message : messages) { - if (message.contains("tool_calls") && message.at("tool_calls").is_array()) { - auto & tool_calls = message.at("tool_calls"); - for (auto & tool_call : tool_calls) { - if (tool_call.contains("type") && tool_call.at("type") == "function" && - tool_call.contains("function") && tool_call.at("function").is_object()) { - // Copy values before erasing to avoid use-after-free - json name_value; - json arguments_value; - json id_value; - const auto & function = tool_call.at("function"); - if (function.contains("name")) { - name_value = function.at("name"); - } - if (function.contains("arguments")) { - arguments_value = function.at("arguments"); - } - if (tool_call.contains("id")) { - id_value = tool_call.at("id"); - } - // Now safely erase and assign in the correct order - tool_call.erase("type"); - tool_call.erase("function"); - tool_call.erase("id"); - // Reassign in desired order: name, arguments, id - if (!name_value.is_null()) { - tool_call["name"] = name_value; - } - if (!arguments_value.is_null()) { - tool_call["arguments"] = arguments_value; - } - if (!id_value.is_null()) { - tool_call["id"] = id_value; - } - } - } - } - } -} - -} // namespace workaround - -static common_chat_params common_chat_templates_apply_jinja( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs) -{ - templates_params params; +static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) { + autoparser::templates_params params; params.tools = common_chat_tools_to_json_oaicompat(inputs.tools); const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use ? *tmpls->template_tool_use @@ -3065,6 +1224,13 @@ static common_chat_params common_chat_templates_apply_jinja( workaround::system_message_not_supported(params.messages); } + if (tmpl.original_caps().supports_tool_calls) { + // some templates will require the content field in tool call messages + // to still be non-null, this puts an empty string everywhere where the + // content field is null + workaround::requires_non_null_content(params.messages); + } + params.extra_context = json::object(); for (auto el : inputs.chat_template_kwargs) { params.extra_context[el.first] = json::parse(el.second); @@ -3074,235 +1240,62 @@ static common_chat_params common_chat_templates_apply_jinja( params.json_schema = json::parse(inputs.json_schema); } - if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) { - LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n"); - params.parallel_tool_calls = false; - } else { - params.parallel_tool_calls = inputs.parallel_tool_calls; - } + // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) { + // LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n"); + // params.parallel_tool_calls = false; + // } else { + params.parallel_tool_calls = inputs.parallel_tool_calls; + //} if (params.tools.is_array()) { if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) { throw std::runtime_error("Cannot specify grammar with tools"); } if (caps.supports_tool_calls && !caps.supports_tools) { - LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n"); + LOG_WRN( + "Template supports tool calls but does not natively describe tools. The fallback behaviour used may " + "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n"); } } - // DeepSeek V3.1: detect based on specific patterns in the template - if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos && - params.json_schema.is_null()) { - return common_chat_params_init_deepseek_v3_1(tmpl, params); - } - - // DeepSeek R1: use handler in all cases except json schema (thinking / tools). - if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) { - return common_chat_params_init_deepseek_r1(tmpl, params); - } - - // Command R7B: : use handler in all cases except json schema (thinking / tools). - if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_command_r7b(tmpl, params); - } - - // Granite (IBM) - detects thinking / tools support - if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) { - workaround::func_args_not_string(params.messages); - workaround::use_generic_schema(params.messages); - workaround::move_tool_calls_to_content(params.messages); - return common_chat_params_init_granite(tmpl, params); - } - - // GLM 4.5: detect by and tags (check before Hermes since both use ) - if (src.find("[gMASK]") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - params.json_schema.is_null()) { - workaround::func_args_not_string(params.messages); - if (!params.extra_context.contains("clear_thinking")) { - // by default, do not clear reasoning_content (added since GLM-4.7) - params.extra_context["clear_thinking"] = false; - } - return common_chat_params_init_glm_4_5(tmpl, params); - } - - // Qwen3-Coder XML format detection (must come before Hermes 2 Pro) - // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates. - // Require presence of , , and blocks. - if (src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos) { - return common_chat_params_init_nemotron_v3(tmpl, params); - } - return common_chat_params_init_qwen3_coder_xml(tmpl, params); - } - - // Xiaomi MiMo format detection (must come before Hermes 2 Pro) - if (src.find("") != std::string::npos && - src.find("# Tools") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos) { - return common_chat_params_init_xiaomi_mimo(tmpl, params); - } - - // EXAONE MoE format detection - if (src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("<|tool_declare|>") != std::string::npos) { - return common_chat_params_init_exaone_moe(tmpl, params); - } - - // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools) - if (src.find("") != std::string::npos && params.json_schema.is_null()) { - return common_chat_params_init_hermes_2_pro(tmpl, params); - } - - // GPT-OSS - if (src.find("<|channel|>") != std::string::npos) { - return common_chat_params_init_gpt_oss(tmpl, params); - } - - // Seed-OSS - if (src.find("") != std::string::npos) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_seed_oss(tmpl, params, inputs); - } - - // Nemotron v2 - if (src.find("") != std::string::npos) { - return common_chat_params_init_nemotron_v2(tmpl, params); - } - - // Apertus format detection - if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) { - return common_chat_params_init_apertus(tmpl, params); - } - - // LFM2 (w/ tools) - if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos && - src.find("]<|tool_list_end|>") != std::string::npos) { - return common_chat_params_init_lfm2(tmpl, params); - } - - // MiniMax-M2 format detection - if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_minimax_m2(tmpl, params); - } - - // Kimi K2 format detection - if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos && - src.find("<|tool_calls_section_begin|>") != std::string::npos && - src.find("## Return of") != std::string::npos) { - return common_chat_params_init_kimi_k2(tmpl, params); - } - - // Apriel 1.5 format detection - if (src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("<|assistant|>") != std::string::npos && - src.find("<|tool_result|>") != std::string::npos && - src.find("[") != std::string::npos && - src.find("]") != std::string::npos) { - return common_chat_params_init_apriel_1_5(tmpl, params); - } - - // Solar Open - if (src.find("<|tool_response:begin|>") != std::string::npos && - src.find("<|tool_response:name|>") != std::string::npos && - src.find("<|tool_response:result|>") != std::string::npos) { - return common_chat_params_init_solar_open(tmpl, params); - } - - // Use generic handler when mixing tools + JSON schema. - // TODO: support that mix in handlers below. - if ((params.tools.is_array() && params.json_schema.is_object())) { - return common_chat_params_init_generic(tmpl, params); - } - - // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases. - if (src.find(">>>all") != std::string::npos) { - return common_chat_params_init_functionary_v3_2(tmpl, params); - } - - // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases. - if (src.find(" functools[") != std::string::npos) { - return common_chat_params_init_firefunction_v2(tmpl, params); - } - - // Functionary v3.1 (w/ tools) - if (src.find("<|start_header_id|>") != std::string::npos - && src.find("ipython<|end_header_id|>") != std::string::npos) { - auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos; - workaround::func_args_not_string(params.messages); - return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools); - } - - // Ministral/Mistral Large 3 - if (src.find("[SYSTEM_PROMPT]") != std::string::npos && - src.find("[TOOL_CALLS]") != std::string::npos && - src.find("[ARGS]") != std::string::npos) { + // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser + // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them + if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos && + src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) { + LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n"); return common_chat_params_init_ministral_3(tmpl, params); } - if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) { - return common_chat_params_init_magistral(tmpl, params); + // GPT-OSS - has unique channel-based structure that needs dedicated handler + if (src.find("<|channel|>") != std::string::npos) { + LOG_DBG("Using specialized template: GPT-OSS\n"); + return common_chat_params_init_gpt_oss(tmpl, params); } - // Solar Open - if (src.find("<|tool_response:begin|>") != std::string::npos && - src.find("<|tool_response:name|>") != std::string::npos && - src.find("<|tool_response:result|>") != std::string::npos) { - return common_chat_params_init_solar_open(tmpl, params); + // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content} + // Detection: template has ">>>all" for content and ">>>" prefix for tool calls + if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) { + LOG_DBG("Using specialized template: Functionary v3.2\n"); + return common_chat_params_init_functionary_v3_2(tmpl, params); } - // TranslateGemma - if (src.find("[source_lang_code]") != std::string::npos && - src.find("[target_lang_code]") != std::string::npos) { - return common_chat_params_init_translate_gemma(tmpl, params); + try { + LOG_DBG("Using differential autoparser\n"); + auto auto_params = autoparser::universal_peg_generator::generate_parser(tmpl, params); + return auto_params; + } catch (const std::exception & e) { + LOG_WRN("Automatic parser generation failed: %s\n", e.what()); } - // Plain handler (no tools) - if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { - return common_chat_params_init_without_tools(tmpl, params); - } - - // Mistral Nemo (w/ tools) - if (src.find("[TOOL_CALLS]") != std::string::npos) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_mistral_nemo(tmpl, params); - } - - // Generic fallback - workaround::func_args_not_string(params.messages); - workaround::use_generic_schema(params.messages); - workaround::move_tool_calls_to_content(params.messages); - return common_chat_params_init_generic(tmpl, params); + GGML_ABORT("Unable to generate parser for this template."); } // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template. -static common_chat_params common_chat_templates_apply_legacy( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs) -{ - size_t alloc_size = 0; +static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) { + size_t alloc_size = 0; std::vector chat; - std::vector contents; + std::vector contents; for (const auto & msg : inputs.messages) { auto content = msg.content; @@ -3312,25 +1305,27 @@ static common_chat_params common_chat_templates_apply_legacy( continue; } if (!content.empty()) { - content += "\n";; + content += "\n"; + ; } content += part.text; } contents.emplace_back(std::move(content)); } for (size_t i = 0; i < contents.size(); ++i) { - const auto & msg = inputs.messages[i]; + const auto & msg = inputs.messages[i]; const auto & content = contents[i]; - chat.push_back({msg.role.c_str(), content.c_str()}); + chat.push_back({ msg.role.c_str(), content.c_str() }); size_t msg_size = msg.role.size() + content.size(); - alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops + alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops } std::vector buf(alloc_size); // run the first time to get the total output length const auto & src = tmpls->template_default->source(); - int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size()); + int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, + buf.data(), buf.size()); // error: chat template is not supported if (res < 0) { @@ -3342,7 +1337,8 @@ static common_chat_params common_chat_templates_apply_legacy( // if it turns out that our buffer is too small, we resize it if ((size_t) res > buf.size()) { buf.resize(res); - res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size()); + res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), + buf.size()); } // for safety, we check the result again @@ -3360,14 +1356,72 @@ static common_chat_params common_chat_templates_apply_legacy( return params; } -common_chat_params common_chat_templates_apply( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs) -{ +common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) { GGML_ASSERT(tmpls != nullptr); - return inputs.use_jinja - ? common_chat_templates_apply_jinja(tmpls, inputs) - : common_chat_templates_apply_legacy(tmpls, inputs); + return inputs.use_jinja ? common_chat_templates_apply_jinja(tmpls, inputs) : + common_chat_templates_apply_legacy(tmpls, inputs); +} + +common_chat_msg common_chat_parse(const std::string & input, + bool is_partial, + const common_chat_parser_params & params) { + return common_chat_peg_parse(params.parser, input, is_partial, params); +} + +common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, + const std::string & input, + bool is_partial, + const common_chat_parser_params & params) { + const common_peg_arena & parser = src_parser.empty() ? + build_chat_peg_unified_parser([](common_chat_peg_unified_builder & p) { return p.content(p.rest()) + p.end(); }) : + src_parser; + + if (src_parser.empty()) { + LOG_WRN("No parser definition detected, assuming pure content parser."); + } + + LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str()); + + common_peg_parse_context ctx(input, is_partial); + ctx.debug = params.debug; + auto result = parser.parse(ctx); + + if (result.fail()) { + // During partial parsing, return partial results if any AST nodes were captured + // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK + if (is_partial && result.end > 0) { + // Try to extract any partial results from what was successfully parsed + common_chat_msg msg; + msg.role = "assistant"; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + if (ctx.debug) { + fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str()); + fflush(stderr); + } + return msg; + } + throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " + + input.substr(result.end)); + } + + common_chat_msg msg; + msg.role = "assistant"; + + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + if (ctx.debug) { + fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str()); + fflush(stderr); + } + + if (!is_partial) { + LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str()); + } + return msg; } std::map common_chat_templates_get_caps(const common_chat_templates * chat_templates) { @@ -3375,3 +1429,4 @@ std::map common_chat_templates_get_caps(const common_chat_tem GGML_ASSERT(chat_templates->template_default != nullptr); return chat_templates->template_default->caps.to_map(); } + diff --git a/common/chat.h b/common/chat.h index 1bf43f7261..8ccdba03e1 100644 --- a/common/chat.h +++ b/common/chat.h @@ -3,17 +3,30 @@ #pragma once #include "common.h" +#include "jinja/parser.h" +#include "nlohmann/json_fwd.hpp" #include "peg-parser.h" -#include +#include "jinja/runtime.h" +#include "jinja/caps.h" +#include "nlohmann/json.hpp" + #include +#include +#include #include #include -#include + +using chat_template_caps = jinja::caps; +using json = nlohmann::ordered_json; #include struct common_chat_templates; +namespace autoparser { +struct templates_params; +} // namespace autoparser + struct common_chat_tool_call { std::string name; std::string arguments; @@ -38,21 +51,85 @@ struct common_chat_msg_content_part { } }; +struct common_chat_template { + jinja::program prog; + std::string bos_tok; + std::string eos_tok; + std::string src; + chat_template_caps caps; + + common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) { + jinja::lexer lexer; + auto lexer_res = lexer.tokenize(src); + this->prog = jinja::parse_from_tokens(lexer_res); + + this->src = lexer_res.source; + this->bos_tok = bos_token; + this->eos_tok = eos_token; + + this->caps = jinja::caps_get(prog); + // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str()); + } + + const std::string & source() const { return src; } + const std::string & bos_token() const { return bos_tok; } + const std::string & eos_token() const { return eos_tok; } + + // TODO: this is ugly, refactor it somehow + json add_system(const json & messages, const std::string & system_prompt) const { + GGML_ASSERT(messages.is_array()); + auto msgs_copy = messages; + if (!caps.supports_system_role) { + if (msgs_copy.empty()) { + msgs_copy.insert(msgs_copy.begin(), json{ + {"role", "user"}, + {"content", system_prompt} + }); + } else { + auto & first_msg = msgs_copy[0]; + if (!first_msg.contains("content")) { + first_msg["content"] = ""; + } + first_msg["content"] = system_prompt + "\n\n" + + first_msg["content"].get(); + } + } else { + if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") { + msgs_copy.insert(msgs_copy.begin(), json{ + {"role", "system"}, + {"content", system_prompt} + }); + } else if (msgs_copy[0].at("role") == "system") { + msgs_copy[0]["content"] = system_prompt; + } + } + return msgs_copy; + } + + chat_template_caps original_caps() const { + return caps; + } + +}; + struct common_chat_msg { - std::string role; - std::string content; + std::string role; + std::string content; std::vector content_parts; - std::vector tool_calls; - std::string reasoning_content; - std::string tool_name; - std::string tool_call_id; + std::vector tool_calls; + std::string reasoning_content; + std::string tool_name; + std::string tool_call_id; nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const; bool empty() const { - return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty(); + return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && + tool_name.empty() && tool_call_id.empty(); } - void set_tool_call_ids(std::vector & ids_cache, const std::function & gen_tool_call_id) { + + void set_tool_call_ids(std::vector & ids_cache, + const std::function & gen_tool_call_id) { for (auto i = 0u; i < tool_calls.size(); i++) { if (ids_cache.size() <= i) { auto id = tool_calls[i].id; @@ -64,32 +141,28 @@ struct common_chat_msg { tool_calls[i].id = ids_cache[i]; } } + bool operator==(const common_chat_msg & other) const { - return role == other.role - && content == other.content - && content_parts == other.content_parts - && tool_calls == other.tool_calls - && reasoning_content == other.reasoning_content - && tool_name == other.tool_name - && tool_call_id == other.tool_call_id; - } - bool operator!=(const common_chat_msg & other) const { - return !(*this == other); + return role == other.role && content == other.content && content_parts == other.content_parts && + tool_calls == other.tool_calls && reasoning_content == other.reasoning_content && + tool_name == other.tool_name && tool_call_id == other.tool_call_id; } + + bool operator!=(const common_chat_msg & other) const { return !(*this == other); } }; struct common_chat_msg_diff { - std::string reasoning_content_delta; - std::string content_delta; - size_t tool_call_index = std::string::npos; + std::string reasoning_content_delta; + std::string content_delta; + size_t tool_call_index = std::string::npos; common_chat_tool_call tool_call_delta; - static std::vector compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new); + static std::vector compute_diffs(const common_chat_msg & msg_prv, + const common_chat_msg & msg_new); bool operator==(const common_chat_msg_diff & other) const { - return content_delta == other.content_delta - && tool_call_index == other.tool_call_index - && tool_call_delta == other.tool_call_delta; + return content_delta == other.content_delta && tool_call_index == other.tool_call_index && + tool_call_delta == other.tool_call_delta; } }; @@ -107,64 +180,37 @@ enum common_chat_tool_choice { enum common_chat_format { COMMON_CHAT_FORMAT_CONTENT_ONLY, - COMMON_CHAT_FORMAT_GENERIC, - COMMON_CHAT_FORMAT_MISTRAL_NEMO, - COMMON_CHAT_FORMAT_MAGISTRAL, - COMMON_CHAT_FORMAT_LLAMA_3_X, - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - COMMON_CHAT_FORMAT_HERMES_2_PRO, - COMMON_CHAT_FORMAT_COMMAND_R7B, - COMMON_CHAT_FORMAT_GRANITE, - COMMON_CHAT_FORMAT_GPT_OSS, - COMMON_CHAT_FORMAT_SEED_OSS, - COMMON_CHAT_FORMAT_NEMOTRON_V2, - COMMON_CHAT_FORMAT_APERTUS, - COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, - COMMON_CHAT_FORMAT_GLM_4_5, - COMMON_CHAT_FORMAT_MINIMAX_M2, - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_CHAT_FORMAT_QWEN3_CODER_XML, - COMMON_CHAT_FORMAT_APRIEL_1_5, - COMMON_CHAT_FORMAT_XIAOMI_MIMO, - COMMON_CHAT_FORMAT_SOLAR_OPEN, - COMMON_CHAT_FORMAT_EXAONE_MOE, // These are intended to be parsed by the PEG parser COMMON_CHAT_FORMAT_PEG_SIMPLE, COMMON_CHAT_FORMAT_PEG_NATIVE, - COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, - COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats + COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats }; struct common_chat_templates_inputs { - std::vector messages; - std::string grammar; - std::string json_schema; - bool add_generation_prompt = true; - bool use_jinja = true; + std::vector messages; + std::string grammar; + std::string json_schema; + bool add_generation_prompt = true; + bool use_jinja = true; // Parameters below only supported when use_jinja is true - std::vector tools; - common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; - bool parallel_tool_calls = false; + std::vector tools; + common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + bool parallel_tool_calls = false; common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking" - bool enable_thinking = true; - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - std::map chat_template_kwargs; - bool add_bos = false; - bool add_eos = false; + bool enable_thinking = true; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::map chat_template_kwargs; + bool add_bos = false; + bool add_eos = false; }; struct common_chat_params { common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; std::string prompt; std::string grammar; - bool grammar_lazy = false; + bool grammar_lazy = false; bool thinking_forced_open = false; std::vector grammar_triggers; std::vector preserved_tokens; @@ -175,13 +221,14 @@ struct common_chat_params { // per-message parsing syntax // should be derived from common_chat_params struct common_chat_parser_params { - common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning" // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode) - bool reasoning_in_content = false; - bool thinking_forced_open = false; - bool parse_tool_calls = true; - common_peg_arena parser = {}; + bool reasoning_in_content = false; + bool thinking_forced_open = false; + bool parse_tool_calls = true; + bool debug = false; // Enable debug output for PEG parser + common_peg_arena parser = {}; common_chat_parser_params() = default; common_chat_parser_params(const common_chat_params & chat_params) { format = chat_params.format; @@ -194,45 +241,42 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja); void common_chat_templates_free(struct common_chat_templates * tmpls); -struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } }; +struct common_chat_templates_deleter { + void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } +}; typedef std::unique_ptr common_chat_templates_ptr; -common_chat_templates_ptr common_chat_templates_init( - const struct llama_model * model, - const std::string & chat_template_override, - const std::string & bos_token_override = "", - const std::string & eos_token_override = ""); +common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model, + const std::string & chat_template_override, + const std::string & bos_token_override = "", + const std::string & eos_token_override = ""); bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls); std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = ""); - -struct common_chat_params common_chat_templates_apply( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs); +struct common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs); // Format single message, while taking into account the position of that message in chat history -std::string common_chat_format_single( - const struct common_chat_templates * tmpls, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja); +std::string common_chat_format_single(const struct common_chat_templates * tmpls, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja); // Returns an example of formatted chat -std::string common_chat_format_example( - const struct common_chat_templates * tmpls, - bool use_jinja, - const std::map & chat_template_kwargs); +std::string common_chat_format_example(const struct common_chat_templates * tmpls, + bool use_jinja, + const std::map & chat_template_kwargs); -const char* common_chat_format_name(common_chat_format format); -common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax); -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax); +const char * common_chat_format_name(common_chat_format format); +common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params); +common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params); // used by arg and server -const char * common_reasoning_format_name(common_reasoning_format format); -common_reasoning_format common_reasoning_format_from_name(const std::string & format); +const char * common_reasoning_format_name(common_reasoning_format format); +common_reasoning_format common_reasoning_format_from_name(const std::string & format); common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice); @@ -251,3 +295,10 @@ nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_ // get template caps, useful for reporting to server /props endpoint std::map common_chat_templates_get_caps(const common_chat_templates * chat_templates); + +std::string common_chat_template_direct_apply( + const common_chat_template & tmpl, + const autoparser::templates_params & inputs, + const std::optional & messages_override = std::nullopt, + const std::optional & tools_override = std::nullopt, + const std::optional & additional_context = std::nullopt); diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp index dbaaed500a..aecad6efa6 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -1,3 +1,4 @@ +#include "log.h" #include "value.h" #include "runtime.h" #include "caps.h" @@ -16,7 +17,7 @@ using json = nlohmann::ordered_json; namespace jinja { using caps_json_fn = std::function; -using caps_analyze_fn = std::function; +using caps_analyze_fn = std::function; static void caps_try_execute(jinja::program & prog, const caps_json_fn & messages_fn, @@ -36,16 +37,20 @@ static void caps_try_execute(jinja::program & prog, auto tools = ctx.get_val("tools"); bool success = false; + std::string result; try { jinja::runtime runtime(ctx); - runtime.execute(prog); + auto results = runtime.execute(prog); + auto parts = jinja::runtime::gather_string_parts(results); + result = parts->as_string().str(); success = true; } catch (const std::exception & e) { JJ_DEBUG("Exception during execution: %s", e.what()); + result = ""; // ignore exceptions during capability analysis } - analyze_fn(success, messages, tools); + analyze_fn(success, messages, tools, result); } // for debugging only @@ -90,6 +95,8 @@ caps caps_get(jinja::program & prog) { return v->stats.ops.find(op_name) != v->stats.ops.end(); }; + JJ_DEBUG("%s\n", ">>> Running capability check: typed content"); + // case: typed content support caps_try_execute( prog, @@ -106,7 +113,7 @@ caps caps_get(jinja::program & prog) { // tools return json{nullptr}; }, - [&](bool success, value & messages, value &) { + [&](bool success, value & messages, value &, const std::string &) { auto & content = messages->at(0)->at("content"); caps_print_stats(content, "messages[0].content"); if (has_op(content, "selectattr") || has_op(content, "array_access")) { @@ -120,6 +127,7 @@ caps caps_get(jinja::program & prog) { } ); + JJ_DEBUG("%s\n", ">>> Running capability check: system prompt"); // case: system prompt support caps_try_execute( @@ -141,7 +149,7 @@ caps caps_get(jinja::program & prog) { // tools return json::array(); }, - [&](bool, value & messages, value &) { + [&](bool, value & messages, value &, const std::string &) { auto & content = messages->at(0)->at("content"); caps_print_stats(content, "messages[0].content"); if (!content->stats.used) { @@ -150,6 +158,8 @@ caps caps_get(jinja::program & prog) { } ); + JJ_DEBUG("%s\n", ">>> Running capability check: tool support"); + // case: tools support caps_try_execute( prog, @@ -162,10 +172,10 @@ caps caps_get(jinja::program & prog) { }, { {"role", "assistant"}, - {"content", "Assistant message"}, + {"content", ""}, // Some templates expect content to be empty with tool calls {"tool_calls", json::array({ { - {"id", "call1"}, + {"id", "call00001"}, {"type", "function"}, {"function", { {"name", "tool1"}, @@ -175,10 +185,10 @@ caps caps_get(jinja::program & prog) { }} }, { - {"id", "call2"}, + {"id", "call00002"}, {"type", "function"}, {"function", { - {"name", "tool2"}, + {"name", "tool1"}, {"arguments", { {"arg", "value"} }} @@ -186,6 +196,15 @@ caps caps_get(jinja::program & prog) { } })} }, + { + {"role", "tool"}, + {"content", "Tool response"}, + {"tool_call_id", "call00001"} + }, + { + {"role", "assistant"}, + {"content", "The tool response was 'tool response'"} + }, { {"role", "user"}, {"content", "User message"}, @@ -199,7 +218,7 @@ caps caps_get(jinja::program & prog) { {"name", "tool"}, {"type", "function"}, {"function", { - {"name", "tool"}, + {"name", "tool1"}, {"description", "Tool description"}, {"parameters", { {"type", "object"}, @@ -215,7 +234,7 @@ caps caps_get(jinja::program & prog) { }, }); }, - [&](bool success, value & messages, value & tools) { + [&](bool success, value & messages, value & tools, const std::string & res) { if (!success) { result.supports_tool_calls = false; result.supports_tools = false; @@ -224,8 +243,11 @@ caps caps_get(jinja::program & prog) { auto & tool_name = tools->at(0)->at("function")->at("name"); caps_print_stats(tool_name, "tools[0].function.name"); + caps_print_stats(tools, "tools"); if (!tool_name->stats.used) { - result.supports_tools = false; + if (!tools->stats.used && res.find(tool_name->as_string().str()) == std::string::npos) { + result.supports_tools = false; + } } auto & tool_calls = messages->at(1)->at("tool_calls");; @@ -243,6 +265,8 @@ caps caps_get(jinja::program & prog) { } ); + JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning"); + // case: preserve reasoning content in chat history caps_try_execute( prog, @@ -268,7 +292,7 @@ caps caps_get(jinja::program & prog) { // tools return json::array(); }, - [&](bool, value & messages, value &) { + [&](bool, value & messages, value &, const std::string &) { auto & content = messages->at(1)->at("reasoning_content"); caps_print_stats(content, "messages[1].reasoning_content"); if (content->stats.used) { diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp index cc012c892f..b7e71115ed 100644 --- a/common/jinja/runtime.cpp +++ b/common/jinja/runtime.cpp @@ -114,8 +114,10 @@ value binary_expression::execute_impl(context & ctx) { // Logical operators if (op.value == "and") { + JJ_DEBUG("Executing logical test: %s AND %s", left->type().c_str(), right->type().c_str()); return left_val->as_bool() ? right->execute(ctx) : std::move(left_val); } else if (op.value == "or") { + JJ_DEBUG("Executing logical test: %s OR %s", left->type().c_str(), right->type().c_str()); return left_val->as_bool() ? std::move(left_val) : right->execute(ctx); } @@ -835,7 +837,7 @@ value call_expression::execute_impl(context & ctx) { for (auto & arg_stmt : this->args) { auto arg_val = arg_stmt->execute(ctx); JJ_DEBUG(" Argument type: %s", arg_val->type().c_str()); - args.push_back(std::move(arg_val)); + args.push_back(arg_val); } // execute callee value callee_val = callee->execute(ctx); diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp index 2aa156b177..38da1df6d3 100644 --- a/common/jinja/value.cpp +++ b/common/jinja/value.cpp @@ -715,8 +715,26 @@ const func_builtins & value_string_t::get_builtins() const { return args.get_pos(0); }}, {"tojson", tojson}, - {"indent", [](const func_args &) -> value { - throw not_implemented_exception("String indent builtin not implemented"); + {"indent", [](const func_args &args) -> value { + // no support for "first" as that would require us to somehow access generation context + args.ensure_count(2, 4); + args.ensure_vals(true, true, false, false); + + auto input = args.get_pos(0); + auto arg0 = args.get_pos(1); + + int count = arg0->as_int(); + if (count <= 0) { + throw raised_exception("indent must be a positive number"); + } + std::string indented; + for (int i = 0; i < count; i++) { + indented.append(" "); + } + indented.append(input->as_string().str()); + auto res = mk_val(indented); + res->val_str.mark_input_based_on(input->as_string()); + return res; }}, {"join", [](const func_args &) -> value { throw not_implemented_exception("String join builtin not implemented"); diff --git a/common/jinja/value.h b/common/jinja/value.h index 1c04760a08..7111629cda 100644 --- a/common/jinja/value.h +++ b/common/jinja/value.h @@ -12,8 +12,8 @@ #include #include #include -#include #include +#include namespace jinja { diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 2f67c74d79..57a14dc9f4 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -27,11 +27,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items if (separator_rule.empty()) { if (min_items == 1 && !has_max) { return item_rule + "+"; - } else if (min_items == 0 && !has_max) { - return item_rule + "*"; - } else { - return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; } + if (min_items == 0 && !has_max) { + return item_rule + "*"; + } + return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; } auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items); @@ -41,7 +41,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } -static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { +static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { auto has_min = min_value != std::numeric_limits::min(); auto has_max = max_value != std::numeric_limits::max(); @@ -128,14 +128,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string if (has_min && has_max) { if (min_value < 0 && max_value < 0) { out << "\"-\" ("; - _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); + build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); out << ")"; return; } if (min_value < 0) { out << "\"-\" ("; - _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); + build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); out << ") | "; min_value = 0; } @@ -159,7 +159,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string if (has_min) { if (min_value < 0) { out << "\"-\" ("; - _build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); + build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); out << ") | [0] | [1-9] "; more_digits(0, decimals_left - 1); } else if (min_value == 0) { @@ -194,7 +194,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string } digit_range(c, c); out << " ("; - _build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); + build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); out << ")"; if (c < '9') { out << " | "; @@ -213,10 +213,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string more_digits(0, less_decimals); out << " | "; } - _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); + build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); } else { out << "\"-\" ("; - _build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); + build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); out << ")"; } return; @@ -232,7 +232,7 @@ struct BuiltinRule { std::vector deps; }; -std::unordered_map PRIMITIVE_RULES = { +static std::unordered_map PRIMITIVE_RULES = { {"boolean", {"(\"true\" | \"false\") space", {}}}, {"decimal-part", {"[0-9]{1,16}", {}}}, {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}}, @@ -247,7 +247,7 @@ std::unordered_map PRIMITIVE_RULES = { {"null", {"\"null\" space", {}}}, }; -std::unordered_map STRING_FORMAT_RULES = { +static std::unordered_map STRING_FORMAT_RULES = { {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}}, {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}}, {"date-time", {"date \"T\" time", {"date", "time"}}}, @@ -260,22 +260,26 @@ static bool is_reserved_name(const std::string & name) { static const std::unordered_set RESERVED_NAMES = [] { std::unordered_set s; s.insert("root"); - for (const auto & p : PRIMITIVE_RULES) s.insert(p.first); - for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first); + for (const auto & p : PRIMITIVE_RULES) { + s.insert(p.first); + } + for (const auto & p : STRING_FORMAT_RULES) { + s.insert(p.first); + } return s; }(); return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); } -std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); -std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]"); -std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); -std::unordered_map GRAMMAR_LITERAL_ESCAPES = { +static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); +static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]"); +static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); +static std::unordered_map GRAMMAR_LITERAL_ESCAPES = { {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"} }; -std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; -std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; +static std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; +static std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function & replacement) { std::smatch match; @@ -322,19 +326,19 @@ private: if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) { _rules[esc_name] = rule; return esc_name; - } else { - int i = 0; - while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { - i++; - } - std::string key = esc_name + std::to_string(i); - _rules[key] = rule; - return key; } + int i = 0; + while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { + i++; + } + std::string key = esc_name + std::to_string(i); + _rules[key] = rule; + return key; } std::string _generate_union_rule(const std::string & name, const std::vector & alt_schemas) { std::vector rules; + rules.reserve(alt_schemas.size()); for (size_t i = 0; i < alt_schemas.size(); i++) { rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i))); } @@ -398,6 +402,7 @@ private: flush_literal(); std::vector results; + results.reserve(ret.size()); for (const auto & item : ret) { results.push_back(to_rule(item)); } @@ -551,7 +556,7 @@ private: TrieNode() : is_end_of_string(false) {} void insert(const std::string & string) { - auto node = this; + auto *node = this; for (char c : string) { node = &node->children[c]; } @@ -676,7 +681,7 @@ private: if (ks.empty()) { return res; } - std::string k = ks[0]; + const std::string& k = ks[0]; std::string kv_rule_name = prop_kv_rule_names[k]; std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; if (first_is_optional) { @@ -779,7 +784,7 @@ public: std::string pointer = ref.substr(ref.find('#') + 1); std::vector tokens = string_split(pointer, "/"); for (size_t i = 1; i < tokens.size(); ++i) { - std::string sel = tokens[i]; + const std::string& sel = tokens[i]; if (target.is_object() && target.contains(sel)) { target = target[sel]; } else if (target.is_array()) { @@ -802,7 +807,7 @@ public: _refs[ref] = target; } } else { - for (auto & kv : n.items()) { + for (const auto & kv : n.items()) { visit_refs(kv.value()); } } @@ -812,7 +817,7 @@ public: visit_refs(schema); } - std::string _generate_constant_rule(const json & value) { + static std::string _generate_constant_rule(const json & value) { return format_literal(value.dump()); } @@ -823,10 +828,12 @@ public: if (schema.contains("$ref")) { return _add_rule(rule_name, _resolve_ref(schema["$ref"])); - } else if (schema.contains("oneOf") || schema.contains("anyOf")) { + } + if (schema.contains("oneOf") || schema.contains("anyOf")) { std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); - } else if (schema_type.is_array()) { + } + if (schema_type.is_array()) { std::vector schema_types; for (const auto & t : schema_type) { json schema_copy(schema); @@ -834,15 +841,18 @@ public: schema_types.push_back(schema_copy); } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); - } else if (schema.contains("const")) { + } + if (schema.contains("const")) { return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); - } else if (schema.contains("enum")) { + } + if (schema.contains("enum")) { std::vector enum_values; for (const auto & v : schema["enum"]) { enum_values.push_back(_generate_constant_rule(v)); } return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); - } else if ((schema_type.is_null() || schema_type == "object") + } + if ((schema_type.is_null() || schema_type == "object") && (schema.contains("properties") || (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { std::unordered_set required; @@ -863,11 +873,12 @@ public: _build_object_rule( properties, required, name, schema.contains("additionalProperties") ? schema["additionalProperties"] : json())); - } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { + } + if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; std::map enum_values; - std::string hybrid_name = name; + const std::string& hybrid_name = name; std::function add_component = [&](const json & comp_schema, bool is_required) { if (comp_schema.contains("$ref")) { add_component(_refs[comp_schema["$ref"]], is_required); @@ -890,9 +901,9 @@ public: // todo warning } }; - for (auto & t : schema["allOf"]) { + for (const auto & t : schema["allOf"]) { if (t.contains("anyOf")) { - for (auto & tt : t["anyOf"]) { + for (const auto & tt : t["anyOf"]) { add_component(tt, false); } } else { @@ -911,7 +922,8 @@ public: } } return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); - } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { + } + if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; if (items.is_array()) { std::string rule = "\"[\" space "; @@ -923,27 +935,31 @@ public: } rule += " \"]\" space"; return _add_rule(rule_name, rule); - } else { - std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); - int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; - json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); - int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); - - return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); } - } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { + std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); + int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; + json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); + int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); + + return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); + } + if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { return _visit_pattern(schema["pattern"], rule_name); - } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { + } + if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); - } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { + } + if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { auto prim_name = schema_format + "-string"; return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); - } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { + } + if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; int max_len = schema.contains("maxLength") ? schema["maxLength"].get() : std::numeric_limits::max(); return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); - } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { + } + if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { int64_t min_value = std::numeric_limits::min(); int64_t max_value = std::numeric_limits::max(); if (schema.contains("minimum")) { @@ -958,19 +974,19 @@ public: } std::stringstream out; out << "("; - _build_min_max_int(min_value, max_value, out); + build_min_max_int(min_value, max_value, out); out << ") space"; return _add_rule(rule_name, out.str()); - } else if (schema.empty() || schema_type == "object") { - return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); - } else { - if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { - _errors.push_back("Unrecognized schema: " + schema.dump()); - return ""; - } - // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero - return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); } + if (schema.empty() || schema_type == "object") { + return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); + } + if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { + _errors.push_back("Unrecognized schema: " + schema.dump()); + return ""; + } + // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero + return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); } void check_errors() { @@ -985,7 +1001,7 @@ public: std::string format_grammar() { std::stringstream ss; for (const auto & kv : _rules) { - ss << kv.first << " ::= " << kv.second << std::endl; + ss << kv.first << " ::= " << kv.second << '\n'; } return ss.str(); } diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index f2fc84500f..1545a24210 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1,28 +1,32 @@ -#include "common.h" #include "peg-parser.h" -#include "json-schema-to-grammar.h" -#include "unicode.h" -#include +#include "common.h" +#include "json-schema-to-grammar.h" +#include "log.h" +#include "unicode.h" #include #include #include #include +#include #include #include #include // Trick to catch missing branches -template -inline constexpr bool is_always_false_v = false; +template inline constexpr bool is_always_false_v = false; const char * common_peg_parse_result_type_name(common_peg_parse_result_type type) { switch (type) { - case COMMON_PEG_PARSE_RESULT_FAIL: return "fail"; - case COMMON_PEG_PARSE_RESULT_SUCCESS: return "success"; - case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT: return "need_more_input"; - default: return "unknown"; + case COMMON_PEG_PARSE_RESULT_FAIL: + return "fail"; + case COMMON_PEG_PARSE_RESULT_SUCCESS: + return "success"; + case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT: + return "need_more_input"; + default: + return "unknown"; } } @@ -34,81 +38,88 @@ static bool is_hex_digit(const char c) { // This is used in common_peg_until_parser and to build a GBNF exclusion grammar struct trie { struct node { - size_t depth = 0; - std::map children; - bool is_word; + std::map children; + bool is_word = false; }; std::vector nodes; trie(const std::vector & words) { - create_node(); // root node - for (const auto & w : words) { - insert(w); - } + create_node(); // root node + for (const auto & w : words) { + insert(w); + } } enum match_result { NO_MATCH, PARTIAL_MATCH, COMPLETE_MATCH }; // Check if a delimiter starts at the given position match_result check_at(std::string_view sv, size_t start_pos) const { - size_t current = 0; // Start at root - size_t pos = start_pos; + size_t current = 0; // Start at root + size_t pos = start_pos; + + // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str()); while (pos < sv.size()) { - auto it = nodes[current].children.find(sv[pos]); + auto result = parse_utf8_codepoint(sv, pos); + if (result.status != utf8_parse_result::SUCCESS) { + break; + } + + auto it = nodes[current].children.find(result.codepoint); if (it == nodes[current].children.end()) { // Can't continue matching - return match_result{match_result::NO_MATCH}; + return match_result{ match_result::NO_MATCH }; } current = it->second; - pos++; + pos += result.bytes_consumed; // Check if we've matched a complete word if (nodes[current].is_word) { - return match_result{match_result::COMPLETE_MATCH}; + // LOG_DBG("%s: complete match found at pos %zu\n", __func__, pos); + return match_result{ match_result::COMPLETE_MATCH }; } } // Reached end of input while still in the trie (not at root) if (current != 0) { // We're in the middle of a potential match - return match_result{match_result::PARTIAL_MATCH}; + return match_result{ match_result::PARTIAL_MATCH }; } // Reached end at root (no match) - return match_result{match_result::NO_MATCH}; + return match_result{ match_result::NO_MATCH }; } struct prefix_and_next { - std::string prefix; - std::string next_chars; + std::vector prefix; + std::vector next_chars; }; std::vector collect_prefix_and_next() { - std::string prefix; + std::vector prefix; std::vector result; collect_prefix_and_next(0, prefix, result); return result; } private: - void collect_prefix_and_next(size_t index, std::string & prefix, std::vector & out) { + void collect_prefix_and_next(size_t index, std::vector & prefix, std::vector & out) { if (!nodes[index].is_word) { if (!nodes[index].children.empty()) { - std::string chars; + std::vector chars; chars.reserve(nodes[index].children.size()); for (const auto & p : nodes[index].children) { chars.push_back(p.first); } - out.emplace_back(prefix_and_next{prefix, chars}); + out.emplace_back(prefix_and_next{ prefix, chars }); } } for (const auto & p : nodes[index].children) { - unsigned char ch = p.first; - auto child = p.second; + uint32_t ch = p.first; + auto child = p.second; prefix.push_back(ch); collect_prefix_and_next(child, prefix, out); prefix.pop_back(); @@ -123,13 +134,21 @@ struct trie { void insert(const std::string & word) { size_t current = 0; - for (unsigned char ch : word) { + size_t pos = 0; + while (pos < word.length()) { + auto result = parse_utf8_codepoint(word, pos); + if (result.status != utf8_parse_result::SUCCESS) { + break; + } + + uint32_t ch = result.codepoint; + pos += result.bytes_consumed; + auto it = nodes[current].children.find(ch); if (it == nodes[current].children.end()) { - size_t child = create_node(); - nodes[child].depth = nodes[current].depth + 1; + size_t child = create_node(); nodes[current].children[ch] = child; - current = child; + current = child; } else { current = it->second; } @@ -140,14 +159,14 @@ struct trie { static std::pair parse_hex_escape(const std::string & str, size_t pos, int hex_count) { if (pos + hex_count > str.length()) { - return {0, 0}; + return { 0, 0 }; } uint32_t value = 0; for (int i = 0; i < hex_count; i++) { char c = str[pos + i]; if (!is_hex_digit(c)) { - return {0, 0}; + return { 0, 0 }; } value <<= 4; if ('a' <= c && c <= 'f') { @@ -160,53 +179,64 @@ static std::pair parse_hex_escape(const std::string & str, siz break; } } - return {value, static_cast(hex_count)}; + return { value, static_cast(hex_count) }; } static std::pair parse_char_class_char(const std::string & content, size_t pos) { if (content[pos] == '\\' && pos + 1 < content.length()) { switch (content[pos + 1]) { - case 'x': { - auto result = parse_hex_escape(content, pos + 2, 2); - if (result.second > 0) { - return {result.first, 2 + result.second}; + case 'x': + { + auto result = parse_hex_escape(content, pos + 2, 2); + if (result.second > 0) { + return { result.first, 2 + result.second }; + } + // Invalid escape, treat as literal 'x' + return { static_cast('x'), 2 }; } - // Invalid escape, treat as literal 'x' - return {static_cast('x'), 2}; - } - case 'u': { - auto result = parse_hex_escape(content, pos + 2, 4); - if (result.second > 0) { - return {result.first, 2 + result.second}; + case 'u': + { + auto result = parse_hex_escape(content, pos + 2, 4); + if (result.second > 0) { + return { result.first, 2 + result.second }; + } + // Invalid escape, treat as literal 'u' + return { static_cast('u'), 2 }; } - // Invalid escape, treat as literal 'u' - return {static_cast('u'), 2}; - } - case 'U': { - auto result = parse_hex_escape(content, pos + 2, 8); - if (result.second > 0) { - return {result.first, 2 + result.second}; + case 'U': + { + auto result = parse_hex_escape(content, pos + 2, 8); + if (result.second > 0) { + return { result.first, 2 + result.second }; + } + // Invalid escape, treat as literal 'U' + return { static_cast('U'), 2 }; } - // Invalid escape, treat as literal 'U' - return {static_cast('U'), 2}; - } - case 'n': return {'\n', 2}; - case 't': return {'\t', 2}; - case 'r': return {'\r', 2}; - case '\\': return {'\\', 2}; - case ']': return {']', 2}; - case '[': return {'[', 2}; - default: return {static_cast(content[pos + 1]), 2}; + case 'n': + return { '\n', 2 }; + case 't': + return { '\t', 2 }; + case 'r': + return { '\r', 2 }; + case '\\': + return { '\\', 2 }; + case ']': + return { ']', 2 }; + case '[': + return { '[', 2 }; + default: + return { static_cast(content[pos + 1]), 2 }; } } // Regular character - return as codepoint - return {static_cast(static_cast(content[pos])), 1}; + return { static_cast(static_cast(content[pos])), 1 }; } -static std::pair, bool> parse_char_classes(const std::string & classes) { +static std::pair, bool> parse_char_classes( + const std::string & classes) { std::vector ranges; - bool negated = false; + bool negated = false; std::string content = classes; if (content.front() == '[') { @@ -231,14 +261,14 @@ static std::pair, bool> parse_c if (i + 1 < content.length() && content[i] == '-') { // Range detected auto [end, end_len] = parse_char_class_char(content, i + 1); - ranges.push_back(common_peg_chars_parser::char_range{start, end}); + ranges.push_back(common_peg_chars_parser::char_range{ start, end }); i += 1 + end_len; } else { - ranges.push_back(common_peg_chars_parser::char_range{start, start}); + ranges.push_back(common_peg_chars_parser::char_range{ start, start }); } } - return {ranges, negated}; + return { ranges, negated }; } void common_peg_ast_arena::visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const { @@ -279,29 +309,53 @@ common_peg_parser_id common_peg_arena::get_rule(const std::string & name) const } struct parser_executor { - const common_peg_arena & arena; + const common_peg_arena & arena; common_peg_parse_context & ctx; - size_t start_pos; + size_t start_pos; - parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start) - : arena(arena), ctx(ctx), start_pos(start) {} + parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start) : + arena(arena), + ctx(ctx), + start_pos(start) {} + + std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); } + + std::string debug_input_snippet(size_t pos, size_t len = 60) const { + if (pos >= ctx.input.size()) { + return ""; + } + auto snippet = ctx.input.substr(pos, len); + // Escape newlines for display + std::string result; + for (char c : snippet) { + if (c == '\n') { + result += "\\n"; + } else if (c == '\r') { + result += "\\r"; + } else if (c == '\t') { + result += "\\t"; + } else { + result += c; + } + } + if (pos + len < ctx.input.size()) { + result += "..."; + } + return result; + } common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos); } common_peg_parse_result operator()(const common_peg_start_parser & /* p */) const { - return common_peg_parse_result( - start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, - start_pos - ); + return common_peg_parse_result(start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, + start_pos); } common_peg_parse_result operator()(const common_peg_end_parser & /* p */) const { return common_peg_parse_result( - start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, - start_pos - ); + start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, start_pos); } common_peg_parse_result operator()(const common_peg_literal_parser & p) { @@ -323,12 +377,39 @@ struct parser_executor { } common_peg_parse_result operator()(const common_peg_sequence_parser & p) { - auto pos = start_pos; + if (ctx.debug) { + LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos, + debug_input_snippet(start_pos).c_str(), p.children.size()); + } + ctx.parse_depth++; + + auto pos = start_pos; std::vector nodes; - for (const auto & child_id : p.children) { + for (size_t i = 0; i < p.children.size(); i++) { + const auto & child_id = p.children[i]; + if (ctx.debug) { + fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str()); + } auto result = arena.parse(child_id, ctx, pos); + + if (ctx.debug) { + fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i, + common_peg_parse_result_type_name(result.type), result.start, result.end); + } + if (result.fail()) { + ctx.parse_depth--; + if (ctx.is_partial && result.end >= ctx.input.size()) { + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> NEED_MORE (child failed at end)\n", debug_indent().c_str()); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, + std::move(nodes)); + } + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str()); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end); } @@ -337,43 +418,93 @@ struct parser_executor { } if (result.need_more_input()) { - return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes)); + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str()); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, + std::move(nodes)); } pos = result.end; } + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes)); } common_peg_parse_result operator()(const common_peg_choice_parser & p) { + if (ctx.debug) { + fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos, + debug_input_snippet(start_pos).c_str(), p.children.size()); + } + ctx.parse_depth++; + auto pos = start_pos; - for (const auto & child_id : p.children) { + for (size_t i = 0; i < p.children.size(); i++) { + const auto & child_id = p.children[i]; + if (ctx.debug) { + fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str()); + } auto result = arena.parse(child_id, ctx, pos); + if (ctx.debug) { + fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, + common_peg_parse_result_type_name(result.type)); + } if (!result.fail()) { + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sCHOICE -> %s (option %zu)\n", debug_indent().c_str(), + common_peg_parse_result_type_name(result.type), i); + } return result; } } + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str()); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); } common_peg_parse_result operator()(const common_peg_repetition_parser & p) { - auto pos = start_pos; - int match_count = 0; + if (ctx.debug) { + fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos, + debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count); + } + ctx.parse_depth++; + + auto pos = start_pos; + int match_count = 0; std::vector nodes; // Try to match up to max_count times (or unlimited if max_count is -1) while (p.max_count == -1 || match_count < p.max_count) { if (pos >= ctx.input.size()) { + if (ctx.debug) { + fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count); + } break; } auto result = arena.parse(p.child, ctx, pos); + if (ctx.debug) { + fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count, + common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size()); + fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str()); + } + if (result.success()) { // Prevent infinite loop on empty matches if (result.end == pos) { + if (ctx.debug) { + fprintf(stderr, "%s REPEAT: empty match, stopping\n", debug_indent().c_str()); + } break; } @@ -391,21 +522,45 @@ struct parser_executor { nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end()); } - return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes)); + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(), + match_count, nodes.size()); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, + std::move(nodes)); } // Child failed - stop trying + if (ctx.debug) { + fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str()); + } break; } // Check if we got enough matches if (p.min_count > 0 && match_count < p.min_count) { + ctx.parse_depth--; if (pos >= ctx.input.size() && ctx.is_partial) { - return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes)); + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(), + match_count, p.min_count); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, + std::move(nodes)); + } + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count, + p.min_count); } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos); } + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count, + nodes.size()); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes)); } @@ -463,8 +618,8 @@ struct parser_executor { } common_peg_parse_result operator()(const common_peg_chars_parser & p) const { - auto pos = start_pos; - int match_count = 0; + auto pos = start_pos; + int match_count = 0; // Try to match up to max_count times (or unlimited if max_count is -1) while (p.max_count == -1 || match_count < p.max_count) { @@ -527,7 +682,7 @@ struct parser_executor { } static common_peg_parse_result handle_escape_sequence(common_peg_parse_context & ctx, size_t start, size_t & pos) { - ++pos; // consume '\' + ++pos; // consume '\' if (pos >= ctx.input.size()) { if (!ctx.is_partial) { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start); @@ -537,6 +692,7 @@ struct parser_executor { switch (ctx.input[pos]) { case '"': + case '\'': case '\\': case '/': case 'b': @@ -555,7 +711,7 @@ struct parser_executor { } static common_peg_parse_result handle_unicode_escape(common_peg_parse_context & ctx, size_t start, size_t & pos) { - ++pos; // consume 'u' + ++pos; // consume 'u' for (int i = 0; i < 4; ++i) { if (pos >= ctx.input.size()) { if (!ctx.is_partial) { @@ -613,11 +769,53 @@ struct parser_executor { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos); } + common_peg_parse_result operator()(const common_peg_python_dict_string_parser & /* p */) { + auto pos = start_pos; + + // Parse string content (without quotes) + while (pos < ctx.input.size()) { + char c = ctx.input[pos]; + + if (c == '\'') { + // Found closing quote - success (don't consume it) + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos); + } + + if (c == '\\') { + auto result = handle_escape_sequence(ctx, start_pos, pos); + if (!result.success()) { + return result; + } + } else { + auto utf8_result = parse_utf8_codepoint(ctx.input, pos); + + if (utf8_result.status == utf8_parse_result::INCOMPLETE) { + if (!ctx.is_partial) { + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos); + } + + if (utf8_result.status == utf8_parse_result::INVALID) { + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); + } + + pos += utf8_result.bytes_consumed; + } + } + + // Reached end without finding closing quote + if (!ctx.is_partial) { + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos); + } + common_peg_parse_result operator()(const common_peg_until_parser & p) const { trie matcher(p.delimiters); // Scan input and check for delimiters - size_t pos = start_pos; + size_t pos = start_pos; size_t last_valid_pos = start_pos; while (pos < ctx.input.size()) { @@ -638,16 +836,12 @@ struct parser_executor { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); } - // Check if a delimiter starts at this position auto match = matcher.check_at(ctx.input, pos); - if (match == trie::COMPLETE_MATCH) { - // Found a complete delimiter, return everything before it return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos); } if (match == trie::PARTIAL_MATCH) { - // Found a partial match extending to end of input, return everything before it return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos); } @@ -673,18 +867,12 @@ struct parser_executor { if (!result.fail()) { std::string_view text; if (result.start < ctx.input.size()) { - text = std::string_view(ctx.input).substr(result.start, result.end - result.start); + text = std::string_view(ctx.input).substr( + result.start, std::min(result.end - result.start, ctx.input.size() - result.start)); } - auto node_id = ctx.ast.add_node( - p.name, - "", - result.start, - result.end, - text, - std::move(result.nodes), - result.need_more_input() - ); + auto node_id = ctx.ast.add_node(p.name, "", result.start, result.end, text, std::move(result.nodes), + result.need_more_input()); return common_peg_parse_result(result.type, result.start, result.end, { node_id }); } @@ -694,6 +882,9 @@ struct parser_executor { common_peg_parse_result operator()(const common_peg_tag_parser & p) { // Parse the child + if (ctx.debug) { + fprintf(stderr, "%sTAG: %s\n", debug_indent().c_str(), p.tag.c_str()); + } auto result = arena.parse(p.child, ctx, start_pos); if (!result.fail()) { @@ -702,15 +893,8 @@ struct parser_executor { text = std::string_view(ctx.input).substr(result.start, result.end - result.start); } - auto node_id = ctx.ast.add_node( - "", - p.tag, - result.start, - result.end, - text, - std::move(result.nodes), - result.need_more_input() - ); + auto node_id = ctx.ast.add_node("", p.tag, result.start, result.end, text, std::move(result.nodes), + result.need_more_input()); return common_peg_parse_result(result.type, result.start, result.end, { node_id }); } @@ -740,60 +924,90 @@ common_peg_parse_result common_peg_arena::parse(common_peg_parse_context & ctx, return parse(root_, ctx, start); } -common_peg_parse_result common_peg_arena::parse(common_peg_parser_id id, common_peg_parse_context & ctx, size_t start) const { +common_peg_parse_result common_peg_arena::parse(common_peg_parser_id id, + common_peg_parse_context & ctx, + size_t start) const { // Execute parser - const auto & parser = parsers_.at(id); + const auto & parser = parsers_.at(id); parser_executor exec(*this, ctx, start); return std::visit(exec, parser); } common_peg_parser_id common_peg_arena::resolve_ref(common_peg_parser_id id) { const auto & parser = parsers_.at(id); - if (auto ref = std::get_if(&parser)) { + if (const auto *ref = std::get_if(&parser)) { return get_rule(ref->name); } return id; } +static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) { + for (int i = 0; i < indent; i++) { + oss << " "; + } + oss << "NODE " << node.id; + if (!node.rule.empty()) { + oss << " (rule " << node.rule << ")"; + } + if (!node.tag.empty()) { + oss << " (tag " << node.tag << ")"; + } + oss << " ['" << node.text << "']\n"; + for (const auto child : node.children) { + bfs_node(arena, oss, arena.get(child), indent + 1); + } +} + +std::string common_peg_ast_arena::dump() { + std::ostringstream oss; + for (auto & node : nodes_) { + bfs_node(*this, oss, node, 0); + } + return oss.str(); +} + void common_peg_arena::resolve_refs() { // Walk through all parsers and replace refs with their corresponding rule IDs for (auto & parser : parsers_) { - std::visit([this](auto & p) { - using T = std::decay_t; + std::visit( + [this](auto & p) { + using T = std::decay_t; - if constexpr (std::is_same_v) { - for (auto & child : p.children) { - child = resolve_ref(child); + if constexpr (std::is_same_v) { + for (auto & child : p.children) { + child = resolve_ref(child); + } + } else if constexpr (std::is_same_v) { + for (auto & child : p.children) { + child = resolve_ref(child); + } + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + p.child = resolve_ref(p.child); + } else if constexpr (std::is_same_v) { + p.child = resolve_ref(p.child); + } else if constexpr (std::is_same_v) { + p.child = resolve_ref(p.child); + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + // These rules do not have children + } else { + static_assert(is_always_false_v); } - } else if constexpr (std::is_same_v) { - for (auto & child : p.children) { - child = resolve_ref(child); - } - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - p.child = resolve_ref(p.child); - } else if constexpr (std::is_same_v) { - p.child = resolve_ref(p.child); - } else if constexpr (std::is_same_v) { - p.child = resolve_ref(p.child); - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - // These rules do not have children - } else { - static_assert(is_always_false_v); - } - }, parser); + }, + parser); } // Also flatten root if it's a ref @@ -803,63 +1017,88 @@ void common_peg_arena::resolve_refs() { } std::string common_peg_arena::dump(common_peg_parser_id id) const { + std::unordered_set visited; + return dump_impl(id, visited); +} + +std::string common_peg_arena::dump_impl(common_peg_parser_id id, + std::unordered_set & visited) const { + // Check for cycles + if (visited.count(id)) { + return "[cycle]"; + } + visited.insert(id); + const auto & parser = parsers_.at(id); - return std::visit([this](const auto & p) -> std::string { - using T = std::decay_t; + return std::visit( + [this, &visited](const auto & p) -> std::string { + using T = std::decay_t; - if constexpr (std::is_same_v) { - return "Epsilon"; - } else if constexpr (std::is_same_v) { - return "Start"; - } else if constexpr (std::is_same_v) { - return "End"; - } else if constexpr (std::is_same_v) { - return "Literal(" + p.literal + ")"; - } else if constexpr (std::is_same_v) { - std::vector parts; - for (const auto & child : p.children) { - parts.push_back(dump(child)); + if constexpr (std::is_same_v) { + return "Epsilon"; + } else if constexpr (std::is_same_v) { + return "Start"; + } else if constexpr (std::is_same_v) { + return "End"; + } else if constexpr (std::is_same_v) { + return "Literal(" + p.literal + ")"; + } else if constexpr (std::is_same_v) { + std::vector parts; + for (const auto & child : p.children) { + parts.push_back(dump_impl(child, visited)); + } + return "Sequence(" + string_join(parts, ", ") + ")"; + } else if constexpr (std::is_same_v) { + std::vector parts; + for (const auto & child : p.children) { + parts.push_back(dump_impl(child, visited)); + } + return "Choice(" + string_join(parts, ", ") + ")"; + } else if constexpr (std::is_same_v) { + if (p.max_count == -1) { + return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + + ", unbounded)"; + } + return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " + + std::to_string(p.max_count) + ")"; + } else if constexpr (std::is_same_v) { + return "And(" + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Not(" + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Atomic(" + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Any"; + } else if constexpr (std::is_same_v) { + return "Space"; + } else if constexpr (std::is_same_v) { + if (p.max_count == -1) { + return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)"; + } + return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + + std::to_string(p.max_count) + ")"; + } else if constexpr (std::is_same_v) { + return "JsonString()"; + } else if constexpr (std::is_same_v) { + return "PythonDictString()"; + } else if constexpr (std::is_same_v) { + return "Until(" + string_join(p.delimiters, " | ") + ")"; + } else if constexpr (std::is_same_v) { + return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? p.schema->dump() : "null") + ")"; + } else if constexpr (std::is_same_v) { + return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Ref(" + p.name + ")"; + } else if constexpr (std::is_same_v) { + return "Tag(" + p.tag + ", " + dump(p.child) + ")"; + } else if constexpr (std::is_same_v) { + return "Atomic(" + dump(p.child) + ")"; + } else { + return "Unknown"; } - return "Sequence(" + string_join(parts, ", ") + ")"; - } else if constexpr (std::is_same_v) { - std::vector parts; - for (const auto & child : p.children) { - parts.push_back(dump(child)); - } - return "Choice(" + string_join(parts, ", ") + ")"; - } else if constexpr (std::is_same_v) { - if (p.max_count == -1) { - return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)"; - } - return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")"; - } else if constexpr (std::is_same_v) { - return "And(" + dump(p.child) + ")"; - } else if constexpr (std::is_same_v) { - return "Not(" + dump(p.child) + ")"; - } else if constexpr (std::is_same_v) { - return "Any"; - } else if constexpr (std::is_same_v) { - return "Space"; - } else if constexpr (std::is_same_v) { - if (p.max_count == -1) { - return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)"; - } - return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")"; - } else if constexpr (std::is_same_v) { - return "JsonString()"; - } else if constexpr (std::is_same_v) { - return "Until(" + string_join(p.delimiters, " | ") + ")"; - } else if constexpr (std::is_same_v) { - return "Schema(" + dump(p.child) + ", " + (p.schema ? p.schema->dump() : "null") + ")"; - } else if constexpr (std::is_same_v) { - return "Rule(" + p.name + ", " + dump(p.child) + ")"; - } else if constexpr (std::is_same_v) { - return "Ref(" + p.name + ")"; - } else { - return "Unknown"; - } - }, parser); + }, + parser); } common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other) { @@ -868,25 +1107,25 @@ common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other } common_peg_parser & common_peg_parser::operator+=(const common_peg_parser & other) { - id_ = builder_.sequence({id_, other.id_}); + id_ = builder_.sequence({ id_, other.id_ }); return *this; } common_peg_parser & common_peg_parser::operator|=(const common_peg_parser & other) { - id_ = builder_.choice({id_, other.id_}); + id_ = builder_.choice({ id_, other.id_ }); return *this; } common_peg_parser common_peg_parser::operator+(const common_peg_parser & other) const { - return builder_.sequence({id_, other.id_}); + return builder_.sequence({ id_, other.id_ }); } common_peg_parser common_peg_parser::operator|(const common_peg_parser & other) const { - return builder_.choice({id_, other.id_}); + return builder_.choice({ id_, other.id_ }); } common_peg_parser common_peg_parser::operator<<(const common_peg_parser & other) const { - return builder_.sequence({id_, builder_.space(), other.id_}); + return builder_.sequence({ id_, builder_.space(), other.id_ }); } common_peg_parser common_peg_parser::operator+(const char * str) const { @@ -955,7 +1194,7 @@ common_peg_parser common_peg_parser_builder::sequence(const std::vector & parsers) { @@ -987,7 +1226,7 @@ common_peg_parser common_peg_parser_builder::choice(const std::vector & parsers) { @@ -1010,36 +1249,42 @@ common_peg_parser common_peg_parser_builder::choice(std::initializer_list(schema), raw})); +common_peg_parser common_peg_parser_builder::schema(const common_peg_parser & p, + const std::string & name, + const nlohmann::ordered_json & schema, + bool raw) { + return wrap(arena_.add_parser( + common_peg_schema_parser{ p.id(), name, std::make_shared(schema), raw })); } common_peg_parser common_peg_parser_builder::rule(const std::string & name, const common_peg_parser & p, bool trigger) { auto clean_name = rule_name(name); - auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, p.id(), trigger}); + auto rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, p.id(), trigger }); arena_.add_rule(clean_name, rule_id); return ref(clean_name); } -common_peg_parser common_peg_parser_builder::rule(const std::string & name, const std::function & builder_fn, bool trigger) { +common_peg_parser common_peg_parser_builder::rule(const std::string & name, + const std::function & builder_fn, + bool trigger) { auto clean_name = rule_name(name); if (arena_.has_rule(clean_name)) { return ref(clean_name); } // Create placeholder rule to allow recursive references - auto placeholder = any(); // Temporary placeholder - auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, placeholder.id(), trigger}); + auto placeholder = any(); // Temporary placeholder + auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, placeholder.id(), trigger }); arena_.add_rule(clean_name, placeholder_rule_id); // Build the actual parser auto parser = builder_fn(); // Replace placeholder with actual rule - auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, parser.id(), trigger}); + auto rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, parser.id(), trigger }); arena_.rules_[clean_name] = rule_id; return ref(clean_name); @@ -1056,77 +1301,71 @@ common_peg_arena common_peg_parser_builder::build() { // JSON parsers common_peg_parser common_peg_parser_builder::json_number() { - return rule("json-number", [this]() { + return rule("json-number", [this]() { auto digit1_9 = chars("[1-9]", 1, 1); - auto digits = chars("[0-9]"); - auto int_part = choice({literal("0"), sequence({digit1_9, chars("[0-9]", 0, -1)})}); - auto frac = sequence({literal("."), digits}); - auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits}); - return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()}); + auto digits = chars("[0-9]"); + auto int_part = choice({ literal("0"), sequence({ digit1_9, chars("[0-9]", 0, -1) }) }); + auto frac = sequence({ literal("."), digits }); + auto exp = sequence({ choice({ literal("e"), literal("E") }), optional(chars("[+-]", 1, 1)), digits }); + // Negative lookahead: only commit the number when the next character can't extend it. + // At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed. + // This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming). + auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1)); + return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() }); }); } common_peg_parser common_peg_parser_builder::json_string() { - return rule("json-string", [this]() { - return sequence({literal("\""), json_string_content(), literal("\""), space()}); + // When allow_python_dict_format is true, accept both single and double quotes + if (allow_python_dict_format_) { + return rule("json-string-flex", [this]() { + auto json_str = sequence({ literal("\""), json_string_content(), literal("\""), space() }); + auto python_str = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); + return choice({ json_str, python_str }); + }); + } + // Standard JSON strings with double quotes only + return rule("json-string", + [this]() { return sequence({ literal("\""), json_string_content(), literal("\""), space() }); }); +} + +common_peg_parser common_peg_parser_builder::flexible_string() { + // Always returns a choice of both quote styles regardless of flag + return rule("flexible-string", [this]() { + auto json_str = sequence({ literal("\""), json_string_content(), literal("\""), space() }); + auto python_str = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); + return choice({ json_str, python_str }); }); } common_peg_parser common_peg_parser_builder::json_bool() { - return rule("json-bool", [this]() { - return sequence({choice({literal("true"), literal("false")}), space()}); - }); + return rule("json-bool", [this]() { return sequence({ choice({ literal("true"), literal("false") }), space() }); }); } common_peg_parser common_peg_parser_builder::json_null() { - return rule("json-null", [this]() { - return sequence({literal("null"), space()}); - }); + return rule("json-null", [this]() { return sequence({ literal("null"), space() }); }); } common_peg_parser common_peg_parser_builder::json_object() { return rule("json-object", [this]() { - auto ws = space(); - auto member = sequence({json_string(), ws, literal(":"), ws, json()}); - auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))}); - return sequence({ - literal("{"), - ws, - choice({ - literal("}"), - sequence({members, ws, literal("}")}) - }), - ws - }); + auto ws = space(); + auto member = sequence({ json_string(), ws, literal(":"), ws, json() }); + auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) }); + return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) }); }); } common_peg_parser common_peg_parser_builder::json_array() { return rule("json-array", [this]() { - auto ws = space(); - auto elements = sequence({json(), zero_or_more(sequence({literal(","), ws, json()}))}); - return sequence({ - literal("["), - ws, - choice({ - literal("]"), - sequence({elements, ws, literal("]")}) - }), - ws - }); + auto ws = space(); + auto elements = sequence({ json(), zero_or_more(sequence({ literal(","), ws, json() })) }); + return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) }); }); } common_peg_parser common_peg_parser_builder::json() { return rule("json-value", [this]() { - return choice({ - json_object(), - json_array(), - json_string(), - json_number(), - json_bool(), - json_null() - }); + return choice({ json_object(), json_array(), json_string(), json_number(), json_bool(), json_null() }); }); } @@ -1134,6 +1373,63 @@ common_peg_parser common_peg_parser_builder::json_string_content() { return wrap(arena_.add_parser(common_peg_json_string_parser{})); } +common_peg_parser common_peg_parser_builder::python_dict_string_content() { + return wrap(arena_.add_parser(common_peg_python_dict_string_parser{})); +} + +common_peg_parser common_peg_parser_builder::python_dict_string() { + return rule("python-dict-string", + [this]() { return sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); }); +} + +common_peg_parser common_peg_parser_builder::python_dict_number() { + // Same as JSON number + return json_number(); +} + +common_peg_parser common_peg_parser_builder::python_dict_bool() { + // Same as JSON bool + return json_bool(); +} + +common_peg_parser common_peg_parser_builder::python_dict_null() { + // Same as JSON null + return json_null(); +} + +common_peg_parser common_peg_parser_builder::python_dict_object() { + return rule("python-dict-object", [this]() { + auto ws = space(); + auto member = sequence({ python_dict_string(), ws, literal(":"), ws, python_dict() }); + auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) }); + return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) }); + }); +} + +common_peg_parser common_peg_parser_builder::python_dict_array() { + return rule("python-dict-array", [this]() { + auto ws = space(); + auto elements = sequence({ python_dict(), zero_or_more(sequence({ literal(","), ws, python_dict() })) }); + return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) }); + }); +} + +common_peg_parser common_peg_parser_builder::python_dict() { + return rule("python-dict-value", [this]() { + std::vector parsers = { + python_dict_object(), python_dict_array(), python_dict_string(), python_dict_number(), + python_dict_bool(), python_dict_null() + }; + return choice(parsers); + }); +} + +common_peg_parser common_peg_parser_builder::marker() { + auto sharp_bracket_parser = literal("<") + until(">") + literal(">"); + auto square_bracket_parser = literal("[") + until("]") + literal("]"); + return choice({ sharp_bracket_parser, square_bracket_parser }); +} + common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) { auto ws = space(); return sequence({ @@ -1145,17 +1441,76 @@ common_peg_parser common_peg_parser_builder::json_member(const std::string & key }); } - -static std::string gbnf_escape_char_class(char c) { - switch (c) { - case '\n': return "\\n"; - case '\t': return "\\t"; - case '\r': return "\\r"; - case '\\': return "\\\\"; - case ']': return "\\]"; - case '[': return "\\["; - default: return std::string(1, c); +static std::string gbnf_escape_char_class(uint32_t c) { + if (c == '-' || c == ']' || c == '[' || c == '\\') { + return "\\" + std::string(1, (char) c); } + // Escape whitespace control characters + if (c == '\n') { + return "\\n"; + } + if (c == '\t') { + return "\\t"; + } + if (c == '\r') { + return "\\r"; + } + + // Printable ASCII + if (c >= 0x20 && c <= 0x7E) { + return std::string(1, (char) c); + } + + // Hex escape + char buf[16]; + const char * hex = "0123456789ABCDEF"; + + if (c <= 0xFF) { + buf[0] = '\\'; + buf[1] = 'x'; + buf[2] = hex[(c >> 4) & 0xF]; + buf[3] = hex[c & 0xF]; + buf[4] = '\0'; + } else if (c <= 0xFFFF) { + buf[0] = '\\'; + buf[1] = 'u'; + buf[2] = hex[(c >> 12) & 0xF]; + buf[3] = hex[(c >> 8) & 0xF]; + buf[4] = hex[(c >> 4) & 0xF]; + buf[5] = hex[c & 0xF]; + buf[6] = '\0'; + } else { + buf[0] = '\\'; + buf[1] = 'U'; + for (int i = 0; i < 8; i++) { + buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF]; + } + buf[10] = '\0'; + } + + return std::string(buf); +} + +static std::string codepoints_to_utf8(const std::vector & cps) { + std::string s; + for (uint32_t cp : cps) { + if (cp < 0x80) { + s += (char) cp; + } else if (cp < 0x800) { + s += (char) (0xC0 | (cp >> 6)); + s += (char) (0x80 | (cp & 0x3F)); + } else if (cp < 0x10000) { + s += (char) (0xE0 | (cp >> 12)); + s += (char) (0x80 | ((cp >> 6) & 0x3F)); + s += (char) (0x80 | (cp & 0x3F)); + } else { + s += (char) (0xF0 | (cp >> 18)); + s += (char) (0x80 | ((cp >> 12) & 0x3F)); + s += (char) (0x80 | ((cp >> 6) & 0x3F)); + s += (char) (0x80 | (cp & 0x3F)); + } + } + return s; } static std::string gbnf_excluding_pattern(const std::vector & strings) { @@ -1168,17 +1523,17 @@ static std::string gbnf_excluding_pattern(const std::vector & strin pattern += " | "; } - const auto & pre = pieces[i].prefix; + const auto & pre = pieces[i].prefix; const auto & chars = pieces[i].next_chars; std::string cls; - cls.reserve(chars.size()); - for (const auto & ch : chars) { + cls.reserve(chars.size() * 4); + for (uint32_t ch : chars) { cls += gbnf_escape_char_class(ch); } if (!pre.empty()) { - pattern += gbnf_format_literal(pre) + " [^" + cls + "]"; + pattern += gbnf_format_literal(codepoints_to_utf8(pre)) + " [^" + cls + "]"; } else { pattern += "[^" + cls + "]"; } @@ -1187,58 +1542,57 @@ static std::string gbnf_excluding_pattern(const std::vector & strin return "(" + pattern + ")*"; } -static std::unordered_set collect_reachable_rules( - const common_peg_arena & arena, - const common_peg_parser_id & rule -) { +static std::unordered_set collect_reachable_rules(const common_peg_arena & arena, + const common_peg_parser_id & rule) { std::unordered_set reachable; std::unordered_set visited; std::function visit = [&](common_peg_parser_id id) { const auto & parser = arena.get(id); - std::visit([&](const auto & p) { - using T = std::decay_t; + std::visit( + [&](const auto & p) { + using T = std::decay_t; - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - // These parsers do not have any children - } else if constexpr (std::is_same_v) { - for (auto child : p.children) { - visit(child); - } - } else if constexpr (std::is_same_v) { - for (auto child : p.children) { - visit(child); - } - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - visit(p.child); - } else if constexpr (std::is_same_v) { - if (visited.find(p.name) == visited.end()) { - visited.insert(p.name); - reachable.insert(p.name); + if constexpr (std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || + std::is_same_v) { + // These parsers do not have any children + } else if constexpr (std::is_same_v) { + for (auto child : p.children) { + visit(child); + } + } else if constexpr (std::is_same_v) { + for (auto child : p.children) { + visit(child); + } + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { visit(p.child); + } else if constexpr (std::is_same_v) { + if (visited.find(p.name) == visited.end()) { + visited.insert(p.name); + reachable.insert(p.name); + visit(p.child); + } + } else if constexpr (std::is_same_v) { + // Traverse rules so we pick up everything + auto referenced_rule = arena.get_rule(p.name); + visit(referenced_rule); + } else { + static_assert(is_always_false_v); } - } else if constexpr (std::is_same_v) { - // Traverse rules so we pick up everything - auto referenced_rule = arena.get_rule(p.name); - visit(referenced_rule); - } else { - static_assert(is_always_false_v); - } - }, parser); + }, + parser); }; visit(rule); @@ -1251,129 +1605,138 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo std::function to_gbnf = [&](common_peg_parser_id id) -> std::string { const auto & parser = parsers_.at(id); - return std::visit([&](const auto & p) -> std::string { - using T = std::decay_t; + return std::visit( + [&](const auto & p) -> std::string { + using T = std::decay_t; - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v) { - return ""; - } else if constexpr (std::is_same_v) { - return gbnf_format_literal(p.literal); - } else if constexpr (std::is_same_v) { - std::string s; - for (const auto & child : p.children) { - if (!s.empty()) { - s += " "; + if constexpr (std::is_same_v || + std::is_same_v || std::is_same_v) { + return ""; + } else if constexpr (std::is_same_v) { + return gbnf_format_literal(p.literal); + } else if constexpr (std::is_same_v) { + std::string s; + for (const auto & child : p.children) { + if (!s.empty()) { + s += " "; + } + auto child_gbnf = to_gbnf(child); + const auto & child_parser = parsers_.at(child); + if (std::holds_alternative(child_parser) || + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser)) { + s += "(" + child_gbnf + ")"; + } else { + s += child_gbnf; + } } - auto child_gbnf = to_gbnf(child); - const auto & child_parser = parsers_.at(child); + return s; + } else if constexpr (std::is_same_v) { + std::string s; + for (const auto & child : p.children) { + if (!s.empty()) { + s += " | "; + } + auto child_gbnf = to_gbnf(child); + const auto & child_parser = parsers_.at(child); + if (std::holds_alternative(child_parser)) { + s += "(" + child_gbnf + ")"; + } else { + s += child_gbnf; + } + } + return s; + } else if constexpr (std::is_same_v) { + auto child_gbnf = to_gbnf(p.child); + const auto & child_parser = parsers_.at(p.child); if (std::holds_alternative(child_parser) || - std::holds_alternative(child_parser)) { - s += "(" + child_gbnf + ")"; - } else { - s += child_gbnf; + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser)) { + child_gbnf = "(" + child_gbnf + ")"; } - } - return s; - } else if constexpr (std::is_same_v) { - std::string s; - for (const auto & child : p.children) { - if (!s.empty()) { - s += " | "; + if (p.min_count == 0 && p.max_count == 1) { + return child_gbnf + "?"; } - auto child_gbnf = to_gbnf(child); - const auto & child_parser = parsers_.at(child); - if (std::holds_alternative(child_parser)) { - s += "(" + child_gbnf + ")"; - } else { - s += child_gbnf; + if (p.min_count == 0 && p.max_count == -1) { + return child_gbnf + "*"; } - } - return s; - } else if constexpr (std::is_same_v) { - auto child_gbnf = to_gbnf(p.child); - const auto & child_parser = parsers_.at(p.child); - if (std::holds_alternative(child_parser) || - std::holds_alternative(child_parser)) { - child_gbnf = "(" + child_gbnf + ")"; - } - if (p.min_count == 0 && p.max_count == 1) { - return child_gbnf + "?"; - } - if (p.min_count == 0 && p.max_count == -1) { - return child_gbnf + "*"; - } - if (p.min_count == 1 && p.max_count == -1) { - return child_gbnf + "+"; - } - if (p.max_count == -1) { - return child_gbnf + "{" + std::to_string(p.min_count) + ",}"; - } - if (p.min_count == p.max_count) { - if (p.min_count == 1) { - return child_gbnf; + if (p.min_count == 1 && p.max_count == -1) { + return child_gbnf + "+"; } - return child_gbnf + "{" + std::to_string(p.min_count) + "}"; - } - return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; - } else if constexpr (std::is_same_v || std::is_same_v) { - return ""; // Lookahead not supported in GBNF - } else if constexpr (std::is_same_v) { - return "."; - } else if constexpr (std::is_same_v) { - return "space"; - } else if constexpr (std::is_same_v) { - std::string result = p.pattern; - if (p.min_count == 0 && p.max_count == 1) { - return result + "?"; - } - if (p.min_count == 0 && p.max_count == -1) { - return result + "*"; - } - if (p.min_count == 1 && p.max_count == -1) { - return result + "+"; - } - if (p.max_count == -1) { - return result + "{" + std::to_string(p.min_count) + ",}"; - } - if (p.min_count == p.max_count) { - if (p.min_count == 1) { - return result; + if (p.max_count == -1) { + return child_gbnf + "{" + std::to_string(p.min_count) + ",}"; } - return result + "{" + std::to_string(p.min_count) + "}"; - } - return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; - } else if constexpr (std::is_same_v) { - return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; - } else if constexpr (std::is_same_v) { - if (p.delimiters.empty()) { - return ".*"; - } - return gbnf_excluding_pattern(p.delimiters); - } else if constexpr (std::is_same_v) { - if (p.schema) { - if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") { - // TODO: Implement more comprehensive grammar generation for raw strings. - // For now, use the grammar emitted from the underlying parser. - return to_gbnf(p.child); + if (p.min_count == p.max_count) { + if (p.min_count == 1) { + return child_gbnf; + } + return child_gbnf + "{" + std::to_string(p.min_count) + "}"; } - return builder.add_schema(p.name, *p.schema); + return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; + } else if constexpr (std::is_same_v || + std::is_same_v) { + return ""; // Lookahead not supported in GBNF + } else if constexpr (std::is_same_v) { + return "."; + } else if constexpr (std::is_same_v) { + return "space"; + } else if constexpr (std::is_same_v) { + std::string result = p.pattern; + if (p.min_count == 0 && p.max_count == 1) { + return result + "?"; + } + if (p.min_count == 0 && p.max_count == -1) { + return result + "*"; + } + if (p.min_count == 1 && p.max_count == -1) { + return result + "+"; + } + if (p.max_count == -1) { + return result + "{" + std::to_string(p.min_count) + ",}"; + } + if (p.min_count == p.max_count) { + if (p.min_count == 1) { + return result; + } + return result + "{" + std::to_string(p.min_count) + "}"; + } + return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; + } else if constexpr (std::is_same_v) { + return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; + } else if constexpr (std::is_same_v) { + return R"(( [^'\\] | "\\" ( ['"\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; + } else if constexpr (std::is_same_v) { + if (p.delimiters.empty()) { + return ".*"; + } + return gbnf_excluding_pattern(p.delimiters); + } else if constexpr (std::is_same_v) { + if (p.schema) { + if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && + p.schema->at("type") == "string") { + // TODO: Implement more comprehensive grammar generation for raw strings. + // For now, use the grammar emitted from the underlying parser. + return to_gbnf(p.child); + } + return builder.add_schema(p.name, *p.schema); + } + return to_gbnf(p.child); + } else if constexpr (std::is_same_v) { + return p.name; + } else if constexpr (std::is_same_v) { + // Refs should not exist after flattening, but kept just in case + return p.name; + } else if constexpr (std::is_same_v) { + return to_gbnf(p.child); + } else if constexpr (std::is_same_v) { + return to_gbnf(p.child); + } else { + static_assert(is_always_false_v); } - return to_gbnf(p.child); - } else if constexpr (std::is_same_v) { - return p.name; - } else if constexpr (std::is_same_v) { - // Refs should not exist after flattening, but kept just in case - return p.name; - } else if constexpr (std::is_same_v) { - return to_gbnf(p.child); - } else if constexpr (std::is_same_v) { - return to_gbnf(p.child); - } else { - static_assert(is_always_false_v); - } - }, parser); + }, + parser); }; // Collect reachable rules @@ -1432,80 +1795,125 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo static nlohmann::json serialize_parser_variant(const common_peg_parser_variant & variant) { using json = nlohmann::json; - return std::visit([](const auto & p) -> json { - using T = std::decay_t; + return std::visit( + [](const auto & p) -> json { + using T = std::decay_t; - if constexpr (std::is_same_v) { - return json{{"type", "epsilon"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "start"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "end"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "literal"}, {"literal", p.literal}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "sequence"}, {"children", p.children}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "choice"}, {"children", p.children}}; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "repetition"}, - {"child", p.child}, - {"min_count", p.min_count}, - {"max_count", p.max_count} - }; - } else if constexpr (std::is_same_v) { - return json{{"type", "and"}, {"child", p.child}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "not"}, {"child", p.child}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "any"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "space"}}; - } else if constexpr (std::is_same_v) { - json ranges = json::array(); - for (const auto & range : p.ranges) { - ranges.push_back({{"start", range.start}, {"end", range.end}}); + if constexpr (std::is_same_v) { + return json{ + { "type", "epsilon" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "start" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "end" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "literal" }, + { "literal", p.literal } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "sequence" }, + { "children", p.children } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "choice" }, + { "children", p.children } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "repetition" }, + { "child", p.child }, + { "min_count", p.min_count }, + { "max_count", p.max_count } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "and" }, + { "child", p.child } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "not" }, + { "child", p.child } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "any" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "space" } + }; + } else if constexpr (std::is_same_v) { + json ranges = json::array(); + for (const auto & range : p.ranges) { + ranges.push_back({ + { "start", range.start }, + { "end", range.end } + }); + } + return json{ + { "type", "chars" }, + { "pattern", p.pattern }, + { "ranges", ranges }, + { "negated", p.negated }, + { "min_count", p.min_count }, + { "max_count", p.max_count } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "json_string" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "python_dict_string" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "until" }, + { "delimiters", p.delimiters } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "schema" }, + { "child", p.child }, + { "name", p.name }, + { "schema", p.schema ? *p.schema : nullptr }, + { "raw", p.raw } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "rule" }, + { "name", p.name }, + { "child", p.child }, + { "trigger", p.trigger } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "ref" }, + { "name", p.name } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "atomic" }, + { "child", p.child } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "tag" }, + { "child", p.child }, + { "tag", p.tag } + }; } - return json{ - {"type", "chars"}, - {"pattern", p.pattern}, - {"ranges", ranges}, - {"negated", p.negated}, - {"min_count", p.min_count}, - {"max_count", p.max_count} - }; - } else if constexpr (std::is_same_v) { - return json{{"type", "json_string"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "until"}, {"delimiters", p.delimiters}}; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "schema"}, - {"child", p.child}, - {"name", p.name}, - {"schema", p.schema ? *p.schema : nullptr}, - {"raw", p.raw} - }; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "rule"}, - {"name", p.name}, - {"child", p.child}, - {"trigger", p.trigger} - }; - } else if constexpr (std::is_same_v) { - return json{{"type", "ref"}, {"name", p.name}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "atomic"}, {"child", p.child}}; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "tag"}, - {"child", p.child}, - {"tag", p.tag} - }; - } - }, variant); + }, + variant); } nlohmann::json common_peg_arena::to_json() const { @@ -1514,9 +1922,9 @@ nlohmann::json common_peg_arena::to_json() const { parsers.push_back(serialize_parser_variant(parser)); } return nlohmann::json{ - {"parsers", parsers}, - {"rules", rules_}, - {"root", root_} + { "parsers", parsers }, + { "rules", rules_ }, + { "root", root_ } }; } @@ -1540,41 +1948,38 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json if (!j.contains("literal") || !j["literal"].is_string()) { throw std::runtime_error("literal parser missing or invalid 'literal' field"); } - return common_peg_literal_parser{j["literal"]}; + return common_peg_literal_parser{ j["literal"] }; } if (type == "sequence") { if (!j.contains("children") || !j["children"].is_array()) { throw std::runtime_error("sequence parser missing or invalid 'children' field"); } - return common_peg_sequence_parser{j["children"].get>()}; + return common_peg_sequence_parser{ j["children"].get>() }; } if (type == "choice") { if (!j.contains("children") || !j["children"].is_array()) { throw std::runtime_error("choice parser missing or invalid 'children' field"); } - return common_peg_choice_parser{j["children"].get>()}; + return common_peg_choice_parser{ j["children"].get>() }; } if (type == "repetition") { if (!j.contains("child") || !j.contains("min_count") || !j.contains("max_count")) { throw std::runtime_error("repetition parser missing required fields"); } - return common_peg_repetition_parser{ - j["child"].get(), - j["min_count"].get(), - j["max_count"].get() - }; + return common_peg_repetition_parser{ j["child"].get(), j["min_count"].get(), + j["max_count"].get() }; } if (type == "and") { if (!j.contains("child")) { throw std::runtime_error("and parser missing 'child' field"); } - return common_peg_and_parser{j["child"].get()}; + return common_peg_and_parser{ j["child"].get() }; } if (type == "not") { if (!j.contains("child")) { throw std::runtime_error("not parser missing 'child' field"); } - return common_peg_not_parser{j["child"].get()}; + return common_peg_not_parser{ j["child"].get() }; } if (type == "any") { return common_peg_any_parser{}; @@ -1583,34 +1988,34 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json return common_peg_space_parser{}; } if (type == "chars") { - if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") || - !j.contains("min_count") || !j.contains("max_count")) { + if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") || !j.contains("min_count") || + !j.contains("max_count")) { throw std::runtime_error("chars parser missing required fields"); } common_peg_chars_parser parser; - parser.pattern = j["pattern"]; - parser.negated = j["negated"]; + parser.pattern = j["pattern"]; + parser.negated = j["negated"]; parser.min_count = j["min_count"]; parser.max_count = j["max_count"]; for (const auto & range_json : j["ranges"]) { if (!range_json.contains("start") || !range_json.contains("end")) { throw std::runtime_error("char_range missing 'start' or 'end' field"); } - parser.ranges.push_back({ - range_json["start"].get(), - range_json["end"].get() - }); + parser.ranges.push_back({ range_json["start"].get(), range_json["end"].get() }); } return parser; } if (type == "json_string") { return common_peg_json_string_parser{}; } + if (type == "python_dict_string") { + return common_peg_python_dict_string_parser{}; + } if (type == "until") { if (!j.contains("delimiters") || !j["delimiters"].is_array()) { throw std::runtime_error("until parser missing or invalid 'delimiters' field"); } - return common_peg_until_parser{j["delimiters"].get>()}; + return common_peg_until_parser{ j["delimiters"].get>() }; } if (type == "schema") { if (!j.contains("child") || !j.contains("name") || !j.contains("schema") || !j.contains("raw")) { @@ -1618,7 +2023,7 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json } common_peg_schema_parser parser; parser.child = j["child"].get(); - parser.name = j["name"]; + parser.name = j["name"]; if (!j["schema"].is_null()) { parser.schema = std::make_shared(j["schema"]); } @@ -1629,17 +2034,14 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json if (!j.contains("name") || !j.contains("child") || !j.contains("trigger")) { throw std::runtime_error("rule parser missing required fields"); } - return common_peg_rule_parser{ - j["name"].get(), - j["child"].get(), - j["trigger"].get() - }; + return common_peg_rule_parser{ j["name"].get(), j["child"].get(), + j["trigger"].get() }; } if (type == "ref") { if (!j.contains("name") || !j["name"].is_string()) { throw std::runtime_error("ref parser missing or invalid 'name' field"); } - return common_peg_ref_parser{j["name"]}; + return common_peg_ref_parser{ j["name"] }; } if (type == "atomic") { if (!j.contains("child")) { diff --git a/common/peg-parser.h b/common/peg-parser.h index 1cd640365f..b5e7ae13cf 100644 --- a/common/peg-parser.h +++ b/common/peg-parser.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -111,6 +112,8 @@ class common_peg_ast_arena { void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const; void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const; + + std::string dump(); }; struct common_peg_parse_result { @@ -139,6 +142,7 @@ struct common_peg_parse_result { struct common_peg_parse_context { std::string input; bool is_partial; + bool debug = false; // Enable debug output for parser tracing common_peg_ast_arena ast; int parse_depth; @@ -207,6 +211,7 @@ struct common_peg_chars_parser { }; struct common_peg_json_string_parser {}; +struct common_peg_python_dict_string_parser {}; struct common_peg_until_parser { std::vector delimiters; @@ -255,6 +260,7 @@ using common_peg_parser_variant = std::variant< common_peg_space_parser, common_peg_chars_parser, common_peg_json_string_parser, + common_peg_python_dict_string_parser, common_peg_until_parser, common_peg_schema_parser, common_peg_rule_parser, @@ -299,6 +305,8 @@ class common_peg_arena { friend class common_peg_parser_builder; private: + std::string dump_impl(common_peg_parser_id id, std::unordered_set & visited) const; + common_peg_parser_id add_parser(common_peg_parser_variant parser); void add_rule(const std::string & name, common_peg_parser_id id); @@ -311,9 +319,16 @@ class common_peg_parser_builder { common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); } common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); } + bool allow_python_dict_format_ = false; + public: common_peg_parser_builder(); + // Enable/disable Python dict format support (single-quoted strings). + // When enabled, JSON parsers will also accept Python dict-style single-quoted strings. + void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; } + bool get_allow_python_dict_format() const { return allow_python_dict_format_; } + // Match nothing, always succeed. // S -> ε common_peg_parser eps() { return add(common_peg_epsilon_parser{}); } @@ -418,10 +433,32 @@ class common_peg_parser_builder { // Useful for extracting content within a JSON string. common_peg_parser json_string_content(); + // Matches a string that accepts both JSON double-quoted and Python dict single-quoted styles. + // This is useful when you explicitly want to accept both formats regardless of the allow_python_dict_format flag. + common_peg_parser flexible_string(); + + // Matches a Python dict-style single-quoted string content without the surrounding quotes. + // Useful for extracting content within a Python dict string. + common_peg_parser python_dict_string_content(); + // Matches a JSON object member with a key and associated parser as the // value. common_peg_parser json_member(const std::string & key, const common_peg_parser & p); + // Creates a complete Python dict format parser supporting objects, arrays, single-quoted strings, + // numbers, booleans, and null. Similar to JSON but uses single quotes for strings. + // value -> object | array | string | number | true | false | null + common_peg_parser python_dict(); + common_peg_parser python_dict_object(); + common_peg_parser python_dict_string(); + common_peg_parser python_dict_array(); + common_peg_parser python_dict_number(); + common_peg_parser python_dict_bool(); + common_peg_parser python_dict_null(); + + // A marker, i.e. text delimited by a pair of <> or [] + common_peg_parser marker(); + // Wraps a parser with JSON schema metadata for grammar generation. // Used internally to convert JSON schemas to GBNF grammar rules. common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false); diff --git a/docs/autoparser.md b/docs/autoparser.md new file mode 100644 index 0000000000..cdbcce23b8 --- /dev/null +++ b/docs/autoparser.md @@ -0,0 +1,609 @@ +# Unified Auto-Parser Architecture + +The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls. + +## Overview + +The unified auto-parser uses a **pure differential, compositional approach** to analyze chat templates: + +**Core Philosophy**: + +- **Zero Hardcoded Patterns**: All markers extracted through template comparison (the **only heuristic** is JSON detection) +- **Compositional Architecture**: Separate parsers for reasoning, content, and tools that compose cleanly + +**Four-Phase Analysis**: + +1. **Phase 1: Reasoning Analysis** (R1-R3) - Detects reasoning markers and mode +2. **Phase 2: Content Analysis** (C1) - Detects content wrapping markers +3. **Phase 3: Tool Call Analysis** (T1-T7) - Extracts tool section, function, and call ID markers +4. **Phase 4: Argument Analysis** (A1-A2) - Extracts argument name/value markers (TAG_WITH_TAGGED only) + +## Data Structures + +### diff_analysis_result + +The result of differential analysis contains all extracted markers and format classifications: + +```cpp +struct diff_analysis_result { + // Classification results + reasoning_mode reasoning = reasoning_mode::NONE; + content_mode content = content_mode::PLAIN; + tool_format tools = tool_format::NONE; + + // All extracted markers (see marker_registry below) + marker_registry markers; + + // JSON field names (for JSON_NATIVE format) + bool fun_name_is_key = false; // Function name is the JSON key: {"func_name": {...}} + std::string function_field = "function"; // Outer object key (e.g., "function" in "function.name") + std::string name_field = "name"; + std::string args_field = "arguments"; + std::string id_field; // String call ID field (e.g., "id") + std::string gen_id_field; // Generated integer call ID field (e.g., "tool_call_id") + std::vector parameter_order; // Order of JSON fields for parsing + + // Call ID position (for non-JSON formats) + call_id_position call_id_pos = call_id_position::NONE; + + // Flags + bool supports_tools = false; + bool supports_parallel_calls = false; + bool requires_nonnull_content = false; + bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...] + + // Preserved tokens for tokenizer (union of all non-empty markers) + std::vector preserved_tokens; +}; +``` + +### Enums + +**`reasoning_mode`**: How the template handles reasoning/thinking blocks. + +| Value | Description | +|----------------------|-------------------------------------------------------------------------------| +| `NONE` | No reasoning markers detected | +| `TAG_BASED` | Standard tag-based: `...` | +| `DELIMITER` | Delimiter-based: reasoning ends at delimiter (e.g., `[BEGIN FINAL RESPONSE]`) | +| `FORCED_OPEN` | Template ends with open reasoning tag (empty start, non-empty end) | +| `FORCED_CLOSED` | Both tags when disabled; only start tag when enabled | +| `TOOLS_ONLY` | Reasoning only appears when tool calls are present | + +**`content_mode`**: How the template wraps content. + +| Value | Description | +|----------------------------|------------------------------------------------------| +| `PLAIN` | No content markers | +| `ALWAYS_WRAPPED` | Content always wrapped: `...` | +| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present | + +**`tool_format`**: Classification of tool call structure. + +| Value | Description | +|--------------------|------------------------------------------------------------------| +| `NONE` | No tool support detected | +| `JSON_NATIVE` | Pure JSON: `{"name": "X", "arguments": {...}}` | +| `TAG_WITH_JSON` | Tag-based with JSON args: `{...}` | +| `TAG_WITH_TAGGED` | Tag-based with tagged args: `value` | + +**`call_id_position`**: Where call IDs appear relative to function name and arguments (for non-JSON formats). + +| Value | Description | +|----------------------------|------------------------------------------| +| `NONE` | No call ID support detected | +| `PRE_FUNC_NAME` | Before function name | +| `BETWEEN_FUNC_AND_ARGS` | Between function name and arguments | +| `POST_ARGS` | After arguments | + +### marker_registry + +All markers are extracted via differential analysis without hardcoded patterns: + +```cpp +struct marker_registry { + // === Reasoning markers (from R1-R3) === + std::string reasoning_start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" + std::string reasoning_end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" + + // === Content markers (from C1) === + std::string content_start; // e.g., "", "" + std::string content_end; // e.g., "", "" + + // === Tool section markers (from T1-T2) === + std::string tool_section_start; // e.g., "", "[TOOL_CALLS]" + std::string tool_section_end; // e.g., "", "" + std::string per_call_start; // e.g., "<|tool_call_begin|>" (for multi-call templates) + std::string per_call_end; // e.g., "<|tool_call_end|>" + std::string call_separator; // e.g., ",", "\n" + + // === Function markers (from T3-T6) === + std::string func_name_prefix; // e.g., "", ":0" + std::string func_close; // e.g., "" + std::string args_start; // e.g., "{" + std::string args_end; // e.g., "}" + + // === Argument markers (from A1-A2, for TAG_WITH_TAGGED) === + std::string arg_name_prefix; // e.g., "" + std::string arg_name_suffix; // e.g., ">", "" + std::string arg_value_prefix; // e.g., "", "" + std::string arg_value_suffix; // e.g., "", "" + std::string arg_separator; // e.g., "", "\n" + + // === Call ID markers (from T7) === + std::string call_id_prefix; // e.g., "[CALL_ID]" + std::string call_id_suffix; // e.g., "[ARGS]" + + // === Special markers === + std::string code_block_marker; // e.g., "Action:" (for markdown code block format) + std::string code_block_language; // e.g., "json" + std::string function_namespace; // e.g., "functions." (for prefixed-indexed format) +}; +``` + +## Tool Calling Formats + +The auto-parser recognizes three tool calling formats. + +### JSON_NATIVE + +**Structure**: The entire tool call (function name, arguments, and values) is in JSON format. There may be enclosing tags around the tool calling section. + +**Characteristics**: + +- Function name is a JSON field: `"name": "function_name"` +- Arguments are a JSON object: `"arguments": {"key": "value"}` +- May be wrapped in section markers like `...` or `[TOOL_CALLS]...]` + +**Examples**: + +Standard OpenAI-style: + +```json + +{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}} + +``` + +Mistral Nemo with array wrapper: + +```json +[TOOL_CALLS] +[{"name": "calculate", "arguments": {"expr": "2+2"}}] +``` + +**Detection**: Function name found inside a JSON structure (determined by JSON parse attempt). + +--- + +### TAG_WITH_JSON + +**Structure**: The function name is outside the JSON structure, typically within quasi-XML markers. Arguments are still provided as a JSON object. + +**Characteristics**: + +- Function name appears in tag attributes: `` or `function_name` +- Arguments are a JSON object following the tag +- Has closing tags: `` or `` +- Arguments remain valid JSON + +**Examples**: + +Functionary v3.1: + +```xml +{"location": "Paris", "unit": "celsius"} +``` + +MiniMax: + +```xml + +calculate +{"expr": "2+2"} + +``` + +**Detection**: Function name not in JSON, but arguments are JSON (args_start is `{`). + +--- + +### TAG_WITH_TAGGED + +**Structure**: Both the function name AND argument names are in XML-style tags. Argument values may be JSON or unquoted primitives depending on schema type. + +**Characteristics**: + +- Function name in tag: `` or `` +- Each argument has its own tag: `value` +- String values are **unquoted** (raw text content of the tag) +- Non-string values (objects, arrays, numbers, booleans) are still JSON-formatted +- Supports streaming: partial arguments can be parsed incrementally + +**Examples**: + +Qwen/Hermes XML format: + +```xml + +Paris +celsius + +``` + +Note how string values (`Paris`, `celsius`) are unquoted inside the tags. + +Mixed types example: + +```xml + +2+2 +2 +{"round": true} + +``` + +Here: + +- `expr` and `precision` are strings (unquoted) +- `options` is an object (JSON-formatted inside the tag) + +**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object. + +--- + +## Analysis Flow + +```text +Template + | + v +differential_analyzer::analyze(tmpl) + | + |-- Phase 1: analyze_reasoning(tmpl, result) + | |-- R1: compare_reasoning_presence() — with/without reasoning_content field + | |-- R2: compare_thinking_enabled() — enable_thinking=false vs true + | '-- R3: compare_reasoning_scope() — reasoning with content vs with tools + | + |-- Phase 2: analyze_content(tmpl, result) + | '-- C1: compare_content_values() — content vs tools vs reasoning + | + |-- Phase 3: analyze_tools(tmpl, result) + | |-- T1: analyze_tool_calls() — no tools vs with tools + format classification + | |-- T2: check_per_call_markers() — per-section vs per-call markers + | |-- T3: extract_call_separator() — separator between multiple calls + | |-- T4: extract_function_markers() — func_alpha vs func_beta + | |-- T5: extract_argument_separator() — 1 arg vs 2 args + | |-- T6: extract_args_markers() — no args vs with args + | '-- T7: extract_call_id_markers() — call_id "call00001" vs "call99999" + | + |-- Phase 4: analyze_arguments(tmpl, result) [TAG_WITH_TAGGED only] + | |-- A1: extract_argument_name_markers() — "first" arg vs "second" arg + | '-- A2: extract_argument_value_markers() — value "XXXX" vs "YYYY" + | + '-- collect_preserved_tokens(result) + | + v +diff_analysis_result + | + v +universal_peg_generator::generate_parser(tmpl, inputs, analysis) + |-- build_parser(analysis, inputs, ...) — builds PEG parser arena + | |-- Reasoning parser (based on reasoning_mode) + | |-- Content parser (based on content_mode) + | '-- Tool parser (dispatches by tool_format): + | |-- build_tool_parser_json_native() + | |-- build_tool_parser_tag_json() + | '-- build_tool_parser_tag_tagged() + | + |-- Build GBNF grammar (if tools present) + '-- Set grammar triggers from tool markers + | + v +common_chat_params (prompt, parser, grammar, triggers, preserved_tokens) +``` + +## Entry Point + +The auto-parser is invoked in `common/chat.cpp` in `common_chat_templates_apply_jinja`. A few specialized templates are handled first (Ministral/Magistral Large 3, GPT-OSS, Functionary v3.2), then the auto-parser handles everything else: + +```cpp +try { + LOG_DBG("Using differential autoparser\n"); + auto auto_params = universal_peg_generator::generate_parser(tmpl, params); + return auto_params; +} catch (const std::exception & e) { + LOG_WRN("Automatic parser generation failed: %s\n", e.what()); +} +``` + +## Algorithm Details + +### Core Mechanism: Differential Comparison + +All analysis phases use the same factorized comparison function: + +```cpp +compare_variants(tmpl, params_A, params_modifier) +``` + +This creates variant B by applying a modifier lambda to a copy of params_A, renders both through the template, and computes a `diff_split`: + +```cpp +struct diff_split { + std::string prefix; // Common prefix between A and B + std::string suffix; // Common suffix between A and B + std::string left; // Unique to variant A + std::string right; // Unique to variant B +}; +``` + +The diff is computed via `calculate_diff_split()`, which uses longest-common-prefix/suffix with iterative tag boundary fixing — it moves incomplete `<...>` or `[...]` markers from prefix/suffix into the left/right parts until stable. + +Text is segmentized into markers and non-marker fragments using `segmentize_markers()`, which splits on `<...>` and `[...]` boundaries. + +### Phase 1: Reasoning Analysis + +Three comparisons extract reasoning markers and classify the reasoning mode: + +**R1 — `compare_reasoning_presence()`**: Compares assistant message with vs without a `reasoning_content` field. + +- Segmentizes `diff.right` to find markers around the reasoning content +- 3+ segments → `TAG_BASED` (start marker, content, end marker) +- 2 segments → `DELIMITER` (content followed by delimiter) +- Special case: markers found in prefix/suffix → `FORCED_CLOSED` + +**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true`. + +- Detects `FORCED_OPEN`: template adds opening tag when thinking enabled +- Detects `FORCED_CLOSED`: disable mode has both markers, enable mode has only start +- Handles reverse patterns (e.g., GLM-4.6 where disabled adds empty block) + +**R3 — `compare_reasoning_scope()`**: Compares reasoning with content vs with tool calls. + +- Detects `TOOLS_ONLY`: reasoning appears only when tool calls are present +- Extracts reasoning markers from tool call output by segmentizing + +### Phase 2: Content Analysis + +**C1 — `compare_content_values()`**: Compares content-only output vs tools output vs reasoning output. + +- Creates two comparisons: content→tools and content→reasoning +- Finds content text position in diff to extract surrounding markers +- Classifies: + - `ALWAYS_WRAPPED`: content has start/end markers in both comparisons + - `WRAPPED_WITH_REASONING`: markers only when reasoning is present + - `PLAIN`: no wrapping markers detected + +### Phase 3: Tool Call Analysis + +**T1 — `analyze_tool_calls()`**: Compares no-tools vs with-tools output. + +- Calls `analyze_tool_call_format()` to classify the format using the **only heuristic**: a JSON parse attempt + - `in_json_haystack()` checks whether the function name appears inside a JSON structure + - If function name is in JSON → `JSON_NATIVE` → `analyze_tool_call_format_json_native()`: + - Parses JSON structure, matches needle values to extract field names + - Detects `fun_name_is_key`, `function_field`, `name_field`, `args_field`, `id_field`, `gen_id_field` + - Detects `tools_array_wrapped` by checking for `[` before JSON + - Builds `parameter_order` by sorting fields by position + - Extracts `tool_section_start`/`tool_section_end` + - If function name is not in JSON → `analyze_tool_call_format_non_json()`: + - Segmentizes the haystack into markers and text + - Uses symmetry: counts opening markers, matches with closing markers + - Extracts `tool_section_start`, `tool_section_end`, `per_call_start`, `per_call_end` + +**T2 — `check_per_call_markers()`**: Compares 1 call vs 2 calls. + +- If the second call starts with `tool_section_start`, markers are per-call not per-section +- Moves tool_section markers to per_call markers, clears section markers + +**T3 — `extract_call_separator()`**: Compares 1 call vs 2 calls. + +- Finds separator between calls using `until_common_prefix(diff.right, ...)` with the two function names as anchors + +**T4 — `extract_function_markers()`**: Compares function name "foofoo" vs "barbar". + +- Finds function name in diff, segmentizes to extract prefix/suffix markers +- Extracts `func_name_prefix`, `func_name_suffix` +- Searches for closing marker after args to extract `func_close` + +**T5 — `extract_argument_separator()`**: Compares 1 argument vs 2 arguments. + +- Uses `until_common_prefix()` with argument names as anchors to find the separator + +**T6 — `extract_args_markers()`**: Compares 0 arguments vs 1 argument. + +- Uses `until_common_prefix()` and `after_common_suffix()` to find container markers +- Extracts `args_start`, `args_end` + +**T7 — `extract_call_id_markers()`**: Compares call IDs "call00001" vs "call99999". + +- Determines position relative to function name and arguments +- Classifies as `PRE_FUNC_NAME`, `BETWEEN_FUNC_AND_ARGS`, or `POST_ARGS` +- Extracts `call_id_prefix`, `call_id_suffix` + +### Phase 4: Argument Analysis (TAG_WITH_TAGGED only) + +Only runs when Phase 3 detected TAG_WITH_TAGGED or TAG_WITH_JSON format with non-JSON argument structures. + +**A1 — `extract_argument_name_markers()`**: Compares argument name "first" vs "second". + +- Finds common prefix of diff.left/right to extract marker structure +- Extracts `arg_name_prefix`, `arg_name_suffix` + +**A2 — `extract_argument_value_markers()`**: Compares value "XXXX" vs "YYYY". + +- Segmentizes prefix/suffix around value to find markers +- Extracts `arg_value_prefix`, `arg_value_suffix` + +### Parser Building + +The parser generator (`universal_peg_generator`) takes the analysis result and builds a PEG parser arena. The entry point is `generate_parser(tmpl, inputs)`, which: + +1. Runs `differential_analyzer::analyze(tmpl)` to get the analysis result +2. Calls `build_parser(analysis, inputs, ...)` to construct the PEG parser +3. Builds a GBNF grammar if tools are present (for constrained decoding) +4. Sets grammar triggers from `tool_section_start` or `per_call_start` + +#### Reasoning Parser Construction + +Built inline in `build_parser()` based on `reasoning_mode`: + +| Mode | Parser | +|-----------------------------------|---------------------------------------------------------------------------------------------| +| `FORCED_OPEN` / `FORCED_CLOSED` | `reasoning(until(end)) + end` — expects reasoning immediately (opening tag was in template) | +| `TAG_BASED` / `TOOLS_ONLY` | `optional(start + reasoning(until(end)) + end)` | +| `DELIMITER` | `optional(reasoning(until(end)) + end)` — no start marker, reasoning ends at delimiter | + +#### Content Parser Construction + +| Condition | Parser | +|------------------------------------|---------------------------------------------------------------------------| +| `json_schema` present | `reasoning + space() + content(schema(json(), ...)) + end()` | +| Tools present | Dispatches to tool parser builder | +| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` | +| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` | +| Default | `reasoning + content(rest()) + end()` | + +#### Tool Parser Construction + +`build_tool_parser()` dispatches by `tool_format`: + +**`build_tool_parser_json_native()`**: Uses the `standard_json_tools()` builder helper which has three internal modes: + +- `build_json_tools_function_is_key()` — function name is the JSON key: `{"get_weather": {"location": "Paris"}}` +- `build_json_tools_nested_keys()` — nested object: `{"function": {"name": "X", "arguments": {...}}}` +- `build_json_tools_flat_keys()` — flat object: `{"name": "X", "arguments": {...}}` + +Handles content wrappers, array wrapping, parallel calls, and section markers. + +**`build_tool_parser_tag_json()`**: For each tool, builds: + +```text +tool_open(prefix + tool_name(literal(name)) + suffix) + + call_id_section + + tool_args(schema(json(), tool_schema)) +``` + +Wraps in per-call or section markers. Handles parallel calls. + +**`build_tool_parser_tag_tagged()`**: For each tool, builds per-argument parsers: + +- String types: `tool_arg_string_value(schema(until(suffix), ...))` +- JSON types: `tool_arg_json_value(schema(json(), ...))` +- Required vs optional arguments +- Arguments joined with `space()` between them + +Handles `func_close`, `peek()` for partial parsing safety, and call_id sections. + +All three return: `reasoning + optional(content(until(trigger))) + tool_calls + end()` + +### Mapper + +The `common_chat_peg_unified_mapper` maps PEG parse results (AST nodes) into `common_chat_msg` structures. Key design: + +- **Buffered arguments**: Before `tool_name` is known, argument text goes to `args_buffer`; once name is set, the buffer is flushed to `current_tool->arguments` +- **`args_target()`**: Returns a reference to whichever destination is active, eliminating branching +- **`closing_quote_pending`**: Tracks whether a closing `"` needs to be appended when a string argument value is finalized +- **Quote normalization**: Python-style quotes (`'key': 'value'`) are converted to JSON (`"key": "value"`) +- **Brace auto-closing**: At tool close, unclosed `{` braces are closed automatically (tracked via `json_brace_depth()`) + +## Files + +| File | Purpose | +|-------------------------------------------|-------------------------------------------------------------------| +| `common/chat-auto-parser.h` | `universal_peg_generator` class and `templates_params` struct | +| `common/chat-auto-parser-generator.cpp` | Parser generator implementation | +| `common/chat-diff-analyzer.h` | Analysis result types, enums, and `differential_analyzer` class | +| `common/chat-diff-analyzer.cpp` | Differential analysis implementation | +| `common/chat-auto-parser-helpers.h/cpp` | `calculate_diff_split()`, `segmentize_markers()`, string helpers | +| `common/chat-peg-parser.h/cpp` | PEG builder and mapper classes | +| `common/chat.cpp` | Entry point: `common_chat_templates_apply_jinja()` | +| `tools/parser/debug-template-parser.cpp` | Debug tool for template analysis | +| `tools/parser/template-analysis.cpp` | Template analysis tool | + +## Testing & Debugging + +### Debug Tools + +**Template Debugger**: `tools/parser/debug-template-parser.cpp` + +- Usage: `./bin/llama-debug-template-parser path/to/template.jinja` +- Shows detected format, markers, generated parser, and GBNF grammar + +**Template Analysis**: `tools/parser/template-analysis.cpp` + +- Usage: `./bin/llama-template-analysis path/to/template.jinja` + +**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2` + +- Shows detailed analysis steps, pattern extraction results, and generated parser structure + +**PEG Test Builder**: Fluent API for creating test cases in `tests/test-chat.cpp`: + +```cpp +auto tst = peg_tester("models/templates/Template.jinja"); +tst.test("input text") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({tool_json}) + .parallel_tool_calls(true) + .enable_thinking(true) + .expect(expected_message) + .run(); +``` + +### Tested Templates + +The following templates have active tests in `tests/test-chat.cpp`: + +| Template | Format | Notes | +| -------- | ------ | ----- | +| Ministral-3-14B-Reasoning | Reasoning | `[THINK]...[/THINK]` tags | +| NVIDIA-Nemotron-3-Nano-30B | TAG_WITH_TAGGED | Reasoning + tools | +| CohereForAI Command-R7B | JSON_NATIVE | `<\|START_THINKING\|>`/`<\|START_RESPONSE\|>` markers | +| Google Gemma 2 2B | Content only | No tool support | +| Qwen-QwQ-32B | Reasoning | Forced-open thinking | +| NousResearch Hermes 2 Pro | JSON_NATIVE | `` wrapper | +| IBM Granite 3.3 | JSON_NATIVE | `` + `` | +| ByteDance Seed-OSS | TAG_WITH_TAGGED | Custom `` and `` tags | +| Qwen3-Coder | TAG_WITH_TAGGED | XML-style tool format | +| DeepSeek V3.1 | JSON_NATIVE | Forced thinking mode | +| GLM-4.6 | TAG_WITH_TAGGED | `name\n......` format | +| GLM-4.7-Flash | TAG_WITH_TAGGED | Updated GLM format | +| Kimi-K2-Thinking | JSON_NATIVE | Reasoning + JSON tools | +| Apertus-8B-Instruct | JSON_NATIVE | Function name as JSON key | +| MiniMax-M2 | TAG_WITH_JSON | XML invoke with JSON args | +| NVIDIA-Nemotron-Nano-v2 | JSON_NATIVE | `` wrapper (nested) | +| CohereForAI Command-R Plus | JSON_NATIVE | Markdown code block format | +| Mistral-Nemo-Instruct-2407 | JSON_NATIVE | `[TOOL_CALLS]` wrapper with ID field | +| Functionary v3.1 | TAG_WITH_JSON | `` format | +| Functionary v3.2 | Specialized | `>>>` recipient delimiter (dedicated handler) | +| Fireworks Firefunction v2 | TAG_WITH_JSON | Fireworks tool format | +| DeepSeek R1 Distill (Llama/Qwen) | Reasoning | Forced-open thinking | +| llama-cpp-deepseek-r1 | Reasoning | Forced-open thinking | +| Kimi-K2 / Kimi-K2-Instruct | JSON_NATIVE | JSON tools with special markers | +| Llama 3.1/3.2/3.3 | JSON_NATIVE | Standard Llama tool format | +| OpenAI GPT-OSS | Specialized | Channel-based (dedicated handler) | +| Apriel 1.5 | JSON_NATIVE | `` wrapper with JSON array | +| Apriel 1.6 Thinker | Reasoning | Implicit reasoning start | +| Mistral Small 3.2 | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` with call ID | +| Devstral | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` without call ID | +| StepFun 3.5 Flash | TAG_WITH_TAGGED | `` format | + +## Adding Support for New Templates + +To support a new template format: + +1. **If it follows standard patterns** - The auto-parser should detect it automatically using the three formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED) +2. **If differential analysis doesn't extract markers correctly** - Add a workaround in the workarounds array in `chat-diff-analyzer.cpp` +3. **If it needs fundamentally different handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block (as done for GPT-OSS, Functionary v3.2, and Ministral) + +## Edge Cases and Quirks + +1. **Forced Thinking**: If `enable_thinking` is true but the model has already started a thought block (e.g., ended the prompt with ``), the parser enters "forced thinking" mode where it immediately expects reasoning content. +2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start`/`per_call_end`), others wrap the entire tool section (`tool_section_start`/`tool_section_end`). T2 disambiguates by checking if the second call in a two-call output starts with the section marker. +3. **Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `` or `[marker]` tokens, ensuring clean extraction. +6. **Workarounds**: A workaround array in `chat-diff-analyzer.cpp` applies post-analysis patches for templates whose differential analysis produces incomplete or incorrect results (e.g., old Qwen thinking, Granite 3.3, Cohere Command-R+, Functionary, DeepSeek-R1-Distill-Qwen). diff --git a/docs/development/parsing.md b/docs/development/parsing.md index dbb989bf08..e627ea6502 100644 --- a/docs/development/parsing.md +++ b/docs/development/parsing.md @@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse output from a model that emits arguments as JSON. ```cpp -auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { +auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { // Build a choice of all available tools auto tool_choice = p.choice(); for (const auto & tool : tools) { @@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result); ### Native -The `common_chat_peg_native_builder` builds a `native` parser suitable for +The `common_chat_peg_unified_builder` builds a `native` parser suitable for models that emit tool arguments as a direct JSON object. - **`reasoning(p)`** - Tag node for `reasoning_content` @@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object. - **`tool_args(p)`** - Tag the tool arguments ```cpp -build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) { +build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto get_weather_tool = p.tool(p.sequence({ p.tool_open(p.literal("{")), p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""), @@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) { ### Constructed -The `common_chat_peg_constructed_builder` builds a `constructed` parser +The `common_chat_peg_unified_builder` builds a `constructed` parser suitable for models that emit tool arguments as separate entities, such as XML tags. @@ -264,7 +264,7 @@ tags. - **`tool_arg_json_value(p)`** - Tag JSON value for the argument ```cpp -build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) { +build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto location_arg = p.tool_arg( p.tool_arg_open(""), p.tool_arg_string_value(p.until("")), diff --git a/models/templates/Apertus-8B-Instruct.jinja b/models/templates/Apertus-8B-Instruct.jinja index 10826ff690..48f1658f4c 100644 --- a/models/templates/Apertus-8B-Instruct.jinja +++ b/models/templates/Apertus-8B-Instruct.jinja @@ -294,7 +294,7 @@ {%- for tool_call in message.tool_calls -%} {%- if tool_call.type == 'function' -%} {%- set function = tool_call.function -%} - {{- '{"' + function.name + '": ' + function.arguments + '}' }} + {{- '{"' + function.name + '": ' + function.arguments|tojson + '}' }} {%- if not loop.last -%} {{- ", " }} {%- endif -%} diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja new file mode 100755 index 0000000000..c430f45580 --- /dev/null +++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja @@ -0,0 +1,173 @@ +{# ---------------------------------------------------------------------- #} +{# ƛƬ Default setup and flags #} +{# ---------------------------------------------------------------------- #} +{# FIX: Use "is defined" check BEFORE accessing the variable #} +{%- set messages = messages if (messages is defined and messages) else [] -%} +{%- set tools = tools if (tools is defined and tools) else [] -%} +{%- set add_generation_prompt = add_generation_prompt if (add_generation_prompt is defined) else false -%} +{%- set available_tool_string = '' -%} +{%- set add_tool_id = true -%} +{%- set add_thoughts = true -%} {# whether to include reasoning blocks #} +{%- set add_generation_prompt = true -%} {# whether to emit reasoning starter before assistant response #} +{# Optional token placeholders (safe defaults) #} +{%- set bos_token = bos_token if (bos_token is defined) else '' -%} +{%- set eos_token = eos_token if (eos_token is defined) else '' -%} +{# ---------------------------------------------------------------------- #} +{# Core reasoning prompt and assistant reasoning prefix #} +{# ---------------------------------------------------------------------- #} +{%- set reasoning_prompt -%} + You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +{%- endset -%} +{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%} +{# ---------------------------------------------------------------------- #} +{# Tool list and tool call output format #} +{# ---------------------------------------------------------------------- #} +{%- if tools|length > 0 -%} + {%- set available_tool_string -%} + You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + + {% for tool in tools %}{{ tool|string }}{% endfor %} + + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] + {%- endset -%} +{%- endif -%} +{# ---------------------------------------------------------------------- #} +{# Start system block if first message is not system #} +{# ---------------------------------------------------------------------- #} +{%- if messages|length > 0 and messages[0]['role'] != 'system' -%} + {%- if tools|length > 0 -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }} + {%- else -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }} + {%- endif -%} +{%- endif -%} +{# ---------------------------------------------------------------------- #} +{# Iterate through messages #} +{# ---------------------------------------------------------------------- #} +{%- for message in messages -%} + + {# ---------------- USER MESSAGE ---------------- #} + {%- if message['role'] == 'user' -%} + {{ '<|begin_user|>\n' }} + {%- if message['content'] is not string -%} + {%- for chunk in message['content'] -%} + {%- if chunk['type'] == 'text' -%} + {{ chunk['text'] }} + {%- elif chunk['type'] in ['image', 'image_url'] -%} + {{ '[IMG]' }} + {%- else -%} + {{ raise_exception('Unrecognized content type!') }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ message['content'] }} + {%- endif -%} + + {# ---------------- SYSTEM MESSAGE ---------------- #} + {%- elif message['role'] == 'system' -%} + {%- set sys_content = message.get('content', '') -%} + {%- if sys_content and sys_content|length > 0 -%} + {%- if sys_content is string -%} + {%- set system_message = sys_content -%} + {%- else -%} + {%- set system_message = sys_content[0]['text'] -%} + {%- endif -%} + {%- else -%} + {%- set system_message = '' -%} + {%- endif -%} + + {%- if tools|length > 0 -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }} + {%- else -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }} + {%- endif -%} + + {# ---------------- ASSISTANT MESSAGE ---------------- #} + {%- elif message['role'] == 'assistant' -%} + {%- if loop.last -%} + {%- set add_tool_id = false -%} + {%- endif -%} + + {{ '\n<|begin_assistant|>\n' }} + + {%- if add_thoughts and message.get('reasoning_content') and loop.last -%} + {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }} + {%- endif -%} + + {%- set asst_content = message.get('content', '') -%} + {%- if asst_content and asst_content|length > 0 -%} + {%- if asst_content is not string -%} + {%- set asst_text = asst_content[0]['text'] -%} + {%- else -%} + {%- set asst_text = asst_content -%} + {%- endif -%} + {# For historical turns (not the last), strip reasoning and keep only final response #} + {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%} + {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}} + {%- else -%} + {{- asst_text -}} + {%- endif -%} + {%- elif message.get('chosen') and message['chosen']|length > 0 -%} + {{ message['chosen'][0] }} + {%- endif -%} + + {# Tool call output #} + {%- set tool_calls = message.get('tool_calls', []) -%} + {%- if tool_calls and tool_calls|length > 0 -%} + {{ '\n[' }} + {%- for tool_call in tool_calls -%} + {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson }} + {%- if add_tool_id == true and 'id' in tool_call -%} + {{ ', "id": "' + tool_call['id'] + '"' }} + {%- endif -%} + {{ '}' }} + {%- if not loop.last -%}{{ ', ' }}{%- endif -%} + {%- endfor -%} + {{ ']' }} + {%- endif -%} + + {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%} + {%- if not loop.last or training_prompt -%} + {{ '\n<|end|>\n' }} + {%- endif -%} + + {# ---------------- TOOL RESULT MESSAGE ---------------- #} + {%- elif message['role'] == 'tool' -%} + {%- set tool_content = message.get('content', '') -%} + {%- if tool_content is string -%} + {%- set tool_message = tool_content -%} + {%- else -%} + {%- set tool_message = tool_content[0]['text'] if tool_content else '' -%} + {%- endif -%} + {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }} + + {# ---------------- CONTENT MESSAGE ---------------- #} + {%- elif message['role'] == 'content' -%} + {%- set msg_content = message.get('content', '') -%} + {%- if msg_content is not string -%} + {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }} + {%- else -%} + {{ '<|begin_content|>\n' + msg_content + '\n' }} + {%- endif -%} + {%- endif -%} + + {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #} + {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%} + {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }} + {%- endif -%} + +{%- endfor -%} diff --git a/models/templates/Bielik-11B-v3.0-Instruct.jinja b/models/templates/Bielik-11B-v3.0-Instruct.jinja new file mode 100644 index 0000000000..40ef50076e --- /dev/null +++ b/models/templates/Bielik-11B-v3.0-Instruct.jinja @@ -0,0 +1,77 @@ +{{ bos_token }} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content'] %} + {%- set loop_start_index = 1 %} +{%- else %} + {%- set system_message = "" %} + {%- set loop_start_index = 0 %} +{%- endif %} + +{%- if system_message or tools %} + {{- '<|im_start|>system\n' }} + {%- if system_message %} + {{- system_message }} + {%- endif %} + {%- if tools %} + {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }} + {{- '[' }} + {%- for tool in tools %} + {{- tool | tojson }} + {%- if not loop.last %} + {{- ',\n' }} + {%- endif %} + {%- endfor %} + {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu {"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }} + {%- endif %} + {%- if enable_thinking %} + {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów .... Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}} + {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }} + {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}} + {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}} + {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- endif %} + +{%- for message in messages[loop_start_index:] %} + {%- if message['role'] == 'user' %} + {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }} + {%- elif message['role'] == 'assistant' %} + {{- '<|im_start|>assistant\n' }} + {%- set content = message.content | default('') %} + {%- set reasoning_content = message.reasoning_content | default('') %} + {%- if not reasoning_content and '' in content and '' in content %} + {%- set reasoning_parts = content.split('') %} + {%- set reasoning_content = reasoning_parts[0].split('')[-1] %} + {%- set content = reasoning_parts[1:] | join('') %} + {%- endif %} + {%- if reasoning_content %} + {{- '\n' + reasoning_content.strip() + '\n\n' }} + {%- endif %} + {{- content.lstrip() }} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message['role'] == 'tool' %} + {%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %} + {{- '<|im_start|>user\n' }} + {%- endif %} + {{- '<|function_output|>' + message['content'] }} + {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} + +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking %} + {{- '\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja index 078e9f5458..e144cfcf69 100644 --- a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja @@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea {%- elif message.role|lower == 'user' %} <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %} {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %} -<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[ +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[ {% for tc in message.tool_calls %} {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %} diff --git a/models/templates/GLM-4.7-Flash.jinja b/models/templates/GLM-4.7-Flash.jinja new file mode 100644 index 0000000000..2ab98ef068 --- /dev/null +++ b/models/templates/GLM-4.7-Flash.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/models/templates/LFM2-8B-A1B.jinja b/models/templates/LFM2-8B-A1B.jinja new file mode 100644 index 0000000000..3738b3d145 --- /dev/null +++ b/models/templates/LFM2-8B-A1B.jinja @@ -0,0 +1,47 @@ +{{- bos_token -}} +{%- set system_prompt = "" -%} +{%- set ns = namespace(system_prompt="") -%} +{%- if messages[0]["role"] == "system" -%} + {%- set ns.system_prompt = messages[0]["content"] -%} + {%- set messages = messages[1:] -%} +{%- endif -%} +{%- if tools -%} + {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%} + {%- for tool in tools -%} + {%- if tool is not string -%} + {%- set tool = tool | tojson -%} + {%- endif -%} + {%- set ns.system_prompt = ns.system_prompt + tool -%} + {%- if not loop.last -%} + {%- set ns.system_prompt = ns.system_prompt + ", " -%} + {%- endif -%} + {%- endfor -%} + {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%} + {{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' -}} +{%- endif -%} +{%- if ns.system_prompt -%} + {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}} +{%- endif -%} +{%- for message in messages -%} + {{- "<|im_start|>" + message["role"] + "\n" -}} + {%- set content = message["content"] -%} + {%- if content is not string -%} + {%- set content = content | tojson -%} + {%- endif -%} + {%- if message["role"] == "tool" -%} + {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%} + {%- elif message["role"] == "assistant" -%} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }} + {%- endfor %} + {%- endif %} + {%- endif -%} + {{- content + "<|im_end|>\n" -}} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{- "<|im_start|>assistant\n" -}} +{%- endif -%} diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja index 49b0e8d0ee..cde8c0e43d 100644 --- a/models/templates/Qwen3-Coder.jinja +++ b/models/templates/Qwen3-Coder.jinja @@ -29,7 +29,7 @@ {%- endif %} {%- endif %} {%- if tools is iterable and tools | length > 0 %} - {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }} + {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }} {{- "" }} {%- for tool in tools %} {%- if tool.function is defined %} @@ -63,7 +63,7 @@ {{- '\n' }} {%- endfor %} {{- "\n" }} - {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nvalue_2\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} {%- endif %} {%- if system_message is defined %} {{- '<|im_end|>\n' }} diff --git a/models/templates/StepFun3.5-Flash.jinja b/models/templates/StepFun3.5-Flash.jinja new file mode 100644 index 0000000000..c09ea497da --- /dev/null +++ b/models/templates/StepFun3.5-Flash.jinja @@ -0,0 +1,80 @@ +{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}{% endif %}{% endfor %}{% endif %}{% endmacro %} +{{bos_token}}{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- render_content(messages[0].content) + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson(ensure_ascii=False) }} + {%- endfor %} + {{- "\n\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner \n...\n block must be nested within \n...\n XML tags\n- Required parameters MUST be specified\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('') and render_content(message.content).endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- set content = render_content(message.content) %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %} + {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = render_content(message.reasoning_content) %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- else %} + {%- set reasoning_content = '' %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n' }} + {%- if tool_call.arguments is defined %} + {%- set arguments = tool_call.arguments %} + {%- for args_name, args_value in arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>tool_response\n' }} + {%- endif %} + {{- '' }} + {{- content }} + {{- '' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n\n' }} +{%- endif %} diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja index c2066bd739..299f7a7ff1 100644 --- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja @@ -1 +1,44 @@ -{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file +{% if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = false -%} +{%- endif -%} +{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- set ns.system_prompt = message['content'] -%} + {%- endif -%} +{%- endfor -%}{{bos_token}}{{ns.system_prompt}} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is none -%} + {%- set ns.is_tool = false -%} + {%- for tool in message['tool_calls']-%} + {%- if not ns.is_first -%}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- set ns.is_first = true -%} + {%- else -%}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is not none -%} + {%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else -%} + {%- set content = message['content'] -%} + {%- if '' in content -%} + {%- set content = content.split('')[-1] -%} + {%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}} + {%- endif -%} + {%- endif -%} + {%- if message['role'] == 'tool' -%} + {%- set ns.is_tool = true -%} + {%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- set ns.is_output_first = false -%} + {%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- endif -%} + {%- endif -%} +{%- endfor -%} +{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}} +{%- endif -%} +{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|>\n'}} +{%- endif %} \ No newline at end of file diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja index c2066bd739..0c8d81e107 100644 --- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja @@ -1 +1,47 @@ -{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file +{% if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = false -%} +{%- endif -%} +{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- set ns.system_prompt = message['content'] -%} + {%- endif -%} +{%- endfor -%}{{bos_token}}{{ns.system_prompt}} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['tool_calls'] -%} + {%- set ns.is_tool = false -%} + {%- for tool in message['tool_calls']-%} + {%- if not ns.is_first -%} + {{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- set ns.is_first = true -%} + {%- else -%} + {{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- endif -%} + {%- endfor -%} + {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is not none -%} + {%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else -%} + {%- set content = message['content'] -%} + {%- if '' in content -%} + {%- set content = content.split('')[-1] -%} + {%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}} + {%- endif -%} + {%- endif -%} + {%- if message['role'] == 'tool' -%} + {%- set ns.is_tool = true -%} + {%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- set ns.is_output_first = false -%} + {%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- endif -%} + {%- endif -%} +{%- endfor -%} +{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}} +{%- endif -%} +{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|>\n'}} +{%- endif %} \ No newline at end of file diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja index e5656196a3..2fd1c415b8 100644 --- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja +++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja @@ -1,3 +1,71 @@ -{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' +{% if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = false -%} +{%- endif -%} +{%- if not thinking is defined -%} + {%- if enable_thinking is defined -%} + {%- set thinking = enable_thinking -%} + {%- else -%} + {%- set thinking = false -%} + {%- endif -%} +{%- endif -%} +{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- if ns.is_first_sp -%} + {%- set ns.system_prompt = ns.system_prompt + message['content'] -%} + {%- set ns.is_first_sp = false -%} + {%- else -%} + {%- set ns.system_prompt = ns.system_prompt + ' -' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{''}} {%- else %}{{''}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '' in content %}{%- set content = content.split('', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{''}}{%- else %}{{''}}{%- endif %}{% endif %} \ No newline at end of file +' + message['content'] -%} + {%- endif -%} + {%- endif -%} +{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {%- set ns.is_tool = false -%} + {%- set ns.is_first = false -%} + {%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['tool_calls'] -%} + {%- if ns.is_last_user -%}{{'<|Assistant|>'}} + {%- endif -%} + {%- set ns.is_last_user = false -%} + {%- set ns.is_first = false -%} + {%- set ns.is_tool = false -%} + {%- for tool in message['tool_calls'] -%} + {%- if not ns.is_first -%} + {%- if not message['content'] -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- endif -%} + {%- set ns.is_first = true -%} + {%- else -%}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- endif -%} + {%- endfor -%}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif -%} + {%- if message['role'] == 'assistant' and not message['tool_calls'] -%} + {%- if ns.is_last_user -%}{{'<|Assistant|>'}} + {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{''}} + {%- else -%}{{''}} + {%- endif -%} + {%- endif -%} + {%- set ns.is_last_user = false -%} + {%- if ns.is_tool -%}{{message['content'] + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else -%} + {%- set content = message['content'] -%} + {%- if '' in content -%} + {%- set content = content.split('', 1)[1] -%} + {%- endif -%}{{content + '<|end▁of▁sentence|>'}} + {%- endif -%} + {%- endif -%} + {%- if message['role'] == 'tool' -%} + {%- set ns.is_last_user = false -%} + {%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<|Assistant|>'}} + {%- if not thinking -%}{{''}} + {%- else -%}{{''}} + {%- endif -%} +{%- endif %} \ No newline at end of file diff --git a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja index 9b8136df73..b94cfd4d9b 100644 --- a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +++ b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja @@ -46,7 +46,7 @@ Available functions as JSON spec: {%- if 'tool_calls' in message and message['tool_calls'] -%} {%- set tool = namespace(calls=[]) -%} {%- for call in message['tool_calls'] -%} - {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%} + {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments']|tojson + '}'] -%} {%- endfor -%} {%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%} {%- endif -%} diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja index ecb49a2108..e286d8a7b5 100644 --- a/models/templates/moonshotai-Kimi-K2.jinja +++ b/models/templates/moonshotai-Kimi-K2.jinja @@ -1,43 +1,43 @@ -{%- if tools -%} - <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|> -{%- endif -%} -{%- for message in messages -%} - {%- if loop.first and messages[0]['role'] != 'system' -%} - <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|> - {%- endif -%} - {%- if message['role'] == 'system' -%} - <|im_system|>system<|im_middle|> - {%- elif message['role'] == 'user' -%} - <|im_user|>user<|im_middle|> - {%- elif message['role'] == 'assistant' -%} - <|im_assistant|>assistant<|im_middle|> - {%- elif message['role'] == 'tool' -%} - <|im_system|>tool<|im_middle|> - {%- endif -%} - {%- if message['role'] == 'assistant' and message.get('tool_calls') -%} - {%- if message['content'] -%}{{ message['content'] }}{%- endif -%} - <|tool_calls_section_begin|> - {%- for tool_call in message['tool_calls'] -%} - {%- set func_name = tool_call['function']['name'] -%} - {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%} - <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|> - {%- endfor -%} - <|tool_calls_section_end|> - {%- elif message['role'] == 'tool' -%} - ## Return of {{ message.tool_call_id }}\n{{ message['content'] }} - {%- elif message['content'] is string -%} - {{ message['content'] }} - {%- elif message['content'] is not none -%} - {% for content in message['content'] -%} - {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%} - <|media_start|>image<|media_content|><|media_pad|><|media_end|> - {% else -%} - {{ content['text'] }} - {%- endif -%} - {%- endfor -%} - {%- endif -%} - <|im_end|> -{%- endfor -%} -{%- if add_generation_prompt -%} - <|im_assistant|>assistant<|im_middle|> -{%- endif -%} +{%- if tools -%} + <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|> +{%- endif -%} +{%- for message in messages -%} + {%- if loop.first and messages[0]['role'] != 'system' -%} + <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|> + {%- endif -%} + {%- if message['role'] == 'system' -%} + <|im_system|>system<|im_middle|> + {%- elif message['role'] == 'user' -%} + <|im_user|>user<|im_middle|> + {%- elif message['role'] == 'assistant' -%} + <|im_assistant|>assistant<|im_middle|> + {%- elif message['role'] == 'tool' -%} + <|im_system|>tool<|im_middle|> + {%- endif -%} + {%- if message['role'] == 'assistant' and message.get('tool_calls') -%} + {%- if message['content'] -%}{{ message['content'] }}{%- endif -%} + <|tool_calls_section_begin|> + {%- for tool_call in message['tool_calls'] -%} + {%- set func_name = tool_call['function']['name'] -%} + {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%} + <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|> + {%- endfor -%} + <|tool_calls_section_end|> + {%- elif message['role'] == 'tool' -%} + ## Return of {{ message.tool_call_id }}\n{{ message['content'] }} + {%- elif message['content'] is string -%} + {{ message['content'] }} + {%- elif message['content'] is not none -%} + {% for content in message['content'] -%} + {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%} + <|media_start|>image<|media_content|><|media_pad|><|media_end|> + {% else -%} + {{ content['text'] }} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + <|im_end|> +{%- endfor -%} +{%- if add_generation_prompt -%} + <|im_assistant|>assistant<|im_middle|> +{%- endif -%} diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja index 29e582fbf6..1639b63901 100644 --- a/models/templates/unsloth-Apriel-1.5.jinja +++ b/models/templates/unsloth-Apriel-1.5.jinja @@ -86,22 +86,22 @@ Prior to generating the function calls, you should generate the reasoning for wh {%- set add_tool_id = false -%} {%- endif -%} {{- '<|assistant|>\n' -}} - {%- if message['content'] is not none and message['content']|length > 0 -%} + {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%} {%- if message['content'] is not string and message['content'][0]['text'] is not none %} {{- message['content'][0]['text'] }} {%- else %} {{- message['content'] -}} {%- endif -%} - {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%} + {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%} {{- message['chosen'][0] -}} {%- endif -%} {%- if add_thoughts and 'thought' in message and message['thought'] is not none -%} {{- '' + message['thought'] + '' -}} {%- endif -%} - {%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%} + {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%} {{- '\n[' -}} {%- for tool_call in message["tool_calls"] -%} - {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}} + {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson -}} {%- if add_tool_id == true -%} {{- ', "id": "' + tool_call['id'] + '"' -}} {%- endif -%} diff --git a/scripts/server-bench.py b/scripts/server-bench.py index dbbb0939ff..2ef7258712 100755 --- a/scripts/server-bench.py +++ b/scripts/server-bench.py @@ -230,7 +230,7 @@ def benchmark( logger.info("") logger.info(f"Benchmark duration: {token_t_last:.2f} s") - logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min") + logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last / 60):.2f} requests/min") logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens") logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens") logger.info(f"Average prompt latency: {1e3 * np.mean(prompt_t):.2f} ms") diff --git a/scripts/server-test-model.py b/scripts/server-test-model.py new file mode 100644 index 0000000000..9049d80279 --- /dev/null +++ b/scripts/server-test-model.py @@ -0,0 +1,202 @@ +import argparse +import json +import requests +import logging +import sys + +handler = logging.StreamHandler(sys.stdout) +handler.terminator = "" # ← no newline +logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler]) +logger = logging.getLogger("server-test-model") + + +def run_query(url, messages, tools=None, stream=False, tool_choice=None): + payload = { + "messages": messages, + "stream": stream, + "max_tokens": 5000, + } + if tools: + payload["tools"] = tools + if tool_choice: + payload["tool_choice"] = tool_choice + + try: + response = requests.post(url, json=payload, stream=stream) + response.raise_for_status() + except requests.exceptions.RequestException as e: + if e.response is not None: + logger.info(f"Response error: {e} for {e.response.content}\n") + else: + logger.info(f"Error connecting to server: {e}\n") + return None + + full_content = "" + reasoning_content = "" + tool_calls = [] + + if stream: + logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n") + for line in response.iter_lines(): + if line: + decoded_line = line.decode("utf-8") + if decoded_line.startswith("data: "): + data_str = decoded_line[6:] + if data_str == "[DONE]": + break + try: + data = json.loads(data_str) + if "choices" in data and len(data["choices"]) > 0: + delta = data["choices"][0].get("delta", {}) + + # Content + content_chunk = delta.get("content", "") + if content_chunk: + full_content += content_chunk + logger.info(content_chunk) + + # Reasoning + reasoning_chunk = delta.get("reasoning_content", "") + if reasoning_chunk: + reasoning_content += reasoning_chunk + logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m") + + # Tool calls + if "tool_calls" in delta: + for tc in delta["tool_calls"]: + index = tc.get("index") + if index is not None: + while len(tool_calls) <= index: + # Using "function" as type default but could be flexible + tool_calls.append( + { + "id": "", + "type": "function", + "function": { + "name": "", + "arguments": "", + }, + } + ) + + if "id" in tc: + tool_calls[index]["id"] += tc["id"] + if "function" in tc: + if "name" in tc["function"]: + tool_calls[index]["function"][ + "name" + ] += tc["function"]["name"] + if "arguments" in tc["function"]: + tool_calls[index]["function"][ + "arguments" + ] += tc["function"]["arguments"] + + except json.JSONDecodeError: + logger.info(f"Failed to decode JSON: {data_str}\n") + logger.info("\n--- End of Stream ---\n") + else: + logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n") + data = response.json() + if "choices" in data and len(data["choices"]) > 0: + message = data["choices"][0].get("message", {}) + full_content = message.get("content", "") + reasoning_content = message.get("reasoning_content", "") + tool_calls = message.get("tool_calls", []) + logger.info(full_content) + logger.info("--- End of Response ---\n") + + return { + "content": full_content, + "reasoning_content": reasoning_content, + "tool_calls": tool_calls, + } + + +def test_chat(url, stream): + logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n") + messages = [{"role": "user", "content": "What is the capital of France?"}] + result = run_query(url, messages, stream=stream) + + if result: + if result["content"]: + logger.info("PASS: Output received.\n") + else: + logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n") + + if result.get("reasoning_content"): + logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n") + else: + logger.info("INFO: No reasoning content detected (Standard model behavior).\n") + else: + logger.info("FAIL: No result.\n") + + +def test_tool_call(url, stream): + logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n") + messages = [ + { + "role": "user", + "content": "What is the weather in London? Please use the get_weather tool.", + } + ] + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } + ] + + result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream) + + if result: + tcs = result.get("tool_calls") + if tcs and len(tcs) > 0: + logger.info("PASS: Tool calls detected.") + for tc in tcs: + func = tc.get("function", {}) + logger.info(f" Tool: {func.get('name')}, Args: {func.get('arguments')}\n") + else: + logger.info(f"FAIL: No tool calls. Content: {result['content']}\n") + + if result.get("reasoning_content"): + logger.info( + f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n" + ) + else: + logger.info("FAIL: Query failed.\n") + + +def main(): + parser = argparse.ArgumentParser(description="Test llama-server functionality.") + parser.add_argument("--host", default="localhost", help="Server host") + parser.add_argument("--port", default=8080, type=int, help="Server port") + args = parser.parse_args() + + base_url = f"http://{args.host}:{args.port}/v1/chat/completions" + logger.info(f"Testing server at {base_url}\n") + + # Non-streaming tests + test_chat(base_url, stream=False) + test_tool_call(base_url, stream=False) + + # Streaming tests + test_chat(base_url, stream=True) + test_tool_call(base_url, stream=True) + + +if __name__ == "__main__": + main() diff --git a/scripts/snapdragon/qdc/tests/test_bench.py b/scripts/snapdragon/qdc/tests/test_bench.py index 651ab5b717..bd19e5d26c 100644 --- a/scripts/snapdragon/qdc/tests/test_bench.py +++ b/scripts/snapdragon/qdc/tests/test_bench.py @@ -14,7 +14,7 @@ cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_pa def run_cmd(cmd): p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) sys.stdout.write(p.stdout) - assert(p.returncode == 0) + assert (p.returncode == 0) @pytest.mark.dependency() diff --git a/src/models/models.h b/src/models/models.h index 3c66d32531..b15fdd2e5f 100644 --- a/src/models/models.h +++ b/src/models/models.h @@ -1,10 +1,11 @@ #pragma once -#include "../llama-model.h" #include "../llama-graph.h" +#include "../llama-model.h" // TODO: remove in follow-up PR - move to .cpp files #include "../llama-memory-recurrent.h" + #include struct llm_graph_context_mamba : public llm_graph_context { @@ -12,9 +13,16 @@ struct llm_graph_context_mamba : public llm_graph_context { virtual ~llm_graph_context_mamba() = default; - ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il); - ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const; - + ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il); + ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il) const; }; // Base class for RWKV-related models @@ -158,8 +166,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context { llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_exaone4 : public llm_graph_context { +template struct llm_build_exaone4 : public llm_graph_context { llm_build_exaone4(const llama_model & model, const llm_graph_params & params); }; @@ -183,8 +190,7 @@ struct llm_build_gemma2_iswa : public llm_graph_context { llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_gemma3 : public llm_graph_context { +template struct llm_build_gemma3 : public llm_graph_context { llm_build_gemma3(const llama_model & model, const llm_graph_params & params); }; @@ -195,8 +201,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context { const int64_t n_embd_altup; const int64_t n_altup; const int i_altup_act; - const int n_layer_sparsity = 10; // number of layers using activation sparsity - const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95) + const int n_layer_sparsity = 10; // number of layers using activation sparsity + const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95) llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params); ggml_tensor * calc_magnitude(ggml_tensor * x); @@ -237,27 +243,26 @@ struct llm_build_gptneox : public llm_graph_context { struct llm_build_granite : public llm_graph_context { llm_build_granite(const llama_model & model, const llm_graph_params & params); -private: - ggml_tensor * build_attention_layer( - ggml_tensor * cur, - ggml_tensor * inp_pos, - llm_graph_input_attn_kv * inp_attn, - const llama_model & model, - const int64_t n_embd_head, - const int il); + private: + ggml_tensor * build_attention_layer(ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il); - ggml_tensor * build_layer_ffn( - ggml_tensor * cur, - ggml_tensor * inpSA, - const llama_model & model, - const int il); + ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il); }; struct llm_build_granite_hybrid : public llm_graph_context_mamba { llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params); ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il); - ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, - const llama_model & model,const int64_t n_embd_head, const int il); + ggml_tensor * build_attention_layer(ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il); }; struct llm_build_grok : public llm_graph_context { @@ -321,9 +326,11 @@ struct llm_build_lfm2 : public llm_graph_context { llm_build_lfm2(const llama_model & model, const llm_graph_params & params); ggml_tensor * build_moe_feed_forward(ggml_tensor * cur, int il) const; ggml_tensor * build_dense_feed_forward(ggml_tensor * cur, int il) const; - ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const; + ggml_tensor * build_attn_block(ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv * inp_attn, + int il) const; ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il); - }; struct llm_build_llada : public llm_graph_context { @@ -382,16 +389,18 @@ struct llm_build_nemotron : public llm_graph_context { struct llm_build_nemotron_h : public llm_graph_context_mamba { llm_build_nemotron_h(const llama_model & model, const llm_graph_params & params); ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il); - ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn, - const llama_model & model, const int64_t n_embd_head, const int il); + ggml_tensor * build_attention_layer(ggml_tensor * cur, + llm_graph_input_attn_kv * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il); }; struct llm_build_neo_bert : public llm_graph_context { llm_build_neo_bert(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_olmo2 : public llm_graph_context { +template struct llm_build_olmo2 : public llm_graph_context { llm_build_olmo2(const llama_model & model, const llm_graph_params & params); }; @@ -423,17 +432,23 @@ struct llm_build_phi2 : public llm_graph_context { llm_build_phi2(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_phi3 : public llm_graph_context { +template struct llm_build_phi3 : public llm_graph_context { llm_build_phi3(const llama_model & model, const llm_graph_params & params); }; struct llm_build_plamo2 : public llm_graph_context_mamba { llm_build_plamo2(const llama_model & model, const llm_graph_params & params); - private: - ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il); - ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur, - const llama_model & model, int il); + private: + ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il); + ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, + ggml_tensor * inp_pos, + ggml_tensor * cur, + const llama_model & model, + int il); }; struct llm_build_plamo : public llm_graph_context { @@ -479,24 +494,20 @@ struct llm_build_qwen3vlmoe : public llm_graph_context { struct llm_build_qwen3next : public llm_graph_context_mamba { llm_build_qwen3next(const llama_model & model, const llm_graph_params & params); -private: - ggml_tensor * build_layer_attn( - llm_graph_input_attn_kv * inp_attn, - ggml_tensor * cur, - ggml_tensor * inp_pos, - int il); + private: + ggml_tensor * build_layer_attn(llm_graph_input_attn_kv * inp_attn, + ggml_tensor * cur, + ggml_tensor * inp_pos, + int il); - ggml_tensor * build_layer_attn_linear( - llm_graph_input_rs * inp, - ggml_tensor * cur, - ggml_tensor * causal_mask, - ggml_tensor * identity, - ggml_tensor * diag_mask, - int il); + ggml_tensor * build_layer_attn_linear(llm_graph_input_rs * inp, + ggml_tensor * cur, + ggml_tensor * causal_mask, + ggml_tensor * identity, + ggml_tensor * diag_mask, + int il); - ggml_tensor * build_layer_ffn( - ggml_tensor * cur, - int il); + ggml_tensor * build_layer_ffn(ggml_tensor * cur, int il); // returns pair of output and new state std::pair build_delta_net_chunking( @@ -681,8 +692,7 @@ struct llm_build_seed_oss : public llm_graph_context { llm_build_seed_oss(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_smallthinker : public llm_graph_context { +template struct llm_build_smallthinker : public llm_graph_context { llm_build_smallthinker(const llama_model & model, const llm_graph_params & params); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 350bffc315..b8ce2fac90 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -187,11 +187,11 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS) # llama_build_and_test(test-double-float.cpp) # SLOW endif() -llama_build_and_test(test-chat-parser.cpp) llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp) -llama_build_and_test(test-chat-template.cpp) llama_build_and_test(test-jinja.cpp) llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python) +llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +llama_build_and_test(test-chat-template.cpp) llama_build_and_test(test-json-partial.cpp) llama_build_and_test(test-log.cpp) llama_build_and_test( @@ -201,6 +201,7 @@ llama_build_and_test( peg-parser/test-gbnf-generation.cpp peg-parser/test-json-parser.cpp peg-parser/test-json-serialization.cpp + peg-parser/test-python-dict-parser.cpp peg-parser/test-unicode.cpp peg-parser/tests.h ) @@ -264,3 +265,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama) llama_build_and_test(test-alloc.cpp) target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src) + + diff --git a/tests/peg-parser/test-basic.cpp b/tests/peg-parser/test-basic.cpp index 1bda6f2e69..872f16a78d 100644 --- a/tests/peg-parser/test-basic.cpp +++ b/tests/peg-parser/test-basic.cpp @@ -1,3 +1,4 @@ +#include "peg-parser.h" #include "tests.h" void test_basic(testing & t) { @@ -450,5 +451,21 @@ void test_basic(testing & t) { t.assert_equal("result_is_fail", true, result.fail()); }); + + // Test markers + t.test("marker", [](testing &t) { + auto bracket_parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.marker(); + }); + + common_peg_parse_context ctx_square("[marker]", false); + common_peg_parse_context ctx_sharp("", false); + + auto result_square = bracket_parser.parse(ctx_square); + auto result_sharp = bracket_parser.parse(ctx_sharp); + + t.assert_true("result_square_is_success", result_square.success()); + t.assert_true("result_sharp_is_success", result_sharp.success()); + }); }); } diff --git a/tests/peg-parser/test-python-dict-parser.cpp b/tests/peg-parser/test-python-dict-parser.cpp new file mode 100644 index 0000000000..9db1154b45 --- /dev/null +++ b/tests/peg-parser/test-python-dict-parser.cpp @@ -0,0 +1,279 @@ +#include "tests.h" + +void test_python_dict_parser(testing &t) { + // Test parsing a simple Python dict object with single quotes + t.test("simple Python dict object parsing", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'name': 'test', 'value': 42, 'flag': true}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing a Python dict array with mixed types + t.test("Python dict array with mixed types", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "[1, 'hello', true, null, 3.14]"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing nested Python dict with objects and arrays + t.test("nested Python dict with objects and arrays", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = + "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing Python dict with escaped single quotes + t.test("Python dict with escaped single quotes", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'message': 'It\\'s working!'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing Python dict with double quotes inside single quotes + t.test("Python dict with double quotes inside single quotes", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'quote': 'He said \"Hello\"'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test the example from the requirements + t.test("complex Python dict example from requirements", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test need_more_input() parsing - incomplete object + t.test("need_more_input() parsing - incomplete object", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'name': 'test', 'value': "; + common_peg_parse_context ctx(input, true); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_need_more_input", true, result.need_more_input()); + }); + + // Test need_more_input() parsing - incomplete single-quoted string + t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'name': 'test"; + common_peg_parse_context ctx(input, true); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_need_more_input", true, result.need_more_input()); + }); + + // Test unicode in Python dict strings + t.test("unicode in Python dict strings", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'message': 'Hello, 世界!'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test Python dict with unicode escapes + t.test("Python dict with unicode escapes", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'unicode': 'Hello\\u0041'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test that JSON double-quoted strings fail with Python dict parser + t.test("JSON double-quoted strings fail with Python dict parser", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{\"name\": \"test\"}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_fail", true, result.fail()); + }); + + // Test Python dict string content parser directly + t.test("python dict string content parser", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.sequence({ p.literal("'"), p.python_dict_string_content(), p.literal("'"), p.space() }); + }); + + t.test("simple string", [&](testing &t) { + std::string input = "'hello'"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("string with escaped single quote", [&](testing &t) { + std::string input = "'it\\'s'"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("string with double quotes", [&](testing &t) { + std::string input = "'say \"hello\"'"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("incomplete string", [&](testing &t) { + std::string input = "'hello"; + common_peg_parse_context ctx(input, true); + + auto result = parser.parse(ctx); + t.assert_true("need_more_input", result.need_more_input()); + }); + }); + + // Test allow_python_dict_format flag usage + t.test("allow_python_dict_format flag", [](testing &t) { + t.test("flag is false by default", [&](testing &t) { + common_peg_parser_builder builder; + t.assert_equal("default_value", false, builder.get_allow_python_dict_format()); + }); + + t.test("flag can be set to true", [&](testing &t) { + common_peg_parser_builder builder; + builder.set_allow_python_dict_format(true); + t.assert_equal("after_set", true, builder.get_allow_python_dict_format()); + }); + + t.test("flag can be set back to false", [&](testing &t) { + common_peg_parser_builder builder; + builder.set_allow_python_dict_format(true); + builder.set_allow_python_dict_format(false); + t.assert_equal("after_reset", false, builder.get_allow_python_dict_format()); + }); + }); + + // Test that the flag actually affects json() parser behavior + t.test("json() parser with allow_python_dict_format flag", [](testing &t) { + t.test("json() rejects single quotes when flag is false", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(false); + return p.json(); + }); + + std::string input = "{'name': 'test'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("fail", result.fail()); + }); + + t.test("json() accepts single quotes when flag is true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{'name': 'test'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("json() still accepts double quotes when flag is true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{\"name\": \"test\"}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("json() accepts mixed quote styles when flag is true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{\"name\": 'test', 'value': \"hello\"}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("complex nested structure with flag true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + }); +} diff --git a/tests/peg-parser/tests.h b/tests/peg-parser/tests.h index 4d3f4e9eaf..debd4286c5 100644 --- a/tests/peg-parser/tests.h +++ b/tests/peg-parser/tests.h @@ -22,3 +22,4 @@ void test_json_parser(testing &t); void test_gbnf_generation(testing &t); void test_unicode(testing &t); void test_json_serialization(testing &t); +void test_python_dict_parser(testing &t); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index a50c569b82..c88e1a1ddf 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -7790,6 +7790,8 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1, 1}, {1, 1})); } + test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1})); + #if 0 // test the mat-mat path for Metal for (int k = 1; k < 512; ++k) { diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp new file mode 100644 index 0000000000..c78428491f --- /dev/null +++ b/tests/test-chat-auto-parser.cpp @@ -0,0 +1,1885 @@ +#include "chat-auto-parser-helpers.h" +#include "chat-diff-analyzer.h" +#include "chat-peg-parser.h" +#include "chat.h" +#include "peg-parser.h" +#include "testing.h" + +#include +#include +#include +#include + +using namespace autoparser; + +static void test_calculate_diff_split_basic(testing & t); +static void test_calculate_diff_split_identical(testing & t); +static void test_calculate_diff_split_common_prefix(testing & t); +static void test_calculate_diff_split_common_suffix(testing & t); +static void test_calculate_diff_split_common_both(testing & t); +static void test_calculate_diff_split_empty_cases(testing & t); +static void test_calculate_diff_split_no_common(testing & t); +static void test_calculate_diff_split_single_char(testing & t); +static void test_calculate_diff_split_overlaps(testing & t); +static void test_calculate_diff_split_tag_boundaries(testing & t); +static void test_calculate_diff_split(testing & t); + +static void test_until_common_prefix_basic(testing & t); +static void test_until_common_prefix(testing & t); + +static void test_after_common_suffix_basic(testing & t); +static void test_after_common_suffix(testing & t); + +static void test_analyze_tool_call_pure_json(testing & t); +static void test_analyze_tool_call_function_name_markers(testing & t); +static void test_analyze_tool_call_full_markers(testing & t); +static void test_analyze_tool_call_edge_cases(testing & t); + +static void test_compare_variants_basic(testing & t); +static void test_compare_variants_messages_modifier(testing & t); +static void test_compare_variants_tools_modifier(testing & t); +static void test_compare_variants_both_modifiers(testing & t); +static void test_compare_variants_template_failure(testing & t); +static void test_compare_variants_identity(testing & t); +static void test_compare_variants(testing & t); + +// Seed-OSS template tool calling analysis tests +static void test_seed_oss_tool_analysis(testing & t); +static void test_seed_oss_tool_presence(testing & t); +static void test_seed_oss_call_count(testing & t); +static void test_seed_oss_function_names(testing & t); +static void test_seed_oss_argument_count(testing & t); +static void test_seed_oss_args_presence(testing & t); +static void test_seed_oss_tool_with_reasoning(testing & t); + +// Nemotron template analysis tests +static void test_nemotron_analysis(testing & t); +static void test_nemotron_reasoning_detection(testing & t); +static void test_nemotron_tool_format(testing & t); + +// CohereForAI template analysis tests +static void test_cohere_reasoning_detection(testing & t); +static void test_cohere_analysis(testing & t); + +// Marker separation +static void test_marker_separation(testing & t); + +// standard_json_tools format tests +static void test_standard_json_tools_formats(testing & t); +static void test_standard_json_tools_openai(testing & t); +static void test_standard_json_tools_cohere(testing & t); +static void test_standard_json_tools_function_key(testing & t); + +// normalize_quotes_to_json tests +static void test_normalize_quotes_to_json(testing & t); +static void test_normalize_quotes_with_embedded_quotes(testing & t); + +// TAG_WITH_TAGGED argument parsing tests +static void test_tagged_args_with_embedded_quotes(testing & t); + +int main(int argc, char * argv[]) { + testing t(std::cout); + t.verbose = true; + + // usage: test-chat-auto-parser-helpers [filter_regex] + + if (argc > 1) { + t.set_filter(argv[1]); + } + + t.test("diff_split", test_calculate_diff_split); + t.test("common_prefix", test_until_common_prefix); + t.test("common_suffix", test_after_common_suffix); + t.test("compare_variants", test_compare_variants); + t.test("segments", test_marker_separation); + t.test("seed_oss_diffs", test_seed_oss_tool_analysis); + t.test("cohere", test_cohere_analysis); + t.test("nemotron", test_nemotron_analysis); + t.test("standard_json_tools", test_standard_json_tools_formats); + t.test("normalize_quotes_to_json", test_normalize_quotes_to_json); + t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes); + + return t.summary(); +} + +static void test_marker_separation(testing & t) { + auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker"); + auto single_diag_marker = segmentize_markers("pre_markerpost_marker"); + auto paired_markers = segmentize_markers("world"); + auto double_different_markers = segmentize_markers("[hello][world]"); + auto in_between = segmentize_markers("imdabada[hey]"); + + t.test("single_square_marker", [&] (testing & t) { + t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type); + t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type); + t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type); + + t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value); + t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value); + t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value); + }); + + t.test("single_diagonal_marker", [&] (testing & t) { + t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type); + t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type); + t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type); + + t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value); + t.assert_equal("second is ''", "", single_diag_marker[1].value); + t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value); + }); + + t.test("paired_markers", [&] (testing & t) { + t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type); + t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type); + t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type); + + t.assert_equal("first is ''", "", paired_markers[0].value); + t.assert_equal("second is 'world'", "world", paired_markers[1].value); + t.assert_equal("third is ''", "", paired_markers[2].value); + }); + + t.test("double_different_markers", [&] (testing & t) { + t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type); + t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type); + t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type); + t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type); + + t.assert_equal("first is ''", "", double_different_markers[0].value); + t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value); + t.assert_equal("third is ''", "", double_different_markers[2].value); + t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value); + }); + + t.test("in_between", [&] (testing & t) { + t.assert_equal("first is text", segment_type::TEXT, in_between[0].type); + t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type); + t.assert_equal("third is text", segment_type::TEXT, in_between[2].type); + t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type); + t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type); + t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type); + + t.assert_equal("first is 'im'", "im", in_between[0].value); + t.assert_equal("second is ''", "", in_between[1].value); + t.assert_equal("third is 'daba'", "daba", in_between[2].value); + t.assert_equal("fourth is ''", "", in_between[3].value); + t.assert_equal("fifth is 'da'", "da", in_between[4].value); + t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value); + }); +} + +static void test_calculate_diff_split(testing & t) { + t.test("calculate_diff_split basic", test_calculate_diff_split_basic); + t.test("calculate_diff_split identical", test_calculate_diff_split_identical); + t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix); + t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix); + t.test("calculate_diff_split common both", test_calculate_diff_split_common_both); + t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases); + t.test("calculate_diff_split no common", test_calculate_diff_split_no_common); + t.test("calculate_diff_split single char", test_calculate_diff_split_single_char); + t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps); + t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries); +} + +static void test_calculate_diff_split_basic(testing & t) { + diff_split result = calculate_diff_split("hello world", "hello test"); + t.assert_equal("prefix should be 'hello '", "hello ", result.prefix); + t.assert_equal("left should be 'world'", "world", result.left); + t.assert_equal("right should be 'test'", "test", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("abc", "xyz"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("prefixA suffix", "prefixB suffix"); + t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix); + t.assert_equal("left should be 'A'", "A", result.left); + t.assert_equal("right should be 'B'", "B", result.right); + t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix); +} + +static void test_calculate_diff_split_identical(testing & t) { + diff_split result = calculate_diff_split("hello", "hello"); + t.assert_equal("prefix should be 'hello'", "hello", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("a", "a"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("", ""); + t.assert_equal("prefix should be ''", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_common_prefix(testing & t) { + diff_split result = calculate_diff_split("abcdef", "abcxyz"); + t.assert_equal("prefix should be 'abc'", "abc", result.prefix); + t.assert_equal("left should be 'def'", "def", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("same", "sameagain"); + t.assert_equal("prefix should be 'same'", "same", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'again'", "again", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("test", "testing"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'ing'", "ing", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_common_suffix(testing & t) { + diff_split result = calculate_diff_split("123end", "456end"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be '123'", "123", result.left); + t.assert_equal("right should be '456'", "456", result.right); + t.assert_equal("suffix should be 'end'", "end", result.suffix); + + result = calculate_diff_split("start", "end"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'start'", "start", result.left); + t.assert_equal("right should be 'end'", "end", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("abcsuffix", "xyzsuffix"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix); +} + +static void test_calculate_diff_split_common_both(testing & t) { + diff_split result = calculate_diff_split("helloXworld", "helloYworld"); + t.assert_equal("prefix should be 'hello'", "hello", result.prefix); + t.assert_equal("left should be 'X'", "X", result.left); + t.assert_equal("right should be 'Y'", "Y", result.right); + t.assert_equal("suffix should be 'world'", "world", result.suffix); + + result = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ"); + t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix); + t.assert_equal("left should be 'middle'", "middle", result.left); + t.assert_equal("right should be 'different'", "different", result.right); + t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix); + + result = calculate_diff_split("startAend", "startBend"); + t.assert_equal("prefix should be 'start'", "start", result.prefix); + t.assert_equal("left should be 'A'", "A", result.left); + t.assert_equal("right should be 'B'", "B", result.right); + t.assert_equal("suffix should be 'end'", "end", result.suffix); + + // Edge case: common prefix and suffix overlap + result = calculate_diff_split("aa", "ab"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be 'a'", "a", result.left); + t.assert_equal("right should be 'b'", "b", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_empty_cases(testing & t) { + // Empty left, non-empty right + diff_split result = calculate_diff_split("", "hello"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'hello'", "hello", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Non-empty left, empty right + result = calculate_diff_split("hello", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'hello'", "hello", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Both empty + result = calculate_diff_split("", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Left single char, empty right + result = calculate_diff_split("a", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'a'", "a", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Empty left, right single char + result = calculate_diff_split("", "a"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'a'", "a", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_no_common(testing & t) { + diff_split result = calculate_diff_split("abc", "xyz"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("left", "right"); + // The algorithm finds "t" as a common suffix since both strings end with 't' + // This is the algorithm's actual behavior - it finds maximal common suffix + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'lef'", "lef", result.left); + t.assert_equal("right should be 'righ'", "righ", result.right); + t.assert_equal("suffix should be 't'", "t", result.suffix); + + result = calculate_diff_split("123", "456"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be '123'", "123", result.left); + t.assert_equal("right should be '456'", "456", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_single_char(testing & t) { + diff_split result = calculate_diff_split("a", "b"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'a'", "a", result.left); + t.assert_equal("right should be 'b'", "b", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("a", "a"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("a", "ab"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'b'", "b", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("ab", "a"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be 'b'", "b", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_overlaps(testing & t) { + // One string is substring of another + diff_split result = calculate_diff_split("test", "testing"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'ing'", "ing", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("testing", "test"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be 'ing'", "ing", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Similar strings with one extra char at start + result = calculate_diff_split("Xtest", "Ytest"); + // The algorithm finds "test" as a common suffix since both strings end with "test" + // This is the algorithm's actual behavior - it finds maximal common suffix + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'X'", "X", result.left); + t.assert_equal("right should be 'Y'", "Y", result.right); + t.assert_equal("suffix should be 'test'", "test", result.suffix); + + // Similar strings with one extra char at end + result = calculate_diff_split("testX", "testY"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be 'X'", "X", result.left); + t.assert_equal("right should be 'Y'", "Y", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Strings that are reverses + result = calculate_diff_split("abc", "cba"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'cba'", "cba", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_tag_boundaries(testing & t) { + // Test with unclosed XML tags + diff_split result = calculate_diff_split("testcontent"); + // The fix_tag_boundaries should move incomplete tags appropriately + t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0); + t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != ""); + + // Test with unclosed brackets + result = calculate_diff_split("test[", "test]value"); + t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != ""); + + // Test with partial tags on both sides + result = calculate_diff_split("prefix", "prefixsuffix"); + // fix_tag_boundaries moves the incomplete '<' from prefix to left/right + t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix); + t.assert_equal("left should be ''", "", result.left); + t.assert_equal("right should be 'suffix'", "suffix", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Test with complex nested tags + result = calculate_diff_split("prefix
content
", "prefix
different
"); + // Algorithm finds "ent" as a common suffix because both strings end with it + // This is the actual algorithm behavior, though not semantically ideal + t.assert_equal("prefix should be 'prefix
'", "prefix
", result.prefix); + t.assert_equal("left should be 'cont'", "cont", result.left); + t.assert_equal("right should be 'differ'", "differ", result.right); + t.assert_equal("suffix should be 'ent
'", "ent
", result.suffix); + + // Test with unclosed angle bracket + result = calculate_diff_split("Hello ", "Hello test"); + t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix); + t.assert_true("left should contain ''", result.left.find("") != std::string::npos); + t.assert_equal("right should be 'test'", "test", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Test with unclosed square bracket + result = calculate_diff_split("test [array]", "test other"); + t.assert_equal("prefix should be 'test '", "test ", result.prefix); + t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos); + t.assert_equal("right should be 'other'", "other", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Test empty prefix and suffix with tags + result = calculate_diff_split("left", "righ"); + t.assert_equal("prefix should be ''", "", result.prefix); + t.assert_equal("left should be 'left'", "left", result.left); + t.assert_equal("right should be 'righ'", "righ", result.right); + t.assert_equal("suffix should be ''", "", result.suffix); + + { + // real case from template tests, simplified + std::string left = "PREFIX
Sure"; + std::string right = "PREFIXLemme thinkSure"; + result = calculate_diff_split(left, right); + t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix); + t.assert_equal("suffix should be Sure", "Sure", result.suffix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be Lemme think", "Lemme think", result.right); + } + + { + // Real case: special tokens with |> boundary issue + // The suffix starts with |> which should be moved to complete <|END_RESPONSE and <|END_ACTION + std::string prefix = "SOME_PREFIX"; + std::string suffix = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; + std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE"; + std::string right_diff = + "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", " + "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n" + "]<|END_ACTION"; + + std::string left = prefix + left_diff + suffix; + std::string right = prefix + right_diff + suffix; + result = calculate_diff_split(left, right); + + t.assert_equal("special token prefix", prefix, result.prefix); + // The |> should be moved from suffix to complete the tokens + t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left); + t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos); + t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + result.suffix); + } +} + +static void test_until_common_prefix(testing & t) { + t.test("until_common_prefix basic", test_until_common_prefix_basic); +} + +static void test_until_common_prefix_basic(testing & t) { + // Test case from the user request + std::string result = until_common_prefix("", "", ""); + t.assert_equal("untilCommonPrefix should return ''", "", result); + + // Additional test cases to ensure robustness + // Test with different common prefix lengths + result = until_common_prefix("prefixsuffix", "different", "other"); + t.assert_equal("should return 'prefix'", "prefix", result); + + // Test when common prefix is at the start + result = until_common_prefix("rest", "left", "right"); + t.assert_equal("should return empty string when common prefix at start", "", result); + + // Test when there's no common prefix + result = until_common_prefix("something", "left", "right"); + t.assert_equal("should return empty string when no common prefix", "", result); + + // Test with empty strings + result = until_common_prefix("test", "", "right"); + t.assert_equal("should return empty string when left is empty", "", result); + + // Test with longer common prefix + result = until_common_prefix("abcXYZrest", "left", "right"); + t.assert_equal("should return 'abcXYZ'", "abcXYZ", result); +} + +static void test_after_common_suffix(testing & t) { + t.test("after_common_suffix basic", test_after_common_suffix_basic); +} + +static void test_after_common_suffix_basic(testing & t) { + // Test case from the user request + std::string result = after_common_suffix("100", + "100", + "535"); + t.assert_equal("afterCommonSuffix should return ''", "", result); + + // Test when common suffix is at the end + result = after_common_suffix("rest", "left", "right"); + t.assert_equal("should return empty string when common suffix at end", "", result); + + // Test with empty strings + result = after_common_suffix("test", "left", ""); + t.assert_equal("should return empty string when right is empty", "", result); + + // Test case with XML-like structure similar to the main example + result = after_common_suffix("value", + "value", + "different"); + t.assert_equal("should return ''", "", result); + + // Test with longer common suffix appearing at the end of full + result = after_common_suffix("prefixrest", "prefixleft", "prefixright"); + t.assert_equal("should return '' when common suffix is at end of full", "", result); + + // Test with common suffix appearing in middle but not at end + result = after_common_suffix("content", "value", "other"); + t.assert_equal("should return '' when common suffix appears before end", "", result); + + // Test with multi-character common suffix at the very end of full + result = after_common_suffix("startend", "prefixleft", "prefixright"); + t.assert_equal("should return '' when common suffix is at end of full", "", result); +} + +static void test_compare_variants(testing & t) { + t.test("compare_variants basic", test_compare_variants_basic); + t.test("compare_variants messages modifier", test_compare_variants_messages_modifier); + t.test("compare_variants tools modifier", test_compare_variants_tools_modifier); + t.test("compare_variants both modifiers", test_compare_variants_both_modifiers); + t.test("compare_variants template failure", test_compare_variants_template_failure); + t.test("compare_variants identity", test_compare_variants_identity); +} + +static void test_compare_variants_basic(testing & t) { + // Create a simple template that just echoes messages + common_chat_template tmpl("{{ messages[0]['content'] }}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "Hello"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "World"; + }; + + auto result = autoparser::compare_variants(tmpl, params, modifier); + + if (!t.assert_true("result should have value", result.has_value())) { + return; + } + // The template might not output anything if messages is empty or format is different + // Check that we get a valid result + t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty()); +} + +static void test_compare_variants_messages_modifier(testing & t) { + // Test with messages modifier only + common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "A"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "B"; + }; + + std::optional result = autoparser::compare_variants(tmpl, params, modifier); + + if (!t.assert_true("result should have value", result.has_value())) { + return; + } + t.assert_equal("left should be 'A'", "A", result->diff.left); + t.assert_equal("right should be 'B'", "B", result->diff.right); +} + +static void test_compare_variants_tools_modifier(testing & t) { + // Test with tools modifier only + common_chat_template tmpl( + "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", ""); + + template_params params; + params.tools = json::array({ + json {{"name", "foo"}} + }); + + auto modifier = [](template_params & p) { + p.tools[0]["name"] = "bar"; + }; + + auto result = autoparser::compare_variants(tmpl, params, modifier); + + if (!t.assert_true("result should have value", result.has_value())) { + return; + } + t.assert_equal("left should be 'foo'", "foo", result->diff.left); + t.assert_equal("right should be 'bar'", "bar", result->diff.right); +} + +static void test_compare_variants_both_modifiers(testing & t) { + // Test with both messages and tools modifiers using the for loop approach + common_chat_template tmpl( + "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "A"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "B"; + p.messages[0]["role"] = "newuser"; + }; + + auto result = autoparser::compare_variants(tmpl, params, modifier); + + if (!t.assert_true("result should have value", result.has_value())) { + return; + } + t.assert_equal("left should be 'user:A'", "user:A", result->diff.left); + t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right); +} + +static void test_compare_variants_template_failure(testing & t) { + // Test with template that causes failure during application (not construction) + // We use a valid template syntax but one that will fail during application + common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "Hello"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "World"; + }; + + auto result = autoparser::compare_variants(tmpl, params, modifier); + + t.assert_true("result should be nullopt on template failure", !result.has_value()); +} + +static void test_compare_variants_identity(testing & t) { + // Test with identity modifier (no change) + common_chat_template tmpl("{{ messages[0]['content'] }}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "Hello"}} + }); + + // No modifier - should use identity + auto result = autoparser::compare_variants(tmpl, params, nullptr); + + if (!t.assert_true("result should have value", result.has_value())) { + return; + } + t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix); + t.assert_equal("left should be empty", "", result->diff.left); + t.assert_equal("right should be empty", "", result->diff.right); + t.assert_equal("suffix should be empty", "", result->diff.suffix); +} + +// ============================================================================ +// Seed-OSS Template Tool Calling Analysis Tests +// ============================================================================ + +static void test_seed_oss_tool_analysis(testing & t) { + t.test("Seed-OSS tool presence", test_seed_oss_tool_presence); + t.test("Seed-OSS call count", test_seed_oss_call_count); + t.test("Seed-OSS function names", test_seed_oss_function_names); + t.test("Seed-OSS argument count", test_seed_oss_argument_count); + t.test("Seed-OSS args presence", test_seed_oss_args_presence); + t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning); +} + +// Helper to load Seed-OSS template +static common_chat_template load_seed_oss_template(testing & t) { + std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja"; + std::ifstream fin(template_path, std::ios::binary); + std::ostringstream buf; + if (fin.is_open()) { + buf << fin.rdbuf(); + } + std::string template_source = buf.str(); + common_chat_template tmpl(template_source, "", ""); + t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0); + return tmpl; +} + +// Helper to build tool call JSON +static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") { + return json{ + {"id", id}, + {"type", "function"}, + {"function", json{ + {"name", name}, + {"arguments", args} + }} + }; +} + +// Helper to build tools definition +static json build_tools_definition() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["param1"] = json::object({ + {"type", "string"}, + {"description", "First parameter"} + }); + parameters_schema["properties"]["param2"] = json::object({ + {"type", "string"}, + {"description", "Second parameter"} + }); + parameters_schema["required"] = json::array({"param1", "param2"}); + + return json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "test_function_name"}, + {"description", "A test function for debugging"}, + {"parameters", parameters_schema} + }} + } + }); +} + +// T1: Compare with/without tool call (user, assistant) +static void test_seed_oss_tool_presence(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_no_tools = json{ + {"role", "assistant"}, + {"content", "Let me help you."} + }; + + json assistant_with_tools = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; + + template_params params_no_tools; + params_no_tools.messages = json::array({user_msg, assistant_no_tools}); + params_no_tools.tools = build_tools_definition(); + params_no_tools.add_generation_prompt = false; + params_no_tools.enable_thinking = true; + + template_params params_with_tools; + params_with_tools.messages = json::array({user_msg, assistant_with_tools}); + params_with_tools.tools = build_tools_definition(); + params_with_tools.add_generation_prompt = false; + params_with_tools.enable_thinking = true; + + auto result = autoparser::compare_variants(tmpl, params_no_tools, + [&](template_params & p) { + p.messages = params_with_tools.messages; + }); + + if (!t.assert_true("T1 result should have value", result.has_value())) { + return; + } + + const auto & diff = result->diff; + t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos); + t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos); + t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos); + + // Left should be the assistant content without tool + t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left); + + // Right should contain the tool call markers + t.assert_true("T1 right should contain tool_call begin", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain function tag", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain parameter=param1", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain parameter=param2", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos); + t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos); + t.assert_true("T1 right should contain tool_call end", diff.right.find("") != std::string::npos); + + // Suffix should be the eos token + t.assert_equal("T1 suffix should be ''", "", diff.suffix); +} + +// T2: Compare one vs two tool calls +static void test_seed_oss_call_count(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_one_call = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json assistant_two_calls = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})), + build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002") + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; + + template_params params_one; + params_one.messages = json::array({user_msg, assistant_one_call}); + params_one.tools = build_tools_definition(); + params_one.add_generation_prompt = false; + params_one.enable_thinking = true; + + auto result = autoparser::compare_variants(tmpl, params_one, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_two_calls}); + }); + + if (!t.assert_true("T2 result should have value", result.has_value())) { + return; + } + + const auto & diff = result->diff; + + // Prefix should include the first tool call + t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("") != std::string::npos); + t.assert_true("T2 prefix should contain first function", diff.prefix.find("") != std::string::npos); + t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos); + t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos); + t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("") != std::string::npos); + + // Left should be empty (no second tool call in variant A) + t.assert_equal("T2 left should be empty", "", diff.left); + + // Right should contain the second tool call + t.assert_true("T2 right should contain second tool_call begin", diff.right.find("") != std::string::npos); + t.assert_true("T2 right should contain second function", diff.right.find("") != std::string::npos); + t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos); + t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos); + t.assert_true("T2 right should contain second tool_call end", diff.right.find("") != std::string::npos); + + // Suffix should end with the eos token + t.assert_equal("T2 suffix should end with ''", "", diff.suffix.substr(diff.suffix.length() - 10, 10)); +} + +// T3: Compare different function names +static void test_seed_oss_function_names(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + // Build tools with two different function names + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["arg1"] = json::object({ + {"type", "string"}, + {"description", "Argument 1"} + }); + parameters_schema["required"] = json::array({"arg1"}); + + json tools = json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "func_alpha"}, + {"description", "First function"}, + {"parameters", parameters_schema} + }} + }, + json{ + {"type", "function"}, + {"function", json{ + {"name", "func_beta"}, + {"description", "Second function"}, + {"parameters", parameters_schema} + }} + } + }); + + json assistant_func_alpha = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("func_alpha", json::object({{"arg1", "test_value"}})) + })} + }; + + json assistant_func_beta = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("func_beta", json::object({{"arg1", "test_value"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello"} + }; + + template_params params_alpha; + params_alpha.messages = json::array({user_msg, assistant_func_alpha}); + params_alpha.tools = tools; + params_alpha.add_generation_prompt = false; + params_alpha.enable_thinking = true; + + auto result = autoparser::compare_variants(tmpl, params_alpha, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_func_beta}); + }); + + if (!t.assert_true("T3 result should have value", result.has_value())) { + return; + } + + const auto & diff = result->diff; + + bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos; + bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos; + bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos; + bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos; + bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos; + + // Left should contain func_alpha (or be in prefix) + t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix); + + // Right should contain func_beta + t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix); + + // Both should have the same parameter value (in common parts, not in diffs) + // Since both have same args, test_value will be in prefix/suffix + t.assert_true("T3 diff should contain test_value (in prefix or suffix)", + diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos); +} + +// T4: Compare different argument counts (zero, one, two parameters) +static void test_seed_oss_argument_count(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + // Build tools with 0, 1, or 2 required parameters + json params_2_required = json::object(); + params_2_required["type"] = "object"; + params_2_required["properties"] = json::object(); + params_2_required["properties"]["arg1"] = json::object({ + {"type", "string"}, + {"description", "Argument 1"} + }); + params_2_required["properties"]["arg2"] = json::object({ + {"type", "string"}, + {"description", "Argument 2"} + }); + params_2_required["required"] = json::array({"arg1", "arg2"}); + + json params_1_required = json::object(); + params_1_required["type"] = "object"; + params_1_required["properties"] = json::object(); + params_1_required["properties"]["arg1"] = json::object({ + {"type", "string"}, + {"description", "Argument 1"} + }); + params_1_required["required"] = json::array({"arg1"}); + + json tools = json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "test_func"}, + {"description", "Test function"}, + {"parameters", params_2_required} + }} + } + }); + + // Test: zero args vs one arg + json assistant_zero_args = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_func", json::object()) + })} + }; + + json assistant_one_arg = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_func", json::object({{"arg1", "value1"}})) + })} + }; + + json assistant_two_args = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello"} + }; + + // Test zero vs one + template_params params_zero; + params_zero.messages = json::array({user_msg, assistant_zero_args}); + params_zero.tools = tools; + params_zero.add_generation_prompt = false; + params_zero.enable_thinking = true; + + auto result_zero_one = autoparser::compare_variants(tmpl, params_zero, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_one_arg}); + }); + + if (!t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value())) { + return; + } + t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == ""); + t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos); + + // Test one vs two + template_params params_one; + params_one.messages = json::array({user_msg, assistant_one_arg}); + params_one.tools = tools; + params_one.add_generation_prompt = false; + params_one.enable_thinking = true; + + auto result_one_two = autoparser::compare_variants(tmpl, params_one, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_two_args}); + }); + + if (!t.assert_true("T4 one vs two result should have value", result_one_two.has_value())) { + return; + } + + const auto & diff4 = result_one_two->diff; + t.assert_true("T4 one vs two left should contain arg1 (or prefix)", + diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos); + t.assert_true("T4 one vs two right should contain arg1 (or prefix)", + diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos); + t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)", + diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos); +} + +// T5: Compare different argument values +static void test_seed_oss_args_presence(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_same_arg = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}})) + })} + }; + + json assistant_other_arg = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param2", "value2"}})) + })} + }; + + json assistant_both_args = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello"} + }; + + template_params params_same; + params_same.messages = json::array({user_msg, assistant_same_arg}); + params_same.tools = build_tools_definition(); + params_same.add_generation_prompt = false; + params_same.enable_thinking = true; + + // Test same arg vs other arg + auto result_same_other = autoparser::compare_variants(tmpl, params_same, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_other_arg}); + }); + + if (!t.assert_true("T5 same vs other result should have value", result_same_other.has_value())) { + return; + } + const auto & diff5a = result_same_other->diff; + t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)", + diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos); + t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)", + diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos); + t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)", + diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos); + t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)", + diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos); + + // Test same arg vs both args + auto result_same_both = autoparser::compare_variants(tmpl, params_same, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_both_args}); + }); + + if (!t.assert_true("T5 same vs both result should have value", result_same_both.has_value())) { + return; + } + const auto & diff5b = result_same_both->diff; + t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)", + diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); + t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)", + diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); + t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)", + diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos); +} + +// T6: Tool call with vs without reasoning_content +static void test_seed_oss_tool_with_reasoning(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_tool_only = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json assistant_tool_with_reasoning = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })}, + {"reasoning_content", "I need to call the tool first."} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; + + template_params params_tool_only; + params_tool_only.messages = json::array({user_msg, assistant_tool_only}); + params_tool_only.tools = build_tools_definition(); + params_tool_only.add_generation_prompt = false; + params_tool_only.enable_thinking = true; + + auto result = autoparser::compare_variants(tmpl, params_tool_only, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_tool_with_reasoning}); + }); + + if (!t.assert_true("T6 result should have value", result.has_value())) { + return; + } + + const auto & diff = result->diff; + + // Left should be empty (no reasoning in variant A) + t.assert_equal("T6 left should be empty", "", diff.left); + + // Right should contain the thinking token with reasoning content + t.assert_true("T6 right should contain think begin", diff.right.find("") != std::string::npos); + t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos); + t.assert_true("T6 right should contain think end", diff.right.find("") != std::string::npos); + + // Prefix should contain the assistant role + t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos); + + // Suffix should contain the tool call + t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("") != std::string::npos); + t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos); + t.assert_true("T6 suffix should contain eos", diff.suffix.find("") != std::string::npos); +} + +static common_chat_template load_template(testing & t, const std::string & template_path) { + std::ifstream fin(template_path, std::ios::binary); + std::ostringstream buf; + if (fin.is_open()) { + buf << fin.rdbuf(); + } + std::string template_source = buf.str(); + common_chat_template tmpl(template_source, "", ""); + t.assert_true("Nemotron template loaded successfully", template_source.length() > 0); + return tmpl; +} + +// ============================================================================ +// Nemotron Template Analysis Tests +// ============================================================================ +static common_chat_template load_nemotron_template(testing & t) { + return load_template(t, "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja"); +} + +static void test_nemotron_analysis(testing & t) { + t.test("Nemotron reasoning detection", test_nemotron_reasoning_detection); + t.test("Nemotron tool format", test_nemotron_tool_format); +} + +static void test_nemotron_reasoning_detection(testing & t) { + common_chat_template tmpl = load_nemotron_template(t); + + // Test the comparison manually to see what's happening + json user_msg = json{ { "role", "user" }, { "content", "Hello" } }; + json assistant_no_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." } + }; + json assistant_with_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." }, + { "reasoning_content", "Let me think about this." } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_reasoning }); + params.add_generation_prompt = false; + params.enable_thinking = true; + + // Run differential analysis + auto analysis = autoparser::analyze_template(tmpl); + + // Check reasoning markers + t.assert_equal("reasoning_start should be ''", "", analysis.reasoning.start); + t.assert_equal("reasoning_end should be ''", "", analysis.reasoning.end); + + // Check reasoning mode detection + // Nemotron uses forced closed reasoning with add_generation_prompt + t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning.mode); + + // Make sure reasoning markers don't spill over to content markers + t.assert_equal("content start should be empty", "", analysis.content.start); + t.assert_equal("content end should be empty", "", analysis.content.end); + + t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content.mode); +} + +static void test_nemotron_tool_format(testing & t) { + common_chat_template tmpl = load_nemotron_template(t); + + // Run differential analysis + auto analysis = autoparser::analyze_template(tmpl); + + // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped) + t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start); + t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.tools.format.section_end); + t.assert_equal("per_call_start should be '\\n'", "\n", analysis.tools.format.per_call_start); + t.assert_equal("per_call_end should be ''", "", analysis.tools.format.per_call_end); + t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls); + + // Check function markers + t.assert_equal("func_name_prefix should be '\\n'", ">\n", analysis.tools.function.name_suffix); + t.assert_equal("func_close should be '\\n'", "\n", analysis.tools.function.close); + + // Check argument markers (note: markers retain trailing newlines for proper parsing) + t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.tools.arguments.name_suffix); + t.assert_equal("arg_value_suffix should be '\\n'", "\n", analysis.tools.arguments.value_suffix); + + // Check format classification + t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED); + + // Verify tool support + t.assert_true("should support tools", analysis.jinja_caps.supports_tools); +} + +static common_chat_template load_cohere_template(testing & t) { + return load_template(t, "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"); +} + +static void test_cohere_analysis(testing & t) { + t.test("Cohere reasoning detection", test_cohere_reasoning_detection); +} + +static void test_cohere_reasoning_detection(testing & t) { + common_chat_template tmpl = load_cohere_template(t); + + // Run differential analysis + auto analysis = autoparser::analyze_template(tmpl); + + // Check reasoning markers - Cohere uses special token format + t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start); + t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.reasoning.end); + + // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY) + t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning.mode); + + // Check content markers - Cohere wraps all content with START/END_RESPONSE + t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.content.start); + t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.content.end); + + // Content is always wrapped (both with and without tools) + t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content.mode); +} + +static void test_tool_format_cohere(testing & t) { + common_chat_template tmpl = load_cohere_template(t); + + // Run differential analysis + auto analysis = autoparser::analyze_template(tmpl); + + // Check tool section markers - Cohere uses ACTION markers + t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start); + t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.tools.format.section_end); + + // JSON_NATIVE format has no per-call markers + t.assert_equal("per_call_start should be empty", "", analysis.tools.format.per_call_start); + t.assert_equal("per_call_end should be empty", "", analysis.tools.format.per_call_end); + + // JSON_NATIVE format has empty function markers (no XML-style markers) + t.assert_equal("func_name_prefix should be empty", "", analysis.tools.function.name_prefix); + t.assert_equal("func_name_suffix should be empty", "", analysis.tools.function.name_suffix); + t.assert_equal("func_close should be empty", "", analysis.tools.function.close); + + // JSON_NATIVE format has empty args markers + t.assert_equal("args_start should be empty", "", analysis.tools.arguments.start); + t.assert_equal("args_end should be empty", "", analysis.tools.arguments.end); + + // JSON_NATIVE format has empty argument markers + t.assert_equal("arg_name_prefix should be empty", "", analysis.tools.arguments.name_prefix); + t.assert_equal("arg_name_suffix should be empty", "", analysis.tools.arguments.name_suffix); + t.assert_equal("arg_value_prefix should be empty", "", analysis.tools.arguments.value_prefix); + t.assert_equal("arg_value_suffix should be empty", "", analysis.tools.arguments.value_suffix); + t.assert_equal("arg_separator should be empty", "", analysis.tools.arguments.separator); + + // Check JSON field names - Cohere uses non-standard names + t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.tools.format.name_field); + t.assert_equal("args_field should be 'parameters'", "parameters", analysis.tools.format.args_field); + // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format + t.assert_equal("id_field should be 'tool_call_id'", "", analysis.tools.format.id_field); + + // Check format classification + t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools.format.mode); + + // Check flags + t.assert_true("should support tools", analysis.jinja_caps.supports_tools); + t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls); + t.assert_true("should not require nonnull content", !analysis.content.requires_nonnull_content); + t.assert_true("tools_array_wrapped should be true", analysis.tools.format.tools_array_wrapped); +} + +// ============================================================================ +// standard_json_tools Format Tests +// ============================================================================ + +// Helper to build tools definition for tests +static json build_test_tools() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["location"] = json::object({ + {"type", "string"}, + {"description", "The city and state"} + }); + parameters_schema["properties"]["unit"] = json::object({ + {"type", "string"}, + {"description", "Temperature unit"}, + {"enum", json::array({"celsius", "fahrenheit"})} + }); + parameters_schema["required"] = json::array({"location"}); + + return json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "get_current_weather"}, + {"description", "Get the current weather in a given location"}, + {"parameters", parameters_schema} + }} + } + }); +} + +static void test_standard_json_tools_formats(testing & t) { + t.test("OpenAI format", test_standard_json_tools_openai); + t.test("Cohere format", test_standard_json_tools_cohere); + t.test("function-as-key format", test_standard_json_tools_function_key); +} + +// Test 1: OpenAI Standard Format +// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}} +static void test_standard_json_tools_openai(testing & t) { + json tools = build_test_tools(); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto tool_call = p.standard_json_tools( + "", "", tools, + /* parallel */ true, + /* force */ false, + /* name_key */ "function.name", + /* args_key */ "function.arguments", + /* array_wrapped */ false, + /* function_is_key */ false, + /* call_id_key */ "id", + /* gen_call_id_key */ "", + /* parameters_order */ {} + ); + return p.content(p.until("")) + p.optional(tool_call) + p.end(); + }); + + std::string input = + "Let me check the weather." + "" + R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + if (!t.assert_true("parse success", result.success())) { + return; + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id); + } + t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos); +} + +// Test 2: Cohere Format +// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}} +static void test_standard_json_tools_cohere(testing & t) { + json tools = build_test_tools(); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto tool_call = p.standard_json_tools( + "<|START_ACTION|>[", "]<|END_ACTION|>", tools, + /* parallel */ true, + /* force */ false, + /* name_key */ "tool_name", + /* args_key */ "parameters", + /* array_wrapped */ false, // Brackets are part of section markers + /* function_is_key */ false, + /* call_id_key */ "", + /* gen_call_id_key */ "tool_call_id", + /* parameters_order */ {"tool_call_id", "tool_name", "parameters"} + ); + return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end(); + }); + + std::string input = + "Let me search for that." + "<|START_ACTION|>[" + R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})" + "]<|END_ACTION|>"; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + if (!t.assert_true("parse success", result.success())) { + return; + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool id", "0", msg.tool_calls[0].id); + } + t.assert_true("content present", msg.content.find("Let me search") != std::string::npos); +} + +// Test 3: Function-as-Key Format +// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}} +static void test_standard_json_tools_function_key(testing & t) { + json tools = build_test_tools(); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto tool_call = p.standard_json_tools( + "[", "]", tools, + /* parallel */ true, + /* force */ false, + /* name_key */ "", // Name is the key itself + /* args_key */ "args", + /* array_wrapped */ false, + /* function_is_key */ true, + /* call_id_key */ "id", + /* gen_call_id_key */ "", + /* parameters_order */ {} + ); + return p.content(p.until("")) + p.optional(tool_call) + p.end(); + }); + + std::string input = + "I'll call the weather function." + "[" + R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})" + "]"; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + if (!t.assert_true("parse success", result.success())) { + return; + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id); + } + t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos); +} + +// ============================================================================ +// normalize_quotes_to_json Tests +// ============================================================================ + +// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp) +static std::string normalize_quotes_to_json(const std::string & input) { + std::string result; + result.reserve(input.size() + 16); + + bool in_single_quoted = false; + bool in_double_quoted = false; + + for (size_t i = 0; i < input.size(); ++i) { + char c = input[i]; + + if (c == '\\' && i + 1 < input.size()) { + char next = input[i + 1]; + + if (in_single_quoted) { + if (next == '\'') { + result += '\''; + ++i; + continue; + } + if (next == '"') { + result += "\\\""; + ++i; + continue; + } + result += c; + result += next; + ++i; + continue; + } + + if (in_double_quoted) { + result += c; + result += next; + ++i; + continue; + } + + result += c; + continue; + } + + if (c == '"') { + if (in_single_quoted) { + result += "\\\""; + } else { + in_double_quoted = !in_double_quoted; + result += c; + } + } else if (c == '\'') { + if (in_double_quoted) { + result += c; + } else if (in_single_quoted) { + in_single_quoted = false; + result += '"'; + } else { + in_single_quoted = true; + result += '"'; + } + } else { + result += c; + } + } + + return result; +} + +static void test_normalize_quotes_to_json(testing & t) { + t.test("basic single to double quotes", [](testing & t) { + std::string input = "{'key': 'value'}"; + std::string expected = "{\"key\": \"value\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("basic conversion", expected, result); + }); + + t.test("escaped single quote inside single-quoted string", [](testing & t) { + std::string input = "{'code': 'print(\\'hello\\')'}"; + std::string expected = "{\"code\": \"print('hello')\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("escaped single quote", expected, result); + }); + + t.test("double quote inside single-quoted string", [](testing & t) { + std::string input = "{'msg': 'He said \"hi\"'}"; + std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("double quote escaping", expected, result); + }); + + t.test("nested backslash escapes", [](testing & t) { + std::string input = "{'path': 'C:\\\\Users\\\\test'}"; + std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("backslash escaping", expected, result); + }); + + t.test("newline escapes", [](testing & t) { + std::string input = "{'text': 'line1\\nline2'}"; + std::string expected = "{\"text\": \"line1\\nline2\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("newline escaping", expected, result); + }); + + t.test("mixed quotes", [](testing & t) { + std::string input = "{\"already_double\": 'single_value'}"; + std::string expected = "{\"already_double\": \"single_value\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("mixed quotes", expected, result); + }); + + t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes); +} + +// Test case that mirrors the Seed-OSS failing test scenario +static void test_normalize_quotes_with_embedded_quotes(testing & t) { + // This is similar to the Seed-OSS template test case + // The input has embedded double quotes like "14" and "bar" inside string values + std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n pass\\n'}"; + + // Expected: Python single quotes -> JSON double quotes, internal double quotes escaped + std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}"; + + std::string result = normalize_quotes_to_json(input); + + t.assert_equal("normalize quotes with embedded double quotes", expected, result); + + // Also verify the result is valid JSON + try { + json parsed = json::parse(result); + t.assert_true("result is valid JSON", true); + t.assert_equal("filename field", "foo.cpp", parsed["filename"].get()); + t.assert_true("oldString contains embedded quotes", + parsed["oldString"].get().find("\"14\"") != std::string::npos); + t.assert_true("newString contains embedded quotes", + parsed["newString"].get().find("\"15\"") != std::string::npos); + } catch (const std::exception & e) { + t.assert_true(std::string("JSON parse failed: ") + e.what(), false); + } +} + +// ============================================================================ +// TAG_WITH_TAGGED Argument Parsing Tests +// ============================================================================ + +// Build tools definition for edit function +static json build_edit_tool() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["filename"] = json::object({ + {"type", "string"}, + {"description", "Path of file to edit"} + }); + parameters_schema["properties"]["oldString"] = json::object({ + {"type", "string"}, + {"description", "String to replace"} + }); + parameters_schema["properties"]["newString"] = json::object({ + {"type", "string"}, + {"description", "New (replacement) value"} + }); + parameters_schema["required"] = json::array({"filename", "oldString", "newString"}); + + return json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "edit"}, + {"description", "Edit a file"}, + {"parameters", parameters_schema} + }} + } + }); +} + +// Test that reproduces the Seed-OSS template issue with embedded quotes +static void test_tagged_args_with_embedded_quotes(testing & t) { + json tools = build_edit_tool(); + + // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + // Build tool choice for the edit function + auto tool_choice = p.choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { continue; } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); + + if (!params.contains("properties") || !params.at("properties").is_object()) { continue; } + + const auto & properties = params.at("properties"); + + // Build argument parsers + std::vector arg_parsers; + for (const auto & [param_name, param_schema] : properties.items()) { + auto arg = p.tool_arg( + p.tool_arg_open(p.literal("")) + + p.space() + + p.tool_arg_string_value(p.until("")) + + p.space() + + p.tool_arg_close(p.literal("")) + ); + arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg))); + } + + // Build arg sequence with space() between + common_peg_parser args_seq = p.eps(); + for (size_t i = 0; i < arg_parsers.size(); i++) { + if (i > 0) { + args_seq = args_seq + p.space(); + } + args_seq = args_seq + arg_parsers[i]; + } + + auto func_parser = + p.tool_open(p.literal("")) + + p.space() + args_seq + p.space() + + p.tool_close(p.literal("")); + + tool_choice |= p.rule("tool-" + name, p.tool(func_parser)); + } + + auto tool_section = + p.literal("") + p.space() + + tool_choice + + p.space() + p.literal(""); + + return p.content(p.until("")) + p.optional(tool_section) + p.end(); + }); + + // The exact input from the failing test + std::string input = + "\n" + "\n" + "\n" + "foo.cpp\n" + "\n" + "" + "def foo(arg = \"14\"):\n" + " return arg + \"bar\"\n" + "\n" + "\n" + "" + "def foo(arg = \"15\"):\n" + " pass\n" + "\n" + "\n" + "\n" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + if (!t.assert_true("parse success", result.success())) { + return; + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "edit", msg.tool_calls[0].name); + + // Parse the arguments as JSON to verify they're valid + std::string args = msg.tool_calls[0].arguments; + + try { + json parsed = json::parse(args); + t.assert_true("arguments is valid JSON", true); + + // Verify each field has proper value + t.assert_equal("filename", "foo.cpp", parsed.value("filename", "")); + + std::string oldString = parsed.value("oldString", ""); + t.assert_true("oldString contains embedded quotes", + oldString.find("\"14\"") != std::string::npos); + t.assert_true("oldString contains bar with quotes", + oldString.find("\"bar\"") != std::string::npos); + + std::string newString = parsed.value("newString", ""); + t.assert_true("newString contains embedded quotes", + newString.find("\"15\"") != std::string::npos); + + } catch (const std::exception & e) { + t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false); + } + } +} + diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp deleted file mode 100644 index 6f44a2b421..0000000000 --- a/tests/test-chat-parser.cpp +++ /dev/null @@ -1,617 +0,0 @@ -// Tests chat handling, including grammar generation and parsing for tool calling, for various templates. -// -// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates, -// e.g. given Minja (http://github.com/google/minja) checked out in parent dir: -// -// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null -// -#include -#include -#include - -#include "chat-parser.h" -#include "common.h" -#include "log.h" -#include "regex-partial.h" - -template -static void assert_equals(const std::string_view label, const T & expected, const T & actual) { - if (expected != actual) { - std::cerr << label << std::endl; - std::cerr << "Expected: " << expected << std::endl; - std::cerr << "Actual: " << actual << std::endl; - std::cerr << std::flush; - throw std::runtime_error("Test failed"); - } -} - -template -static void assert_equals(const T & expected, const T & actual) { - assert_equals("", expected, actual); -} -static void assert_equals(const char * expected, const std::string & actual) { - return assert_equals(expected, actual); -} - -static void assert_throws(const std::function & fn, const std::string & expected_exception_pattern = "") { - try { - fn(); - } catch (const std::exception & e) { - if (expected_exception_pattern.empty()) { - return; - } - std::regex expected_exception_regex(expected_exception_pattern); - std::string actual_message = e.what(); - if (std::regex_search(actual_message, expected_exception_regex)) { - return; - } - throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")"); - throw std::runtime_error("Exception of unexpected type: " + std::string(e.what())); - } - throw std::runtime_error("Exception was expected but not thrown"); -} - -static void test_reasoning() { - //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_NONE; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(false, builder.try_parse_reasoning("", "")); - assert_equals("CogitoErgo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(true, builder.try_parse_reasoning("", "")); - assert_equals(std::string("Cogito"), builder.result().reasoning_content); - assert_equals("Ergo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_NONE; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(false, builder.try_parse_reasoning("", "")); - assert_equals("CogitoErgo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(true, builder.try_parse_reasoning("", "")); - assert_equals(std::string("Cogito"), builder.result().reasoning_content); - assert_equals("Ergo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = true; - params.thinking_forced_open = true; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(true, builder.try_parse_reasoning("", "")); - assert_equals("Cogito", builder.result().content); - assert_equals("Ergo sum", builder.consume_rest()); - } - { - const std::string variant("content_only_inline_think"); - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = false; - const std::string input = "PenseBonjour"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, std::string("Pense"), msg.reasoning_content); - assert_equals(variant, std::string("Bonjour"), msg.content); - } - { - const std::string variant("llama_3_inline_think"); - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_LLAMA_3_X; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = false; - const std::string input = "PlanRéponse"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, std::string("Plan"), msg.reasoning_content); - assert_equals(variant, std::string("Réponse"), msg.content); - } - // Test DeepSeek V3.1 parsing - reasoning content followed by "" and then regular content - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("deepseek_v3_1_reasoning_format_deepseek"); - common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, params); - assert_equals(variant, true, builder.try_parse_reasoning("", "")); - assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content); - assert_equals(variant, std::string("ok"), builder.consume_rest()); - } - // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "" and then regular content - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_NONE; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("deepseek_v3_1_reasoning_format_none"); - const std::string input = "REASONINGok"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, std::string("REASONINGok"), msg.content); - assert_equals(variant, std::string(""), msg.reasoning_content); - } -} - -static void test_regex() { - auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") { - common_chat_msg_parser builder(input, /* is_partial= */ false, {}); - assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern); - }; - - test_throws("Hello, world!", "abc", "^abc$"); - test_throws("Hello, world!", "e", "^e$"); - - { - common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {}); - builder.consume_regex(common_regex("Hello")); - assert_equals(", world!", builder.consume_rest()); - } - - { - // When in non partial mode, we can say whether the regex was consumed or not. - common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {}); - assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value()); - } - { - common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {}); - auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?")); - assert_equals(true, res.has_value()); - // Verify captures - assert_equals(2, res->groups.size()); - assert_equals("Hell", builder.str(res->groups[0])); - assert_equals("el", builder.str(res->groups[1])); - // Verify position is after the match - assert_equals(4, builder.pos()); - assert_equals("o,", builder.consume_rest()); - } - { - // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception. - common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {}); - assert_throws([&]() { - builder.try_consume_regex(common_regex("Hello, world!")); - }, "^Hello, world!$"); - } - - // Now regardless of the mode, we can tell these aren't a match. - for (const auto is_partial : {false, true}) { - common_chat_msg_parser builder("Hello,", is_partial, {}); - assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value()); - } - for (const auto is_partial : {false, true}) { - common_chat_msg_parser builder("Hello,", is_partial, {}); - assert_equals(false, builder.try_consume_literal("Oh")); - } -} - -const std::vector barely_healable_jsons = { - "{", - "{\"", - "{\"\\", - "{\"n", - "{\"name\"", - "{\"name\":", - "{\"name\":\"", - "{\"name\":\"\\", - "{\"name\":\"python", - "{\"name\":\"python\\", - "{\",", - "{\":", - "{\"[", - "{\"]", - "{\"{", - "{\"}", - "{\"1", - "{\"name\":\",", - "{\"name\":\":", - "{\"name\":\"[", - "{\"name\":\"]", - "{\"name\":\"{", - "{\"name\":\"}", - "{\"name\":\"1", -}; - -static void test(const std::string & input, bool is_partial, const std::vector> & args_paths, const std::vector> & content_paths, const std::string & expected) { - common_chat_msg_parser builder(input, is_partial, {}); - auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths); - assert_equals(true, js.has_value()); - assert_equals(is_partial, js->is_partial); - assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get() : js->value.dump()); -} - -static void test_deepseek_v3_1_tool_calls() { - //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); - // variant: happy path for when it works as the model card says it should - const std::string variant("simple"); - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = true; - const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, 1, msg.tool_calls.size()); - assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name); - // JSON arguments are dumped without spaces - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments); - assert_equals(variant, std::string(""), msg.content); - assert_equals(variant, std::string(""), msg.reasoning_content); - - // variant: simple + thinking open - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("simple_thinking"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 1, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); - assert_equals(variant, std::string(""), m.content); - assert_equals(variant, std::string("REASONING"), m.reasoning_content); - } - // variant: simple + multiple tool calls - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = true; - const std::string variant("simple_multiple_tool_calls"); - const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 2, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments); - assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name); - assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments); - assert_equals(variant, std::string("CONTENT"), m.content); - assert_equals(variant, std::string(""), m.reasoning_content); - } - - - // variant: thinking forced open + tool call in reasoning content - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_tool_call_in_reasoning"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 1, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); - assert_equals(variant, std::string(""), m.content); - assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content); - } - - // variant: thinking forced open + tool call in reasoning content + no closing think + not partial - // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting - // to make tool calls in reasoning content according to the model card, but it does sometimes, so - // add the reasoning content as regular content and parse the tool calls. - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, std::string("REASONING"), m.content); - assert_equals(variant, std::string(""), m.reasoning_content); - assert_equals(variant, 1, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); - } - - // variant: thinking forced open + tool call in reasoning content + no closing think + partial - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, /* is_partial= */ true, params); - assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content); - assert_equals(variant, std::string(""), m.content); - assert_equals(variant, 0, m.tool_calls.size()); - } - - // variant: thinking not forced open + reasoning + regular content + no tool calls - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls"); - const std::string in = "REASONINGCONTENT"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 0, m.tool_calls.size()); - assert_equals(variant, std::string("CONTENT"), m.content); - assert_equals(variant, std::string("REASONING"), m.reasoning_content); - } - // variant: thinking not forced open + missing reasoning + no tool calls - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = true; - const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls"); - const std::string in = "CONTENT"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 0, m.tool_calls.size()); - assert_equals(variant, std::string("CONTENT"), m.content); - assert_equals(variant, std::string(""), m.reasoning_content); - } -} - -static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) { - common_chat_msg_parser builder(input, parse_as_partial, {}); - auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {}); - assert_equals(true, js.has_value()); - assert_equals(is_partial, js->is_partial); - assert_equals(expected, js->value.dump()); -} - -static void test_json_with_dumped_args_no_args() { - // Normal JSON, nothing to heal, nothing to dump - test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}"); - // Full json is args - test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}"); - - // If the arguments are further down, don't heal partial content. - for (const auto & src : barely_healable_jsons) { - test(src, true, {{"arguments"}}, {}, "{}"); - } - // But heal content that isn't partial. - test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}"); -} - -static void test_json_with_dumped_args() { - - // Partial content. - test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}"); - test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}"); - test("{\"content\": ", true, {}, {{"content"}}, "{}"); - - // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted). - test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python"); - for (const auto & src : barely_healable_jsons) { - test(src, true, {{}}, {}, src); - } - - // Full JSON w/ args - for (auto parse_as_partial : {true, false}) { - test_with_args( - R"({"name": "python", "args": {"arg1": 1}})", - R"({"name":"python","args":"{\"arg1\":1}"})", - parse_as_partial, - /* is_partial= */ false - ); - } - - // Partial JSON w/ partial args - test_with_args( - R"({"foo": "bar", "args": {")", - R"({"foo":"bar","args":"{\""})" - ); - // Partial args broken in object key - test_with_args( - R"({"foo": "bar", "args": {"ar)", - R"({"foo":"bar","args":"{\"ar"})" - ); - // Partial args broken after object key - test_with_args( - R"({"foo": "bar", "args": {"arg1")", - R"({"foo":"bar","args":"{\"arg1\""})" - ); - // Partial args broken before object value - test_with_args( - R"({"foo": "bar", "args": {"arg1":)", - R"({"foo":"bar","args":"{\"arg1\":"})" - ); - // Partial args broken before object value (space) - test_with_args( - R"({"foo": "bar", "args": {"arg1": )", - R"({"foo":"bar","args":"{\"arg1\":"})" - ); - // Partial args broken in object value that may not be complete (int) - test_with_args( - R"({"foo": "bar", "args": {"arg1": 1)", - R"({"foo":"bar","args":"{\"arg1\":"})" - ); - // Partial args broken in object value that is complete (int) - test_with_args( - R"({"foo": "bar", "args": {"arg1": 1 )", - R"({"foo":"bar","args":"{\"arg1\":1"})" - ); - // Partial args broken in object value that is incomplete (string) - test_with_args( - R"({"foo": "bar", "args": {"arg1": ")", - R"({"foo":"bar","args":"{\"arg1\":\""})" - ); - // Partial args broken in object value that is complete (string) - test_with_args( - R"({"foo": "bar", "args": {"arg1": "1")", - R"({"foo":"bar","args":"{\"arg1\":\"1\""})" - ); - // Partial args broken on array opening - test_with_args( - R"({"foo": "bar", "args": [)", - R"({"foo":"bar","args":"["})" - ); - // Partial args broken on array value that is incomplete (int) - test_with_args( - R"({"foo": "bar", "args": [1)", - R"({"foo":"bar","args":"["})" - ); - // Partial args broken on array value that is complete (int) - test_with_args( - R"({"foo": "bar", "args": [1 )", - R"({"foo":"bar","args":"[1"})" - ); - // Partial args broken on array value that is complete (string) - test_with_args( - R"({"foo": "bar", "args": ["1")", - R"({"foo":"bar","args":"[\"1\""})" - ); - // Partial args broken after array value - test_with_args( - R"({"foo": "bar", "args": [1,)", - R"({"foo":"bar","args":"[1,"})" - ); - // Partial args broken on nested array - test_with_args( - R"({"foo": "bar", "args": {"arg1": [)", - R"({"foo":"bar","args":"{\"arg1\":["})" - ); - - // Unicode tests - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u0)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u00)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u000)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u0000)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud8)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud80)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\u)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\ud)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\udc)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})" - ); -} - -static void test_positions() { - { - common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {}); - assert_equals(0, builder.pos()); - assert_throws([&]() { builder.move_to(100); }); - assert_equals(0, builder.pos()); - assert_throws([&]() { builder.move_back(1); }); - assert_equals(0, builder.pos()); - - builder.move_to(8); - assert_equals(8, builder.pos()); - builder.move_back(1); - assert_equals(7, builder.pos()); - assert_equals("world!", builder.consume_rest()); - - builder.move_to(0); - assert_equals(0, builder.pos()); - - assert_throws([&]() { builder.finish(); }); - assert_equals(0, builder.pos()); - - builder.move_to(builder.input().size()); - builder.finish(); - } - { - common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {}); - - builder.move_to(builder.input().size()); - assert_equals(builder.input().size(), builder.pos()); - builder.finish(); - } -} - -int main() { - test_positions(); - test_json_with_dumped_args_no_args(); - test_json_with_dumped_args(); - test_reasoning(); - test_regex(); - test_deepseek_v3_1_tool_calls(); - std::cout << "All tests passed!\n"; - return 0; -} diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp index f767c73c27..95a989e6f8 100644 --- a/tests/test-chat-peg-parser.cpp +++ b/tests/test-chat-peg-parser.cpp @@ -1,8 +1,3 @@ -#include -#include -#include - -#include "chat-parser.h" #include "chat-peg-parser.h" #include "chat.h" #include "common.h" @@ -10,6 +5,11 @@ #include "peg-parser.h" #include "testing.h" #include "peg-parser/simple-tokenize.h" + +#include +#include +#include + #include "nlohmann/json.hpp" using json = nlohmann::ordered_json; @@ -17,9 +17,12 @@ using json = nlohmann::ordered_json; static json create_tools(); static void test_example_native(testing & t); static void test_example_qwen3_coder(testing & t); +static void test_example_qwen3_non_coder(testing & t); static void test_command7_parser_compare(testing & t); +static void test_prefix_tool_names(testing & t); +static void test_tagged_peg_parser(testing & t); -int main(int argc, char *argv[]) { +int main(int argc, char * argv[]) { testing t(std::cout); if (argc >= 2) { t.set_filter(argv[1]); @@ -32,7 +35,10 @@ int main(int argc, char *argv[]) { t.test("native", test_example_native); t.test("qwen3 coder", test_example_qwen3_coder); + t.test("qwen3 non-coder", test_example_qwen3_non_coder); t.test("comparison", test_command7_parser_compare); + t.test("prefix tool names", test_prefix_tool_names); + t.test("tagged peg parser", test_tagged_peg_parser); return t.summary(); } @@ -41,87 +47,75 @@ static json create_tools() { json tools = json::array(); json tool_weather = { - {"type", "function"}, - {"function", { - {"name", "get_current_weather"}, - {"description", "Get the current weather in a given location"}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"location", { - {"type", "string"}, - {"description", "The city and state, e.g. San Francisco, CA"} - }}, - {"unit", { - {"type", "string"}, - {"enum", {"celsius", "fahrenheit"}}, - {"description", "The temperature unit to use. Infer this from the users location."} - }} - }}, - {"required", {"location", "unit"}}, - }}, - }} + { "type", "function" }, + { "function", + { + { "name", "get_current_weather" }, + { "description", "Get the current weather in a given location" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { { "location", + { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } }, + { "unit", + { { "type", "string" }, + { "enum", { "celsius", "fahrenheit" } }, + { "description", + "The temperature unit to use. Infer this from the users location." } } } } }, + { "required", { "location", "unit" } }, + } }, + } } }; tools.push_back(tool_weather); json tool_forecast = { - {"type", "function"}, - {"function", { - {"name", "get_forecast"}, - {"description", "Get the weather forecast for a given location"}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"location", { - {"type", "string"}, - {"description", "The city and state, e.g. San Francisco, CA"} - }}, - {"unit", { - {"type", "string"}, - {"enum", {"celsius", "fahrenheit"}}, - {"description", "The temperature unit to use. Infer this from the users location."} - }}, - {"days", { - {"type", "integer"}, - {"description", "Number of days to forecast (1-10)"}, - {"minimum", 1}, - {"maximum", 10} - }} - }}, - {"required", {"location", "unit"}}, - }}, - }} + { "type", "function" }, + { "function", + { + { "name", "get_forecast" }, + { "description", "Get the weather forecast for a given location" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { { "location", + { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } }, + { "unit", + { { "type", "string" }, + { "enum", { "celsius", "fahrenheit" } }, + { "description", "The temperature unit to use. Infer this from the users location." } } }, + { "days", + { { "type", "integer" }, + { "description", "Number of days to forecast (1-10)" }, + { "minimum", 1 }, + { "maximum", 10 } } } } }, + { "required", { "location", "unit" } }, + } }, + } } }; tools.push_back(tool_forecast); json tool_search = { - {"type", "function"}, - {"function", { - {"name", "search_knowledge_base"}, - {"description", "Search the internal technical documentation knowledge base."}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"query", { - {"type", "string"}, - {"description", "The search query string."} - }}, - {"max_results", { - {"type", "integer"}, - {"description", "The maximum number of results to return."}, - {"default", 5} - }}, - {"category", { - {"type", "string"}, - {"enum", {"api", "troubleshooting", "billing", "general"}}, - {"description", "Filter search by specific category."} - }} - }}, - {"required", {"query", "category"}}, - {"additionalProperties", false} - }}, - {"strict", true} - }} + { "type", "function" }, + { "function", + { { "name", "search_knowledge_base" }, + { "description", "Search the internal technical documentation knowledge base." }, + { "parameters", + { { "type", "object" }, + { "properties", + { { "query", { { "type", "string" }, { "description", "The search query string." } } }, + { "max_results", + { { "type", "integer" }, + { "description", "The maximum number of results to return." }, + { "default", 5 } } }, + { "category", + { { "type", "string" }, + { "enum", { "api", "troubleshooting", "billing", "general" } }, + { "description", "Filter search by specific category." } } } } }, + { "required", { "query", "category" } }, + { "additionalProperties", false } } }, + { "strict", true } } } }; tools.push_back(tool_search); @@ -131,39 +125,39 @@ static json create_tools() { struct tool_argument { std::string name; std::string type; - bool is_required; - json schema; + bool is_required; + json schema; }; struct tool_definition { - std::string name; + std::string name; std::vector arguments; - json schema; + json schema; }; // Test fictitious model output that emits arguments as JSON. static void test_example_native(testing & t) { struct test_case { // Parameters - std::string name; - json tools; + std::string name; + json tools; common_chat_tool_choice tool_choice; common_reasoning_format reasoning_format; - json json_schema; - bool parallel_tool_calls; - bool thinking_forced_open; - std::string input; + json json_schema; + bool parallel_tool_calls; + bool thinking_forced_open; + std::string input; // Expect - std::string expect_reasoning; - std::string expect_content; + std::string expect_reasoning; + std::string expect_content; std::vector expect_tool_calls; }; auto build_parser = [](const test_case & tc) { - return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { + return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE); - auto reasoning = p.eps(); + auto reasoning = p.eps(); if (tc.thinking_forced_open) { // If thinking is forced open, expect a closing tag reasoning = p.reasoning(p.until("")) + "" + p.space(); @@ -174,231 +168,188 @@ static void test_example_native(testing & t) { // tool calling parser if (tc.tools.is_array() && !tc.tools.empty()) { - auto tools = p.choice(); - for (const auto & tool : tc.tools) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + auto tool_call = + p.standard_json_tools("[", "]", tc.tools, tc.parallel_tool_calls, + tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED); - auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\""); - auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))); - - tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}"); - }; - - auto parallel_calls = p.eps(); - if (tc.parallel_tool_calls) { - parallel_calls = p.zero_or_more("," << tools); - } - - auto tool_call = p.trigger_rule("tool-call", - p.sequence({ - p.literal("["), - tools, - parallel_calls, - p.literal("]") - }) - ); - - return p.sequence({ - (reasoning_in_content ? p.eps() : reasoning), - p.content(p.until("")), - p.optional(p.space() + tool_call), - p.space(), - p.end() - }); + return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("")), + p.optional(p.space() + tool_call), p.space(), p.end() }); } // response_format parser if (tc.json_schema.is_object() && !tc.json_schema.empty()) { - return p.sequence({ - (reasoning_in_content ? p.eps() : reasoning), - p.content(p.schema(p.json(), "response-output", tc.json_schema)), - p.space(), - p.end() - }); + return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), + p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(), + p.end() }); } // Content-only parser - return p.sequence({ - (reasoning_in_content ? p.eps() : reasoning), - p.content(p.rest()), - p.end() - }); + return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.rest()), p.end() }); }); }; std::vector test_cases = std::vector{ { - /* .name = */ "content with thinking_forced_open = false", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ false, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "The user said hello, I must say hello back", - /* .expect_content = */ "Hello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = false", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ false, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "The user said hello, I must say hello back", + /* .expect_content = */ "Hello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = false and no reasoning", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ false, - /* .input = */ ( - "Hello" - ), - /* .expect_reasoning = */ "", - /* .expect_content = */ "Hello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = false and no reasoning", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ false, + /* .input = */ ("Hello"), + /* .expect_reasoning = */ "", + /* .expect_content = */ "Hello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = false and reasoning_format = none", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "", - /* .expect_content = */ "The user said hello, I must say hello back\nHello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = false and reasoning_format = none", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "", + /* .expect_content = */ "The user said hello, I must say hello back\nHello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = true", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "The user said hello, I must say hello back", - /* .expect_content = */ "Hello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = true", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "The user said hello, I must say hello back", + /* .expect_content = */ "Hello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = true and reasoning_format = none", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "", - /* .expect_content = */ "The user said hello, I must say hello back\nHello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = true and reasoning_format = none", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "", + /* .expect_content = */ "The user said hello, I must say hello back\nHello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "tools with tool_choice = auto and no parallel_tool_calls", - /* .tools = */ create_tools(), - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "I must get the weather in New York\n" - "[" - R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" - "]" - ), - /* .expect_reasoning = */ "I must get the weather in New York", - /* .expect_content = */ "", - /* .expect_tool_calls = */ {{ + /* .name = */ "tools with tool_choice = auto and no parallel_tool_calls", + /* .tools = */ create_tools(), + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ + ("I must get the weather in New York\n" + "[" + R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" + "]"), + /* .expect_reasoning = */ "I must get the weather in New York", + /* .expect_content = */ "", + /* .expect_tool_calls = */ + { { /* .name = */ "get_current_weather", /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})", /* .id = */ "", - }}, - }, + } }, + }, { - /* .name = */ "tools with tool_choice = auto and parallel_tool_calls", - /* .tools = */ create_tools(), - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ true, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "I must get the weather in New York and San Francisco and a 3 day forecast of each.\nLet me search that for you." - "[" - R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" - ", " - R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})" - ", " - R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})" - ", " - R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})" - "]" - ), - /* .expect_reasoning = */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.", - /* .expect_content = */ "Let me search that for you.", - /* .expect_tool_calls = */ {{ - /* .name = */ "get_current_weather", - /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})", - /* .id = */ "", - }, { - /* .name = */ "get_current_weather", - /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})", - /* .id = */ "", - }, { - /* .name = */ "get_forecast", - /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})", - /* .id = */ "", - }, { - /* .name = */ "get_forecast", - /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})", - /* .id = */ "", - }}, - }, + /* .name = */ "tools with tool_choice = auto and parallel_tool_calls", + /* .tools = */ create_tools(), + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ true, + /* .thinking_forced_open = */ true, + /* .input = */ + ("I must get the weather in New York and San Francisco and a 3 day forecast of each.\nLet me " + "search that for you." + "[" + R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" + ", " + R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})" + ", " + R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})" + ", " + R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})" + "]"), + /* .expect_reasoning = */ + "I must get the weather in New York and San Francisco and a 3 day forecast of each.", /* .expect_content = */ "Let me search that for you.", + /* .expect_tool_calls = */ + { { + /* .name = */ "get_current_weather", + /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})", + /* .id = */ "", + }, + { + /* .name = */ "get_current_weather", + /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})", + /* .id = */ "", + }, + { + /* .name = */ "get_forecast", + /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})", + /* .id = */ "", + }, + { + /* .name = */ "get_forecast", + /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})", + /* .id = */ "", + } }, + }, { - /* .name = */ "response_format with thinking_forced_open = true", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ { - {"type", "object"}, - {"properties", { - {"invoice_number", {{"type", "string"}}}, - {"amount", {{"type", "number"}}}, - {"due_date", {{"type", "string"}}} - }}, - {"required", {"invoice_number", "amount", "due_date"}} - }, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "I must produce the invoice in the requested format\n" - R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})" - ), - /* .expect_reasoning = */ "I must produce the invoice in the requested format", - /* .expect_content = */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "response_format with thinking_forced_open = true", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ + { { "type", "object" }, + { "properties", + { { "invoice_number", { { "type", "string" } } }, + { "amount", { { "type", "number" } } }, + { "due_date", { { "type", "string" } } } } }, + { "required", { "invoice_number", "amount", "due_date" } } }, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ + ("I must produce the invoice in the requested format\n" + R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"), + /* .expect_reasoning = */ "I must produce the invoice in the requested format", + /* .expect_content = */ + R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls = */ {}, + }, }; for (const auto & tc : test_cases) { t.test(tc.name, [&](testing & t) { - auto parser = build_parser(tc); - auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + auto parser = build_parser(tc); + auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; auto grammar = build_grammar([&](const common_grammar_builder & builder) { - for (auto const & def : tc.tools) { - auto function = def.at("function"); + for (const auto & def : tc.tools) { + auto function = def.at("function"); auto parameters = function.at("parameters"); builder.resolve_refs(parameters); }; @@ -406,17 +357,17 @@ static void test_example_native(testing & t) { }); t.log("Grammar:"); - for (auto const & line : string_split(grammar, "\n")) { + for (const auto & line : string_split(grammar, "\n")) { t.log(line); } common_peg_parse_context ctx(tc.input, false); - auto result = parser.parse(ctx); + auto result = parser.parse(ctx); t.assert_true("success", result.success()); common_chat_msg msg; - auto mapper = common_chat_peg_native_mapper(msg); + auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); t.assert_equal("content equal", tc.expect_content, msg.content); @@ -431,16 +382,16 @@ static void test_example_native(testing & t) { } static void test_example_qwen3_coder(testing & t) { - auto tools = create_tools(); - auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) { + auto tools = create_tools(); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto content = p.rule("content", p.content(p.until(""))); std::vector tool_parsers; - for (auto const & def : tools) { - auto function = def.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - auto properties = parameters.at("properties"); + for (const auto & def : tools) { + auto function = def.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + auto properties = parameters.at("properties"); std::set required_properties; if (function.contains("required")) { @@ -450,59 +401,36 @@ static void test_example_qwen3_coder(testing & t) { std::vector arg_parsers; for (const auto & [param_name, param_schema] : properties.items()) { bool is_required = required_properties.find(param_name) != required_properties.end(); - auto type = param_schema.value("type", "object"); + auto type = param_schema.value("type", "object"); - auto arg = p.tool_arg(p.sequence({ - p.tool_arg_open(""), - (type == "string" ? - p.tool_arg_string_value( - p.schema( - p.until_one_of({ - "\n\n" - }), - "tool-" + name + "-arg-" + param_name + "-schema", - param_schema, - true - ) - ) : p.tool_arg_json_value( - p.schema( - p.json(), - "tool-" + name + "-arg-" + param_name + "-schema", - param_schema - ) - ) - ), - p.tool_arg_close( - "\n" + - p.peek(p.literal("")) - ) - })); + auto arg = p.tool_arg( + p.sequence({ p.tool_arg_open(""), + (type == "string" ? + p.tool_arg_string_value(p.schema( + p.until_one_of({ "\n\n" }), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : + p.tool_arg_json_value(p.schema( + p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))), + p.tool_arg_close("\n" + + p.peek(p.literal(""))) })); - arg_parsers.push_back(is_required ? - p.rule("tool-" + name + "-arg-" + param_name, arg) : - p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); + arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) : + p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); } - tool_parsers.push_back(p.rule("tool-" + name, - p.tool_open("") - << p.sequence(arg_parsers) - << p.tool_close(p.literal("")) - )); + tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("") + << p.sequence(arg_parsers) + << p.tool_close(p.literal("")))); }; - auto tool_call = p.trigger_rule("tool-call", - "" - << p.choice(tool_parsers) - << "" - ); + auto tool_call = p.trigger_rule("tool-call", "" << p.choice(tool_parsers) << ""); return content + p.zero_or_more(p.space() + tool_call) + p.end(); }); auto grammar = build_grammar([&](const common_grammar_builder & builder) { - for (auto const & def : tools) { - auto function = def.at("function"); + for (const auto & def : tools) { + auto function = def.at("function"); auto parameters = function.at("parameters"); builder.resolve_refs(parameters); }; @@ -510,11 +438,11 @@ static void test_example_qwen3_coder(testing & t) { }); t.log("Grammar:"); - for (auto const & line : string_split(grammar, "\n")) { + for (const auto & line : string_split(grammar, "\n")) { t.log(line); } - t.test("incremental parsing", [&](testing &t) { + t.test("incremental parsing", [&](testing & t) { std::string input = "Let me search the knowledge base for cat pictures." "\n" @@ -538,7 +466,7 @@ static void test_example_qwen3_coder(testing & t) { } common_chat_msg msg; - auto mapper = common_chat_peg_constructed_mapper(msg); + auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); //t.log("Input: " + input); @@ -554,7 +482,105 @@ static void test_example_qwen3_coder(testing & t) { try { // This shouldn't emit any runtime errors auto diffs = common_chat_msg_diff::compute_diffs(prev, msg); - } catch(const std::exception & e) { + } catch (const std::exception & e) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + t.assert_true(std::string("failed with ") + e.what(), false); + } + + prev = msg; + } + }); +} + +static void test_example_qwen3_non_coder(testing & t) { + auto tools = create_tools(); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + // tool calling parser using standard JSON format + auto tool_call = p.standard_json_tools("", "", tools, true, false); + + return p.sequence({ p.content(p.until("")), p.optional(p.space() + tool_call), p.end() }); + }); + + auto grammar = build_grammar([&](const common_grammar_builder & builder) { + for (const auto & def : tools) { + auto function = def.at("function"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + }; + parser.build_grammar(builder); + }); + + t.log("Grammar:"); + for (const auto & line : string_split(grammar, "\n")) { + t.log(line); + } + + t.test("tool call parsing", [&](testing & t) { + std::string input = + "I need to get the weather.\n" + "" + "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": " + "\"fahrenheit\"}}" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("content", "I need to get the weather.\n", msg.content); + t.assert_equal("reasoning", "", msg.reasoning_content); + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}", + msg.tool_calls[0].arguments); + } + }); + + t.test("incremental parsing", [&](testing & t) { + std::string input = + "I need to get the weather.\n" + "" + "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": " + "\"fahrenheit\"}}" + ""; + + std::vector tokens = simple_tokenize(input); + + common_chat_msg prev; + for (auto it = tokens.begin(); it != tokens.end(); it++) { + std::string in = std::accumulate(tokens.begin(), it + 1, std::string()); + + common_peg_parse_context ctx(in, it + 1 < tokens.end()); + + auto result = parser.parse(ctx); + if (!t.assert_equal("not fail", false, result.fail())) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + //t.log("Input: " + input); + t.log("==========================================="); + t.log("Iteration " + std::to_string(in.size())); + t.log("Reasoning: " + msg.reasoning_content); + t.log("Content : " + msg.content); + for (const auto & tc : msg.tool_calls) { + t.log("Tool name: " + tc.name); + t.log("Tool args: " + tc.arguments); + } + + try { + // This shouldn't emit any runtime errors + auto diffs = common_chat_msg_diff::compute_diffs(prev, msg); + } catch (const std::exception & e) { t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); t.assert_true(std::string("failed with ") + e.what(), false); } @@ -565,38 +591,37 @@ static void test_example_qwen3_coder(testing & t) { } void test_command7_parser_compare(testing & t) { - auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) { - auto thinking = p.reasoning_block( - "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>"); + auto parser = build_chat_peg_unified_parser([](common_chat_peg_unified_builder & p) { + auto thinking = + p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>"); auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>"; auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\""))); - auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\""))); + auto tool_call_name = + p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\""))); auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json())); auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args); - auto tool_call = p.rule("tool-call", p.tool( - p.tool_open(p.literal("{")) - << tool_call_fields - << p.zero_or_more( p.literal(",") << tool_call_fields) - << p.tool_close(p.literal("}")) - )); + auto tool_call = + p.rule("tool-call", p.tool(p.tool_open(p.literal("{")) + << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields) + << p.tool_close(p.literal("}")))); - auto tool_calls = p.rule("tool-calls", - "<|START_ACTION|>" - << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]") - << "<|END_ACTION|>"); + auto tool_calls = p.rule( + "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]") + << "<|END_ACTION|>"); return p.optional(thinking) << (tool_calls | response) + p.end(); }); - auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) { + auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, + bool print_results) { common_peg_parse_context ctx(input, is_partial); - auto result = p.parse(ctx); + auto result = p.parse(ctx); common_chat_msg msg; - auto mapper = common_chat_peg_native_mapper(msg); + auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); if (print_results) { @@ -614,79 +639,19 @@ void test_command7_parser_compare(testing & t) { } }; - auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) { - // Original common_chat_combinator_parser taken from chat.cpp - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_GENERIC; - params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder( - input, - /* .is_partial = */ need_more_input, - params - ); + std::string reasoning = + "To plan an effective trip to Japan that includes both historical sites and modern attractions within a " + "budget of $4000 for a two-week stay, we need to:\n\n" + "1. Identify key historical sites and modern attractions in Japan.\n" + "2. Find affordable accommodation options that provide a balance between comfort and cost.\n" + "3. Determine the best modes of transportation for getting around Japan.\n" + "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without " + "overspending.\n" + "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees " + "to attractions."; - builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>"); - - static const common_regex start_action_regex("<\\|START_ACTION\\|>"); - static const common_regex end_action_regex("<\\|END_ACTION\\|>"); - static const common_regex start_response_regex("<\\|START_RESPONSE\\|>"); - static const common_regex end_response_regex("<\\|END_RESPONSE\\|>"); - - if (auto res = builder.try_find_regex(start_action_regex)) { - // If we didn't extract thoughts, prelude includes them. - auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } }); - for (const auto & tool_call : tool_calls.value) { - std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : ""; - std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : ""; - std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : ""; - if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - if (tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_regex(end_action_regex); - } else if (auto res = builder.try_find_regex(start_response_regex)) { - if (!builder.try_find_regex(end_response_regex)) { - builder.add_content(builder.consume_rest()); - throw common_chat_msg_partial_exception(end_response_regex.str()); - } - } else { - builder.add_content(builder.consume_rest()); - } - - if (print_results) { - std::cout << "== Parsed (legacy) ==\n"; - std::cout << "=== Reasoning ===\n"; - std::cout << builder.result().reasoning_content << "\n"; - std::cout << "\n\n=== Content ===\n"; - std::cout << builder.result().content << "\n"; - std::cout << "\n\n=== Tool Calls ===\n"; - for (const auto & tc : builder.result().tool_calls) { - std::cout << "id: " << tc.id << "\n"; - std::cout << "name: " << tc.name << "\n"; - std::cout << "args: " << tc.arguments << "\n"; - } - } - }; - - std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a " - "budget of $4000 for a two-week stay, we need to:\n\n" - "1. Identify key historical sites and modern attractions in Japan.\n" - "2. Find affordable accommodation options that provide a balance between comfort and cost.\n" - "3. Determine the best modes of transportation for getting around Japan.\n" - "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without " - "overspending.\n" - "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees " - "to attractions."; - - std::vector> tool_calls = {{ - "call_0", - "plan_trip", - nlohmann::json::parse(R"({ + std::vector> tool_calls = { + { "call_0", "plan_trip", nlohmann::json::parse(R"({ "destination": "Japan", "duration": 14, "budget": 4000, @@ -694,8 +659,8 @@ void test_command7_parser_compare(testing & t) { "accommodation_preferences": "affordable", "transportation_preferences": "efficient", "meal_preferences": "local cuisine" - })") - }}; + })") } + }; std::vector tokens; @@ -712,10 +677,10 @@ void test_command7_parser_compare(testing & t) { auto json = nlohmann::json::array(); for (const auto & tc : tool_calls) { - auto tc_json = nlohmann::json::object(); + auto tc_json = nlohmann::json::object(); tc_json["tool_call_id"] = std::get<0>(tc); - tc_json["tool_name"] = std::get<1>(tc); - tc_json["parameters"] = std::get<2>(tc); + tc_json["tool_name"] = std::get<1>(tc); + tc_json["parameters"] = std::get<2>(tc); json.push_back(tc_json); } @@ -727,42 +692,248 @@ void test_command7_parser_compare(testing & t) { std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string()); - // Run tests - t.test("legacy_parse", [&](testing & /* t */) { - test_legacy(input, false, false); + t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); }); + t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100); + t.bench( + "current_parse_benchmark incremental", + [&]() { + std::string in; + for (auto i = 0u; i < tokens.size(); i++) { + in += tokens[i]; + test_current(parser, in, i + 1 < tokens.size(), false); + } + }, + 20); +} + +// Test that tool names that are proper prefixes of other tool names don't cause +// premature matching during incremental parsing. +// For example, "special_function" should not match when parsing "special_function_with_opt". +static void test_prefix_tool_names(testing & t) { + // Create tools where one name is a proper prefix of another + json tools = json::array(); + + json tool_short = { + { "type", "function" }, + { "function", + { + { "name", "special_function" }, + { "description", "A special function" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { + { "arg1", { { "type", "integer" } } }, + } }, + { "required", { "arg1" } }, + } }, + } } + }; + tools.push_back(tool_short); + + json tool_long = { + { "type", "function" }, + { "function", + { + { "name", "special_function_with_opt" }, + { "description", "A special function with optional params" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { + { "arg1", { { "type", "integer" } } }, + { "arg2", { { "type", "integer" } } }, + } }, + { "required", { "arg1" } }, + } }, + } } + }; + tools.push_back(tool_long); + + // Use standard_constructed_tools which had the prefix matching bug + std::map markers = { + { "tool_call_start_marker", "" }, + { "tool_call_end_marker", "" }, + { "function_opener", "" }, + { "function_name_suffix", ">" }, + { "parameter_key_prefix", "" }, + { "parameter_closer", "" }, + }; + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto content = p.rule("content", p.content(p.until(""))); + auto tool_call = p.standard_constructed_tools(markers, tools, false, false); + return content + p.zero_or_more(p.space() + tool_call) + p.end(); }); - t.test("current_parse", [&](testing & /* t */) { - test_current(parser, input, false, false); + // Test parsing the long tool name - this should NOT trigger the short tool name + t.test("parse long tool name", [&](testing & t) { + std::string input = + "Let me call the function." + "" + "" + "42" + "" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("content", "Let me call the function.", msg.content); + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name); + } }); - // Run benchmarks - t.bench("legacy_parse_benchmark complete", [&]() { - test_legacy(input, false, false); - }); + // Test incremental parsing - the key test case + // This ensures that when incrementally parsing "special_function_with_opt", + // we don't prematurely emit "special_function" as a tool call + t.test("incremental parse long tool name", [&](testing & t) { + std::string input = + "Let me call the function." + "" + "" + "42" + "" + ""; - t.bench("legacy_parse_benchmark incremental", [&]() { - std::string in; - for (auto i = 0u; i < tokens.size(); i++) { - in += tokens[i]; + std::vector tokens = simple_tokenize(input); + + common_chat_msg prev; + for (auto it = tokens.begin(); it != tokens.end(); it++) { + std::string in = std::accumulate(tokens.begin(), it + 1, std::string()); + + common_peg_parse_context ctx(in, it + 1 < tokens.end()); + auto result = parser.parse(ctx); + + if (!t.assert_equal("not fail", false, result.fail())) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + return; + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + // The critical check: during incremental parsing, we should never + // see "special_function" as the tool name when parsing "special_function_with_opt" + for (const auto & tc : msg.tool_calls) { + if (!t.assert_equal("tool name should not be short prefix", false, + tc.name == "special_function")) { + t.log("Premature tool name match at input: " + in); + return; + } + } try { - test_legacy(in, i + 1 < tokens.size(), false); - } catch (common_chat_msg_partial_exception & /* e */) { - // Do nothing, this is expected + auto diffs = common_chat_msg_diff::compute_diffs(prev, msg); + } catch (const std::exception & e) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + t.assert_true(std::string("diff failed with ") + e.what(), false); + return; } - } - }, 20); - t.bench("current_parse_benchmark complete", [&]() { - test_current(parser, input, false, false); - }, 100); - - t.bench("current_parse_benchmark incremental", [&]() { - std::string in; - for (auto i = 0u; i < tokens.size(); i++) { - in += tokens[i]; - test_current(parser, in, i + 1 < tokens.size(), false); + prev = msg; } - }, 20); + + // Final check: the complete parse should have the correct tool name + t.assert_equal("final tool calls count", 1u, prev.tool_calls.size()); + if (!prev.tool_calls.empty()) { + t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name); + } + }); + + // Test parsing the short tool name still works + t.test("parse short tool name", [&](testing & t) { + std::string input = + "Let me call the function." + "" + "" + "42" + "" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("content", "Let me call the function.", msg.content); + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "special_function", msg.tool_calls[0].name); + } + }); +} + +static void test_tagged_peg_parser(testing & t) { + t.test("basic tag extraction", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.tag("greeting", p.until(" ")) + " " + p.tag("name", p.rest()) + p.end(); + }); + + auto result = parser.parse_and_extract("Hello World"); + t.assert_true("success", result.result.success()); + t.assert_equal("greeting tag", "Hello", result.tags.at("greeting")); + t.assert_equal("name tag", "World", result.tags.at("name")); + }); + + t.test("duplicate tags overwrite", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.tag("item", p.until(",")) + "," + p.tag("item", p.rest()) + p.end(); + }); + + auto result = parser.parse_and_extract("first,second"); + t.assert_true("success", result.result.success()); + t.assert_equal("item tag", "second", result.tags.at("item")); + }); + + t.test("no tags extracted", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.rest() + p.end(); + }); + + auto result = parser.parse_and_extract("Hello"); + t.assert_true("success", result.result.success()); + t.assert_equal("empty tags", 0u, result.tags.size()); + }); + + t.test("structured extraction", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + auto header = p.tag("header", p.until("\n")); + auto body = p.tag("body", p.rest()); + return header + "\n" + body + p.end(); + }); + + auto result = parser.parse_and_extract("Title\nBody content here"); + t.assert_true("success", result.result.success()); + t.assert_equal("header", "Title", result.tags.at("header")); + t.assert_equal("body", "Body content here", result.tags.at("body")); + }); + + t.test("partial parse", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.tag("prefix", p.until(":")) + ":" + p.tag("value", p.rest()) + p.end(); + }); + + auto result = parser.parse_and_extract("key:val", true); + t.assert_true("not fail", !result.result.fail()); + t.assert_equal("prefix tag", "key", result.tags.at("prefix")); + t.assert_equal("value tag", "val", result.tags.at("value")); + }); } diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 27b537a036..91ff83a729 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -1,7 +1,6 @@ #include +#include #include -#include -#include #include #include #include @@ -12,26 +11,28 @@ #include #include "llama.h" -#include "common.h" #include "chat.h" #include "jinja/runtime.h" #include "jinja/parser.h" #include "jinja/lexer.h" #include "jinja/caps.h" +#ifdef WIN32 +#include +#endif + using json = nlohmann::ordered_json; -int main_automated_tests(void); +static int main_automated_tests(void); -void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false); -void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = ""); +static void run_multiple(const std::string& dir_path, bool stop_on_first_failure, const json& input, bool use_common = false); +static void run_single(const std::string& contents, json input, bool use_common = false, const std::string & output_path = ""); - - -std::string HELP = R"( +static std::string HELP = R"( Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE Options: -h, --help Show this help message and exit. + --with-tools Add a tool and a tool call to the default JSON input --json Path to the JSON input file. --stop-on-first-fail Stop testing on the first failure (default: false). --no-common Use direct Jinja engine instead of common chat templates (default: use common). @@ -41,7 +42,7 @@ If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory. If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode). )"; -std::string DEFAULT_JSON = R"({ +static std::string DEFAULT_JSON = R"({ "messages": [ { "role": "user", @@ -57,12 +58,65 @@ std::string DEFAULT_JSON = R"({ "add_generation_prompt": true })"; +static std::string DEFAULT_JSON_WITH_TOOLS = R"({ + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" + }, + { + "role": "assistant", + "content": "I am fine, thank you!" + }, + { + "role": "user", + "content": "Call a tool!" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call00001", + "type": "function", + "function": { + "name": "test", + "arguments": { "arg": "hello" } + } + } + ] + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "test", + "description": "Test", + "parameters": { + "type": "object", + "properties": { + "arg": { + "type": "string" + } + } + }, + "required": ["arg"] + } + } + ], + "bos_token": "", + "eos_token": "", + "add_generation_prompt": true +})"; + + int main(int argc, char ** argv) { std::vector args(argv, argv + argc); std::string tmpl_path; std::string json_path; std::string output_path; + std::string & json_to_use = DEFAULT_JSON; bool stop_on_first_fail = false; bool use_common = true; @@ -70,9 +124,12 @@ int main(int argc, char ** argv) { if (args[i] == "--help" || args[i] == "-h") { std::cout << HELP << "\n"; return 0; - } else if (args[i] == "--json" && i + 1 < args.size()) { + } + if (args[i] == "--json" && i + 1 < args.size()) { json_path = args[i + 1]; i++; + } else if (args[i] == "--with-tools") { + json_to_use = DEFAULT_JSON_WITH_TOOLS; } else if (args[i] == "--stop-on-first-fail") { stop_on_first_fail = true; } else if (args[i] == "--output" && i + 1 < args.size()) { @@ -105,7 +162,7 @@ int main(int argc, char ** argv) { std::istreambuf_iterator()); input_json = json::parse(content); } else { - input_json = json::parse(DEFAULT_JSON); + input_json = json::parse(json_to_use); } std::filesystem::path p(tmpl_path); @@ -125,7 +182,7 @@ int main(int argc, char ** argv) { return 0; } -void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) { +void run_multiple(const std::string& dir_path, bool stop_on_first_fail, const json& input, bool use_common) { std::vector failed_tests; // list all files in models/templates/ and run each @@ -180,7 +237,7 @@ static std::string format_using_common( common_chat_templates_inputs inputs; inputs.use_jinja = true; inputs.messages = messages; - inputs.tools = tools; + inputs.tools = std::move(tools); inputs.add_generation_prompt = true; auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt; output = normalize_newlines(output); @@ -209,7 +266,7 @@ static jinja::value_string format_using_direct_engine( jinja::runtime runtime(ctx); const jinja::value results = runtime.execute(ast); - auto parts = runtime.gather_string_parts(results); + auto parts = jinja::runtime::gather_string_parts(results); std::cout << "\n=== RESULTS ===\n"; for (const auto & part : parts->as_string().parts) { @@ -220,7 +277,7 @@ static jinja::value_string format_using_direct_engine( } -void run_single(std::string contents, json input, bool use_common, const std::string & output_path) { +void run_single(const std::string& contents, json input, bool use_common, const std::string & output_path) { jinja::enable_debug(true); jinja::value_string output_parts; @@ -560,7 +617,7 @@ int main_automated_tests(void) { supported_tmpl.resize(res); res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size()); std::cout << "Built-in chat templates:\n"; - for (auto tmpl : supported_tmpl) { + for (const auto *tmpl : supported_tmpl) { std::cout << " " << tmpl << "\n"; } @@ -592,6 +649,7 @@ int main_automated_tests(void) { } std::vector messages; + messages.reserve(conversation.size()); for (const auto & msg : conversation) { messages.push_back(simple_msg(msg.role, msg.content)); } @@ -622,58 +680,6 @@ int main_automated_tests(void) { } } - // TODO: llama_chat_format_single will be deprecated, remove these tests later - - // test llama_chat_format_single for system message - std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n"; - std::vector chat2; - auto sys_msg = simple_msg("system", "You are a helpful assistant"); - - auto fmt_sys = [&](std::string tmpl_str) { - auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str); - auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false); - std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n"; - std::cout << "-------------------------\n"; - return output; - }; - assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n"); - assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n"); - assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n"); - assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n"); - assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]"); - assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n"); - assert(fmt_sys("llama2-sys") == "[INST] <>\nYou are a helpful assistant\n<>\n\n"); - assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates - assert(fmt_sys("gemma") == ""); // for gemma, system message is merged with user message - assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>"); - assert(fmt_sys("gigachat") == "You are a helpful assistant<|message_sep|>"); - - - // test llama_chat_format_single for user message - std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n"; - chat2.push_back(simple_msg("system", "You are a helpful assistant")); - chat2.push_back(simple_msg("user", "Hello")); - chat2.push_back(simple_msg("assistant", "I am assistant")); - auto new_msg = simple_msg("user", "How are you"); - - auto fmt_single = [&](const std::string & tmpl_str) { - auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str()); - auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false); - std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n"; - std::cout << "-------------------------\n"; - return output; - }; - assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n"); - assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]"); - assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]"); - assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]"); - assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]"); - assert(fmt_single("llama2") == "[INST] How are you [/INST]"); - assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates - assert(fmt_single("gemma") == "\nuser\nHow are you\nmodel\n"); - assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"); - // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>"); - std::cout << "\nOK: All tests passed successfully.\n"; return 0; diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 4378a8db71..d9f1eea2f2 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -5,18 +5,21 @@ // // cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null // +#include "../src/llama-grammar.h" +#include "../src/unicode.h" +#include "chat-auto-parser.h" #include "chat.h" - +#include "common.h" +#include "ggml.h" #include "log.h" -#include "../src/unicode.h" -#include "../src/llama-grammar.h" - -#include - +#include +#include #include -#include #include +#include +#include +#include #include using json = nlohmann::ordered_json; @@ -33,6 +36,7 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff & os << "}"; return os; } + // operator<< for vector: static std::ostream & operator<<(std::ostream & os, const std::vector & diffs) { os << "[\n"; @@ -42,6 +46,7 @@ static std::ostream & operator<<(std::ostream & os, const std::vector -bool equals(const common_chat_msg & expected, const common_chat_msg & actual) { +template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) { return normalize(expected) == normalize(actual); } template static void assert_equals(const T & expected, const T & actual) { if (!equals(expected, actual)) { - std::cerr << "Expected:```\n" << expected << "\n```" << std::endl; - std::cerr << "Actual:```\n" << actual << "\n```" << std::endl; - std::cerr << std::flush; + std::ostringstream oss_expected; + oss_expected << expected; + std::ostringstream oss_actual; + oss_actual << actual; + LOG_ERR("Expected: %s\n", oss_expected.str().c_str()); + LOG_ERR("Actual: %s\n", oss_actual.str().c_str()); + common_log_flush(common_log_main()); throw std::runtime_error("Test failed"); } } static std::string read_file(const std::string & path) { - std::cerr << "# Reading: " << path << '\n' << std::flush; std::ifstream fs(path, std::ios_base::binary); if (!fs.is_open()) { fs = std::ifstream("../" + path, std::ios_base::binary); @@ -146,11 +152,13 @@ static std::string renormalize_json(const std::string & json_str) { auto json_obj = json::parse(json_str); return json_obj.dump(); } catch (const std::exception & e) { - std::cerr << "Failed to parse JSON: " << e.what() << '\n'; - return json_str; + return ""; // ignore parial JSON contents for comparison purposes } } -static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) { + +static void assert_msg_equals(const common_chat_msg & expected, + const common_chat_msg & actual, + bool ignore_whitespace_differences = false) { assert_equals(expected.role, actual.role); if (ignore_whitespace_differences) { assert_equals(string_strip(expected.content), string_strip(actual.content)); @@ -183,7 +191,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha } } -common_chat_tool special_function_tool { +static common_chat_tool special_function_tool{ /* .name = */ "special_function", /* .description = */ "I'm special", /* .parameters = */ R"({ @@ -197,7 +205,7 @@ common_chat_tool special_function_tool { "required": ["arg1"] })", }; -common_chat_tool special_function_tool_with_optional_param { +static common_chat_tool special_function_tool_with_optional_param{ /* .name = */ "special_function_with_opt", /* .description = */ "I'm special but have optional stuff", /* .parameters = */ R"({ @@ -215,7 +223,7 @@ common_chat_tool special_function_tool_with_optional_param { "required": ["arg1"] })", }; -common_chat_tool python_tool { +static common_chat_tool python_tool{ /* .name = */ "python", /* .description = */ "an ipython interpreter", /* .parameters = */ R"({ @@ -229,44 +237,335 @@ common_chat_tool python_tool { "required": ["code"] })", }; -common_chat_tool code_interpreter_tool { - /* .name = */ "code_interpreter", - /* .description = */ "an ipython interpreter", + +static common_chat_tool html_tool{ + /* .name = */ "html", + /* .description = */ "an html validator", /* .parameters = */ R"({ "type": "object", "properties": { - "code": { + "markup": { "type": "string", - "description": "Python code to execute." + "description": "HTML markup to validate." } }, - "required": ["code"] + "required": ["markup"] })", }; -std::vector tools { special_function_tool, special_function_tool_with_optional_param, python_tool }; -std::vector llama_3_1_tools { special_function_tool, code_interpreter_tool }; + +static common_chat_tool get_time_tool{ + /* .name = */ "get_time", + /* .description = */ "Get the current time in a city", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "City name" + } + }, + "required": ["city"] + })", +}; + +static common_chat_tool get_weather_tool{ + /* .name = */ "get_weather", + /* .description = */ "Get the current weather in a city", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "City name" + } + }, + "required": ["city"] + })", +}; + +static common_chat_tool todo_list{ + /* .name = */ "todo_list", + /* .description = */ "Create or update the todo list", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "todos": { + "type": "array", + "description": "List of TODO list items" + } + }, + "required": ["todos"] + })", +}; + +static common_chat_tool edit_tool{ + /* .name = */ "edit", + /* .description = */ "Edit file", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "filename": { + "type": "string", + "description": "Path of file to edit" + }, + "oldString": { + "type": "string", + "description": "String to replace" + }, + "newString": { + "type": "string", + "description": "New (replacement) value" + } + }, + "required": ["filename", "oldString", "newString"] + })", +}; + +static common_chat_tool magic_tool{ + /* .name = */ "magic", + /* .description = */ "Magic tool that takes a hash", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, + "required": ["name", "ref"] + })", +}; + +static common_chat_tool magic_int_tool{ + /* .name = */ "magic_int", + /* .description = */ "Magic tool that takes a hash", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "ref": { + "type": "integer" + }, + "name": { + "type": "string" + } + }, + "required": ["ref"] + })", +}; + +static common_chat_tool amount_tool{ + /* .name = */ "amount", + /* .description = */ "Amount converter", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "orig": { + "type": "number" + } + }, + "required": ["orig"] + })", +}; + +static common_chat_tool imaginary_number_tool{ + /* .name = */ "imaginary_number", + /* .description = */ "Imaginary number converter", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "number": { + "type": "object", + "properties": { + "real": { + "type": "number" + }, + "imaginary": { + "type": "number" + } + }, + "required": ["real", "imaginary"] + } + }, + "required": ["number"] + })", +}; + +static common_chat_tool string_param_tool{ + /* .name = */ "string_param", + /* .description = */ "Tool with string parameter for testing", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "A text parameter" + } + }, + "required": [] + })", +}; + +static common_chat_tool quoted_unquoted_tool{ + /* .name = */ "quoted_unquoted", + /* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "quoted": { + "type": "string", + "description": "Quoted value" + }, + "unquoted": { + "type": "string", + "description": "Unquoted value" + } + }, + "required": ["quoted", "unquoted"] + })", +}; + + +static std::vector tools{ special_function_tool, special_function_tool_with_optional_param, + python_tool, html_tool, todo_list }; + +const common_chat_msg message_user{ + "user", + "Hey there!", + /* .content_parts = */ {}, + /* .tool_calls = */ {}, + /* .reasoning_content = */ "", + /* .tool_name = */ "", + /* .tool_call_id = */ "", +}; + +const common_chat_msg message_user_parts{ + "user", + /* .content = */ "", + /* .content_parts = */ + { + { "text", "Hey" }, + { "text", "there" }, + }, + /* .tool_calls = */ + { }, + /* .reasoning_content = */ + "", + /* .tool_name = */ "", + /* .tool_call_id = */ "", +}; + +static common_chat_msg simple_assist_msg(const std::string & content, + const std::string & reasoning_content = "", + const std::string & tool_name = "", + const std::string & arguments = "", + const std::string & id = "") { + common_chat_msg msg; + msg.role = "assistant"; + msg.content = content; + msg.reasoning_content = reasoning_content; + if (!tool_name.empty() || !id.empty()) { + msg.tool_calls.push_back({ tool_name, arguments, id }); + } + return msg; +} + +static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) { + return simple_assist_msg("", "", tool_name, arguments); +} + +static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name, + const std::string & arguments, + const std::string & reasoning) { + return simple_assist_msg("", reasoning, tool_name, arguments); +} + +static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls( + const std::string & reasoning, + const std::string & content, + const std::vector> & tool_calls) { + common_chat_msg msg; + msg.role = "assistant"; + msg.content = content; + msg.reasoning_content = reasoning; + for (const auto & [name, args] : tool_calls) { + msg.tool_calls.push_back({ name, args, "" }); + } + return msg; +} + +static common_chat_msg message_with_content_and_tool_call(const std::string & content, + const std::string & tool_name, + const std::string & arguments) { + return simple_assist_msg(content, "", tool_name, arguments); +} + +static common_chat_msg message_with_reasoning_and_tool_call(const std::string & reasoning, + const std::string & tool_name, + const std::string & arguments) { + return simple_assist_msg("", reasoning, tool_name, arguments); +} + +const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?"); +const common_chat_msg message_assist_empty = simple_assist_msg(""); +const common_chat_msg message_assist_thoughts_unparsed_deepseek = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); +const common_chat_msg message_assist_thoughts_unparsed_md = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```"); +const common_chat_msg message_assist_thoughts_unparsed_md_partial = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}"); + +const common_chat_msg message_assist_thoughts_unparsed_r7b = + simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?"); +const common_chat_msg message_assist_thoughts_unparsed_magistral = + simple_assist_msg("[THINK]raisonnement[/THINK]Réponse"); +const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"); +const common_chat_msg message_assist_thoughts_unopened_unparsed = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); +const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking"); +const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_noopt = + simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_withopt = + simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}"); +const common_chat_msg message_assist_call_content = + simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}"); +const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function"); +const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg"); +const common_chat_msg message_assist_call_thoughts = + simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}"); +const common_chat_msg message_assist_call_thoughts_unparsed = + simple_assist_msg("I'm\nthinking\n\n", "", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_thoughts_content = + simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_id = + simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789"); +const common_chat_msg message_assist_call_idx = + simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0"); +const common_chat_msg message_assist_thoughts_call_idx = + simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0"); +const common_chat_msg message_assist_thoughts_partial_call = + simple_assist_msg("", "I'm\nthinking", "special_function", "", /* id = */ "0"); +const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}"); +const common_chat_msg message_assist_call_python_lines = + simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}"); +const common_chat_msg message_assist_call_python_lines_unclosed = + simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')"); +const common_chat_msg message_assist_json_content = + simple_assist_msg("{\n \"response\": \"Hello, world!\\nWhat's up?\"\n}"); struct delta_data { std::string delta; common_chat_params params; }; -static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") { - common_chat_msg msg; - msg.role = "assistant"; - msg.content = content; - msg.reasoning_content = reasoning_content; - if (!tool_name.empty()) { - msg.tool_calls.push_back({ tool_name, arguments, id }); - } - return msg; -} - -static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector & end_tokens, - const common_chat_msg & user_message, - const common_chat_msg & delta_message, +static delta_data init_delta(const struct common_chat_templates * tmpls, + const std::vector & end_tokens, + const common_chat_msg & user_message, + const common_chat_msg & delta_message, const std::vector & tools, - const common_chat_tool_choice & tool_choice) { + const common_chat_tool_choice & tool_choice) { common_chat_templates_inputs inputs; inputs.parallel_tool_calls = true; inputs.messages.push_back(user_message); @@ -317,20 +616,27 @@ static delta_data init_delta(const struct common_chat_templates * tmpls, const s gets the diff, removes any end tokens and parses the result w/ the grammar, checking that the parsed message is the same as the test_message */ -static void test_templates(const struct common_chat_templates * tmpls, const std::vector & end_tokens, - const common_chat_msg & test_message, - const std::vector & tools = {}, - const std::string & expected_delta = "", - bool expect_grammar_triggered = true, - bool test_grammar_if_triggered = true, - common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE, - bool ignore_whitespace_differences = false - ) { +static void test_templates(const struct common_chat_templates * tmpls, + const std::vector & end_tokens, + const common_chat_msg & test_message, + const std::vector & tools = {}, + const std::string & expected_delta = "", + bool expect_grammar_triggered = true, + bool test_grammar_if_triggered = true, + common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE, + bool ignore_whitespace_differences = false) { common_chat_msg user_message; - user_message.role = "user"; + user_message.role = "user"; user_message.content = "Hello, world!"; - for (const auto & tool_choice : std::vector {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) { + common_chat_templates_inputs inputs_tools; + inputs_tools.messages = { message_user }; + inputs_tools.tools = { special_function_tool }; + + common_chat_params params = common_chat_templates_apply(tmpls, inputs_tools); + + for (const auto & tool_choice : + std::vector{ COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED }) { auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice); if (!expected_delta.empty()) { if (ignore_whitespace_differences) { @@ -342,10 +648,14 @@ static void test_templates(const struct common_chat_templates * tmpls, const std if (expect_grammar_triggered) { // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time - common_chat_parser_params params; - params.format = data.params.format; - params.reasoning_format = reasoning_format; - const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params); + common_chat_parser_params parser_params; + parser_params.format = data.params.format; + parser_params.reasoning_format = reasoning_format; + if (!parser_params.parser.empty()) { + parser_params.parser = common_peg_arena(); + parser_params.parser.load(params.parser); + } + const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, parser_params); assert_msg_equals(test_message, msg, ignore_whitespace_differences); } @@ -358,43 +668,43 @@ static void test_templates(const struct common_chat_templates * tmpls, const std throw std::runtime_error("Failed to build grammar"); } auto earliest_trigger_pos = std::string::npos; - auto constrained = data.delta; + auto constrained = data.delta; for (const auto & trigger : data.params.grammar_triggers) { - size_t pos = std::string::npos; + size_t pos = std::string::npos; std::smatch match; switch (trigger.type) { case COMMON_GRAMMAR_TRIGGER_TYPE_WORD: - { - const auto & word = trigger.value; - pos = constrained.find(word); - break; - } + { + const auto & word = trigger.value; + pos = constrained.find(word); + break; + } case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN: - { - const auto & pattern = trigger.value; - if (std::regex_search(constrained, match, std::regex(pattern))) { - pos = match.position(1); + { + const auto & pattern = trigger.value; + if (std::regex_search(constrained, match, std::regex(pattern))) { + pos = match.position(1); + } + break; } - break; - } case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL: - { - const auto & pattern = trigger.value; - if (std::regex_match(constrained, match, std::regex(pattern))) { - auto mpos = std::string::npos; - for (size_t i = 1; i < match.size(); ++i) { - if (match[i].length() > 0) { - mpos = match.position(i); - break; + { + const auto & pattern = trigger.value; + if (std::regex_match(constrained, match, std::regex(pattern))) { + auto mpos = std::string::npos; + for (size_t i = 1; i < match.size(); ++i) { + if (match[i].length() > 0) { + mpos = match.position(i); + break; + } } + if (mpos == std::string::npos) { + mpos = match.position(0); + } + pos = mpos; } - if (mpos == std::string::npos) { - mpos = match.position(0); - } - pos = mpos; + break; } - break; - } default: throw std::runtime_error("Unknown trigger type"); } @@ -407,7 +717,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std } auto grammar_triggered = false; if (earliest_trigger_pos != std::string::npos) { - constrained = constrained.substr(earliest_trigger_pos); + constrained = constrained.substr(earliest_trigger_pos); grammar_triggered = true; } if (data.params.grammar_lazy) { @@ -416,8 +726,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) { throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta + - "\n\nConstrained: " + constrained + - "\n\nGrammar: " + data.params.grammar); + "\n\nConstrained: " + constrained + "\n\nGrammar: " + data.params.grammar); } } } @@ -431,24 +740,31 @@ template static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) { constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t { auto len = s.size(); - if (len == 0) return 0; + if (len == 0) { + return 0; + } auto i = len; for (size_t back = 0; back < 4 && i > 0; ++back) { --i; unsigned char c = s[i]; if ((c & 0x80) == 0) { return len; - } else if ((c & 0xC0) == 0xC0) { + } + if ((c & 0xC0) == 0xC0) { size_t expected_len = 0; - if ((c & 0xE0) == 0xC0) expected_len = 2; - else if ((c & 0xF0) == 0xE0) expected_len = 3; - else if ((c & 0xF8) == 0xF0) expected_len = 4; - else return i; - if (len - i >= expected_len) { - return len; + if ((c & 0xE0) == 0xC0) { + expected_len = 2; + } else if ((c & 0xF0) == 0xE0) { + expected_len = 3; + } else if ((c & 0xF8) == 0xF0) { + expected_len = 4; } else { return i; } + if (len - i >= expected_len) { + return len; + } + return i; } } return len - std::min(len, size_t(3)); @@ -457,13 +773,15 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s return s.substr(0, utf8_truncate_safe_len(s)); }; - auto merged = simple_assist_msg(""); + auto merged = simple_assist_msg(""); auto last_msg = parse_msg(""); for (size_t i = 1; i <= raw_message.size(); ++i) { auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i)))); - if (curr_msg == simple_assist_msg("")) continue; - LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str()); - for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) { + if (curr_msg == simple_assist_msg("")) { + continue; + } + LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({ curr_msg }).dump().c_str()); + for (auto diff : common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) { LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str()); if (!diff.reasoning_content_delta.empty()) { merged.reasoning_content += diff.reasoning_content_delta; @@ -473,14 +791,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s } if (diff.tool_call_index != std::string::npos) { if (!diff.tool_call_delta.name.empty()) { - merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""}); + merged.tool_calls.push_back({ diff.tool_call_delta.name, "", "" }); } if (!diff.tool_call_delta.arguments.empty()) { GGML_ASSERT(!merged.tool_calls.empty()); merged.tool_calls.back().arguments += diff.tool_call_delta.arguments; } } - LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str()); + LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({ merged }).dump().c_str()); } assert_msg_equals(curr_msg, merged, true); last_msg = curr_msg; @@ -489,99 +807,95 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s assert_msg_equals(expected, merged, true); } -const common_chat_msg message_user { - "user", - "Hey there!", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "", - /* .tool_call_id = */ "", -}; - -const common_chat_msg message_user_parts { - "user", - /* .content = */ "", - /* .content_parts = */ { - { "text", "Hey" }, - { "text", "there" }, - }, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "", - /* .tool_call_id = */ "", -}; - -const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?"); -const common_chat_msg message_assist_empty = simple_assist_msg(""); -const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); -const common_chat_msg message_assist_thoughts_unparsed_md = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```"); -const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}"); - -const common_chat_msg message_assist_thoughts_unparsed_r7b = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?"); -const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse"); -const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"); -const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); -const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking"); -const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_noopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_withopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}"); -const common_chat_msg message_assist_call_content = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}"); -const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function"); -const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg"); -const common_chat_msg message_assist_call_thoughts = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}"); -const common_chat_msg message_assist_call_thoughts_unparsed = simple_assist_msg("I'm\nthinking\n\n", "", "special_function", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_thoughts_content = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_id = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789"); -const common_chat_msg message_assist_call_idx = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0"); -const common_chat_msg message_assist_thoughts_call_idx = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0"); -const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}"); -const common_chat_msg message_assist_call_python_lines = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}"); -const common_chat_msg message_assist_call_python_lines_unclosed = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')"); -const common_chat_msg message_assist_call_code_interpreter = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}"); - // Use for PEG parser implementations struct peg_test_case { common_chat_templates_inputs params; - std::string input; - common_chat_msg expect; + std::string input; + common_chat_msg expect; + bool is_partial = false; }; struct make_peg_parser { common_chat_params params_; - common_peg_arena arena_; + common_peg_arena arena_; + bool detailed_debug_; - make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) { - params_ = common_chat_templates_apply(tmpls, inputs); + make_peg_parser(common_chat_templates * tmpls, + const common_chat_templates_inputs & inputs, + bool detailed_debug = false) { + detailed_debug_ = detailed_debug; + params_ = common_chat_templates_apply(tmpls, inputs); arena_.load(params_.parser); } - common_chat_msg parse(const std::string & msg, bool is_partial) { + common_chat_msg parse(const std::string & msg, bool is_partial) const { common_chat_parser_params parser_params; parser_params.format = params_.format; + parser_params.debug = detailed_debug_; return common_chat_peg_parse(arena_, msg, is_partial, parser_params); } }; -static void test_peg_parser(common_chat_templates * tmpls, const std::function & init) { +static void test_peg_parser(common_chat_templates * tmpls, + const std::function & init, + bool detailed_debug) { + // UTF-8-safe truncation helper (same as in test_parser_with_streaming) + constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t { + auto len = s.size(); + if (len == 0) { + return 0; + } + auto i = len; + for (size_t back = 0; back < 4 && i > 0; ++back) { + --i; + unsigned char c = s[i]; + if ((c & 0x80) == 0) { + return len; + } + if ((c & 0xC0) == 0xC0) { + size_t expected_len = 0; + if ((c & 0xE0) == 0xC0) { + expected_len = 2; + } else if ((c & 0xF0) == 0xE0) { + expected_len = 3; + } else if ((c & 0xF8) == 0xF0) { + expected_len = 4; + } else { + return i; + } + if (len - i >= expected_len) { + return len; + } + return i; + } + } + return len - std::min(len, size_t(3)); + }; + peg_test_case tc; init(tc); if (tc.params.messages.empty()) { - tc.params.messages = {message_user}; + tc.params.messages = { message_user }; } if (tc.expect.role.empty()) { tc.expect.role = "assistant"; } - auto parser = make_peg_parser(tmpls, tc.params); + auto parser = make_peg_parser(tmpls, tc.params, detailed_debug); + if (detailed_debug) { + LOG_DBG("Using parser: \n%s\n", parser.arena_.dump(parser.arena_.root()).c_str()); + } common_chat_msg msg_accum; common_chat_msg msg_prev; msg_accum.role = msg_prev.role = "assistant"; for (size_t i = 1; i <= tc.input.size(); ++i) { - auto is_partial = i < tc.input.size(); - common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial); + auto is_partial = i < tc.input.size() || tc.is_partial; + // Use UTF-8 safe truncation to avoid corrupting multi-byte characters + size_t safe_len = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i)); + std::string prefix = tc.input.substr(0, safe_len); + common_chat_msg msg_current = parser.parse(prefix, is_partial); for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) { if (!diff.reasoning_content_delta.empty()) { @@ -591,24 +905,147 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function tools) { + tc_.params.tools = std::move(tools); + return *this; + } + + peg_test_builder & enable_thinking(bool val) { + tc_.params.enable_thinking = val; + return *this; + } + + peg_test_builder & parallel_tool_calls(bool val) { + tc_.params.parallel_tool_calls = val; + return *this; + } + + peg_test_builder & json_schema(const std::string & schema) { + tc_.params.json_schema = schema; + return *this; + } + + peg_test_builder & is_partial(bool val) { + tc_.is_partial = val; + return *this; + } + + // Expect setters + peg_test_builder & expect(const common_chat_msg & msg) { + tc_.expect = msg; + return *this; + } + + peg_test_builder & expect_content(const std::string & content) { + tc_.expect.content = content; + return *this; + } + + peg_test_builder & expect_reasoning(const std::string & reasoning) { + tc_.expect.reasoning_content = reasoning; + return *this; + } + + peg_test_builder & expect_tool_calls(std::vector calls) { + tc_.expect.tool_calls = std::move(calls); + return *this; + } + + // Execute the test + void run() { + // Check template filter + if (!g_template_filter.empty()) { + // Case-insensitive substring match + std::string template_path_lower = tester_.template_path(); + std::string filter_lower = g_template_filter; + std::transform(template_path_lower.begin(), template_path_lower.end(), template_path_lower.begin(), + ::tolower); + std::transform(filter_lower.begin(), filter_lower.end(), filter_lower.begin(), ::tolower); + if (template_path_lower.find(filter_lower) == std::string::npos) { + // Skip this test + return; + } + } + LOG_INF("\n\x1b[38;5;126m[%s]\x1b[0m\n%s\n\n", tester_.template_path().c_str(), tc_.input.c_str()); + test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_); + } +}; + +peg_test_builder peg_tester::test(const std::string & input) { + return peg_test_builder(*this, input); +} + static void test_msgs_oaicompat_json_conversion() { - printf("[%s]\n", __func__); + LOG_DBG("%s\n", __func__); std::vector msgs{ message_user, message_user_parts, @@ -619,54 +1056,50 @@ static void test_msgs_oaicompat_json_conversion() { message_assist_call_id, message_assist_call_idx, message_assist_call_python, - message_assist_call_code_interpreter, }; for (const auto & msg : msgs) { - auto oai_json = common_chat_msgs_to_json_oaicompat({msg}); - auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json); + auto oai_json = common_chat_msgs_to_json_oaicompat({ msg }); + auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json); assert_equals((size_t) 1, msgs2.size()); - auto msg2 = msgs2[0]; + const auto & msg2 = msgs2[0]; assert_msg_equals(msg, msg2); } - assert_equals( - std::string( - "[\n" - " {\n" - " \"role\": \"user\",\n" - " \"content\": [\n" - " {\n" - " \"type\": \"text\",\n" - " \"text\": \"Hey\"\n" - " },\n" - " {\n" - " \"type\": \"text\",\n" - " \"text\": \"there\"\n" - " }\n" - " ]\n" - " }\n" - "]" - ), - common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2)); + assert_equals(std::string("[\n" + " {\n" + " \"role\": \"user\",\n" + " \"content\": [\n" + " {\n" + " \"type\": \"text\",\n" + " \"text\": \"Hey\"\n" + " },\n" + " {\n" + " \"type\": \"text\",\n" + " \"text\": \"there\"\n" + " }\n" + " ]\n" + " }\n" + "]"), + common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2)); - assert_equals( - std::string( - "[\n" - " {\n" - " \"role\": \"assistant\",\n" - " \"content\": \"\",\n" - " \"tool_calls\": [\n" - " {\n" - " \"type\": \"function\",\n" - " \"function\": {\n" - " \"name\": \"python\",\n" - " \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n" - " }\n" - " }\n" - " ]\n" - " }\n" - "]" - ), - common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2)); + // Note: content is "" instead of null due to workaround for templates that render null as "None" + assert_equals(std::string("[\n" + " {\n" + " \"role\": \"assistant\",\n" + " \"content\": \"\",\n" + " \"tool_calls\": [\n" + " {\n" + " \"type\": \"function\",\n" + " \"function\": {\n" + " \"name\": \"python\",\n" + " \"arguments\": {\n" + " \"code\": \"print('hey')\"\n" + " }\n" + " }\n" + " }\n" + " ]\n" + " }\n" + "]"), + common_chat_msgs_to_json_oaicompat({ message_assist_call_python }).dump(2)); auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]")); assert_equals(1, res.size()); @@ -685,16 +1118,15 @@ static void test_msgs_oaicompat_json_conversion() { } static void test_tools_oaicompat_json_conversion() { - printf("[%s]\n", __func__); + LOG_DBG("%s\n", __func__); std::vector tools{ special_function_tool, python_tool, - code_interpreter_tool, }; for (const auto & tool : tools) { - auto oai_json = common_chat_tools_to_json_oaicompat({tool}); - auto tools2 = common_chat_tools_parse_oaicompat(oai_json); + auto oai_json = common_chat_tools_to_json_oaicompat({ tool }); + auto tools2 = common_chat_tools_parse_oaicompat(oai_json); assert_equals((size_t) 1, tools2.size()); auto tool2 = tools2[0]; assert_equals(tool.name, tool2.name); @@ -702,2861 +1134,32 @@ static void test_tools_oaicompat_json_conversion() { assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2)); } - assert_equals( - std::string( - "[\n" - " {\n" - " \"type\": \"function\",\n" - " \"function\": {\n" - " \"name\": \"special_function\",\n" - " \"description\": \"I'm special\",\n" - " \"parameters\": {\n" - " \"type\": \"object\",\n" - " \"properties\": {\n" - " \"arg1\": {\n" - " \"type\": \"integer\",\n" - " \"description\": \"The arg.\"\n" - " }\n" - " },\n" - " \"required\": [\n" - " \"arg1\"\n" - " ]\n" - " }\n" - " }\n" - " }\n" - "]" - ), - common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2)); - - { - auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse( - R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])")); - assert_equals((size_t) 1, tools_no_params.size()); - assert_equals(std::string("test_func"), tools_no_params[0].name); - assert_equals(std::string("A test"), tools_no_params[0].description); - assert_equals(std::string("{}"), tools_no_params[0].parameters); - } - { - auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse( - R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])")); - assert_equals((size_t) 1, tools_no_desc.size()); - assert_equals(std::string("test_func"), tools_no_desc[0].name); - assert_equals(std::string(""), tools_no_desc[0].description); - } - { - auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse( - R"([{"type": "function", "function": {"name": "test_func"}}])")); - assert_equals((size_t) 1, tools_minimal.size()); - assert_equals(std::string("test_func"), tools_minimal[0].name); - assert_equals(std::string(""), tools_minimal[0].description); - assert_equals(std::string("{}"), tools_minimal[0].parameters); - } + assert_equals(std::string("[\n" + " {\n" + " \"type\": \"function\",\n" + " \"function\": {\n" + " \"name\": \"special_function\",\n" + " \"description\": \"I'm special\",\n" + " \"parameters\": {\n" + " \"type\": \"object\",\n" + " \"properties\": {\n" + " \"arg1\": {\n" + " \"type\": \"integer\",\n" + " \"description\": \"The arg.\"\n" + " }\n" + " },\n" + " \"required\": [\n" + " \"arg1\"\n" + " ]\n" + " }\n" + " }\n" + " }\n" + "]"), + common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2)); } -// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961 -struct test_parser_params { - common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; - bool reasoning_in_content = false; - bool thinking_forced_open = false; - bool parse_tool_calls = true; -}; - -static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) { - common_chat_parser_params params; - params.format = syntax.format; - params.reasoning_format = syntax.reasoning_format; - params.reasoning_in_content = syntax.reasoning_in_content; - params.thinking_forced_open = syntax.thinking_forced_open; - params.parse_tool_calls = syntax.parse_tool_calls; - return common_chat_parse(input, is_partial, params); -} - -static void test_template_output_parsers() { - printf("[%s]\n", __func__); - - common_chat_templates_inputs inputs_no_tools; - inputs_no_tools.messages = {message_user}; - - common_chat_templates_inputs inputs_tools; - inputs_tools.messages = {message_user}; - inputs_tools.tools = {special_function_tool}; - - common_chat_templates_inputs inputs_tools_builtin; - inputs_tools_builtin.messages = {message_user}; - inputs_tools_builtin.tools = {python_tool}; - - { - // Not supported yet - auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja"); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - } - { - auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"); - std::vector end_tokens{ "<|END_OF_TURN_TOKEN|>" }; - - for (const auto & inputs : { inputs_no_tools, inputs_tools }) { - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format); - assert_equals(false, params.thinking_forced_open); - } - - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_COMMAND_R7B})); - assert_msg_equals(message_assist, - test_chat_parse( - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_COMMAND_R7B})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ true, - /* .thinking_forced_open = */ false, - })); - assert_msg_equals(message_assist_thoughts_unparsed_r7b, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_COMMAND_R7B})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_call_idx, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_ACTION|>[\n" - " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" - "]<|END_ACTION|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_no_content, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_ACTION|>[\n" - " {\"tool_call_id\": \"0\", \"tool_name\": \"special", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools, - "<|START_THINKING|><|END_THINKING|>" - "<|START_ACTION|>[\n" - " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" - "]<|END_ACTION|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - COMMON_REASONING_FORMAT_DEEPSEEK); - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "<|START_RESPONSE|>Hello, world!\n" - "What's up?<|END_RESPONSE|>", - /* expect_grammar_triggered= */ false); - } - // TODO @ngxson : generic tool calls is too costly to maintain, consider removing it in the future - { - auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja"); - std::vector end_tokens{ "" }; - - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GENERIC, - common_chat_templates_apply( - read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(), - inputs_tools) - .format); - - // Generic tool calls doesn't generate / parse content-only messages symmetrically. - - assert_equals( - simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"), - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"t", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GENERIC, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ false, - })); - assert_equals( - message_assist_empty, - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"t", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - - assert_equals( - simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","), - test_chat_parse( - R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - - assert_equals( - message_assist_call_empty_args, - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"special_function\"", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - assert_equals( - message_assist_call_cutoff_args, - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - - assert_msg_equals(message_assist, - test_chat_parse( - "{\n" - " \"response\": \"Hello, world!\\nWhat's up?\"\n" - "}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GENERIC})); -#if 0 - test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools, - "{\n" - " \"tool_calls\": [\n" - " {\n" - " \"name\": \"special_function\",\n" - " \"arguments\": {\n" - " \"arg1\": 1\n" - " },\n" - " \"id\": \"123456789\"\n" - " }\n" - " ],\n" - " \"content\": \"\"\n" - "}"); -#endif - } - { - auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja"); - std::vector end_tokens{ "" }; - - assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates( - tmpls.get(), end_tokens, message_assist_call_id, tools, - "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]"); - } - { - assert_msg_equals( - simple_assist_msg("Réponse", "raisonnement"), - test_chat_parse( - message_assist_thoughts_unparsed_magistral.content, - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - } - { - auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - } - { - auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals( - COMMON_CHAT_FORMAT_HERMES_2_PRO, - common_chat_templates_apply( - read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(), - inputs_tools) - .format); - assert_equals( - COMMON_CHAT_FORMAT_HERMES_2_PRO, - common_chat_templates_apply( - read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(), - inputs_tools) - .format); - - // Test parsing - assert_msg_equals( - simple_assist_msg("", "", "python", ""), - test_chat_parse( - "```json\n" - " { \"name\" : \"python\"", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - simple_assist_msg("Let's call something\n"), - test_chat_parse( - "Let's call something\n" - "{\"name\"", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals( - simple_assist_msg("Let's call something\n"), - test_chat_parse( - "Let's call something\n" - "{\"name", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - // QwQ-32B's template adds a trailing if add_generation_prompt - "I'm\nthinking\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "Hello, world!\nWhat's up?\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "{\"arg1\": 1}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - "{\"arg1\": 1}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```xml\n" - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```xml\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```json\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```json\n" - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n" - " \n" - "``` ", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\n" - " \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n" - " }\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - - // Test multiple tool calls - common_chat_msg message_assist_multiple_calls; - message_assist_multiple_calls.role = "assistant"; - message_assist_multiple_calls.content = ""; - message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""}); - message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""}); - - assert_msg_equals( - message_assist_multiple_calls, - test_chat_parse( - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "\n" - "\n" - "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - - assert_msg_equals( - message_assist_multiple_calls, - test_chat_parse( - "{\"arg1\": 1}\n" - "{\"code\":\"print('hello')\"}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - - assert_msg_equals( - simple_assist_msg( - "This is not a tool call:", - "", - "special_function", - "{\"arg1\": 1}"), - test_chat_parse( - "This is not a tool call:\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - // assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - // test_chat_parse( - // "I'm\nthinkingHello, world!\nWhat's up?", - // COMMON_CHAT_FORMAT_HERMES_2_PRO)); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unparsed_md, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ true, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - assert_msg_equals(message_assist_thoughts_unparsed_md_partial, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ true, - /* .thinking_forced_open = */ false, - })); - assert_msg_equals(message_assist_thoughts_unopened_unparsed, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - ""); - - // Test multiple tool calls with template - common_chat_msg message_assist_multiple_calls_template; - message_assist_multiple_calls_template.role = "assistant"; - message_assist_multiple_calls_template.content = ""; - message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""}); - message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""}); - - test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools, - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "\n" - "\n" - "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n" - ""); - - test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools, - "\n" - "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n" - ""); - assert_msg_equals( - simple_assist_msg("", /* reasoning_content= */ "nah uhg"), - test_chat_parse( - "nah uhg", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - } - { - auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format); - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - common_chat_templates_apply( - read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(), - inputs_tools_builtin) - .format); - - assert_equals( - message_assist_call, - test_chat_parse( - "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LLAMA_3_X})); - - // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools, - "<|python_tag|>code_interpreter.call(code=\"print('hey')\")"); - test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools, - "<|python_tag|>python.call(code=\"print('hey')\")"); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); - } - { - auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); - } - { - auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, - common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, - common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, - common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - - for (auto is_partial : { false, true }) { - assert_equals( - message_assist_call, - test_chat_parse( - "{\"arg1\": 1}", - is_partial, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1})); - } - - assert_equals( - message_assist_call, - test_chat_parse( - "{\"arg1\": 1}<", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1})); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "{\"arg1\": 1}"); - } - { - auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - assert_msg_equals( - simple_assist_msg( - "Hello, world!\nnono\nWhat's up?", - "", - "special_function", - "{\"arg1\": 1}"), - test_chat_parse( - "all\n" - "Hello, world!\n" - "nono\n" - "What's up?>>>special_function\n" - "{\"arg1\": 1}\n", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist_call_python_lines, - test_chat_parse( - "python\n" - "# This is a program:\n" - "print('hey')", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist_call_python_lines_unclosed, - test_chat_parse( - "python\n" - "# This is a program:\n" - "print('hey')", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist_call, - test_chat_parse( - "special_function\n" - "{\"arg1\": 1} \n ", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist, - test_chat_parse( - "all\n" - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - - test_templates(tmpls.get(), end_tokens, message_assist, {}, - "all\n" - "Hello, world!\n" - "What's up?", - /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "special_function\n" - "{\"arg1\": 1}"); - } - { - auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja"); - std::vector end_tokens{ "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]"); - } - { - // Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt. - auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja"); - std::vector end_tokens{ "<|end▁of▁sentence|>" }; - - for (const auto & inputs : { inputs_no_tools, inputs_tools }) { - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format); - assert_equals(true, params.thinking_forced_open); - } - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - assert_msg_equals( - simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals( - simple_assist_msg("", "I need to remember the correct syntax. It starts with <|tool▁calls▁begin|> and ends with"), - test_chat_parse( - "I need to remember the correct syntax. It starts with <|tool▁calls▁begin|> and ends with", - /* is_partial= */ true, - { - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unopened_unparsed, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals(message_assist_thoughts, - // Latest template update (ast of 20250209) adds a trailing \n if add_generation_prompt is true. - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - // test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - // "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - // "```json\n" - // "{\"arg1\": 1}\n" - // // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic) - // "```<|tool▁call▁end|>", - // /* expect_grammar_triggered= */ true, - // /* test_grammar_if_triggered= */ false); - } - { - // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all. - auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja"); - std::vector end_tokens{ "<|end▁of▁sentence|>" }; - - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_DEEPSEEK_R1})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - - assert_msg_equals(message_assist_call_thoughts_unparsed, - test_chat_parse( - "I'm\nthinking\n\n" - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_DEEPSEEK_R1})); - assert_msg_equals(message_assist_call, - test_chat_parse( - "<|tool▁calls|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_DEEPSEEK_R1})); - - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking\n\n" - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>"); - } - { - auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja"); - std::vector end_tokens{ "<|end_of_text|>" }; - - assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - - assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GRANITE})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"), - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals(message_assist_empty, - test_chat_parse( - "I'm\nthinking", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals( - message_assist_empty, - test_chat_parse( - "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist_call_empty_args, - test_chat_parse( - "<|tool_call|>[{\"name\": \"special_function\"", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist_call_cutoff_args, - test_chat_parse( - "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist_call_cutoff_args, - test_chat_parse( - "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls with thinking - assert_msg_equals( - message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - // TODO @ngxson : generic tool call should be removed in the future -#if 0 - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools, - "{\n" - " \"tool_calls\": [\n" - " {\n" - " \"name\": \"special_function\",\n" - " \"arguments\": {\n" - " \"arg1\": 1\n" - " },\n" - " \"id\": \"123456789\"\n" - " }\n" - " ],\n" - " \"content\": \"\"\n" - "}", - /* expect_grammar_triggered= */ false - ); -#endif - } - { - auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja"); - std::vector end_tokens{ "<|return|>", "<|call|>" }; - - assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - assert_msg_equals(simple_assist_msg("", "I'm\nthink"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthink", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - - // Test parse_tool_calls == false - assert_msg_equals( - simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - assert_msg_equals( - simple_assist_msg("", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - assert_msg_equals( - simple_assist_msg("", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - - // Test reasoning formats - assert_msg_equals( - simple_assist_msg( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - })); - - assert_msg_equals( - simple_assist_msg( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ true, - })); - - // Test tool calling in role header - assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - } - { - // Seed-OSS format tests - auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja"); - std::vector end_tokens{ "" }; - - assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - - // Test simple reasoning content - assert_msg_equals( - simple_assist_msg("Hello, world!", "I'm thinking about the answer"), - test_chat_parse( - "I'm thinking about the answerHello, world!", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test budget reflection tags - common_chat_msg msg_budget_reflect; - msg_budget_reflect.role = "assistant"; - msg_budget_reflect.content = "Token usage: 45/1000\nI should continue thinking to find the best solution.I need to calculate this step by step."; - msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution."; - assert_msg_equals( - msg_budget_reflect, - test_chat_parse( - "Token usage: 45/1000\nI should continue thinking to find the best solution." - "Token usage: 45/1000\nI should continue thinking to find the best solution." - "I need to calculate this step by step.", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test tool calls with Seed-OSS format - common_chat_msg msg_tool_call; - msg_tool_call.role = "assistant"; - msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""}); - assert_msg_equals( - msg_tool_call, - test_chat_parse( - "\n" - "\n" - "[1, 2, 3]\n" - "\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_SEED_OSS})); - - // Test reasoning + tool call combination - common_chat_msg msg_reasoning_tool; - msg_reasoning_tool.role = "assistant"; - msg_reasoning_tool.content = ""; - msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers"; - msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""}); - assert_msg_equals( - msg_reasoning_tool, - test_chat_parse( - "I need to calculate the sum of these numbers" - "\n" - "\n" - "[1, 2, 3]\n" - "\n" - "", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test deltas: the number of tool calls in partial parses should never decrease - std::string tool_msg = "\n" - "\n" - "[1, 2, 3]\n" - ""; - std::size_t previousToolCalls = 0; - for (std::size_t i = std::string("").length(); i < tool_msg.length() - 1; i++) { - auto partial = tool_msg.substr(0, i); - auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK }); - if (partial_res.tool_calls.size() < previousToolCalls) { - throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size())); - } - previousToolCalls = partial_res.tool_calls.size(); - } - - // Test multiple parameters in tool call - common_chat_msg msg_multi_param; - msg_multi_param.role = "assistant"; - msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""}); - assert_msg_equals( - msg_multi_param, - test_chat_parse( - "\n" - "\n" - "test\n" - "json\n" - "\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_SEED_OSS})); - - // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done - assert_msg_equals( - simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"), - test_chat_parse( - "\n" - "\n" - "[1,\n", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_SEED_OSS})); - - // Test incomplete reasoning tag - assert_msg_equals( - simple_assist_msg("", "I was thinking"), - test_chat_parse( - "I was thinking", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test content without reasoning - assert_msg_equals( - simple_assist_msg("This is a simple response without reasoning."), - test_chat_parse( - "This is a simple response without reasoning.", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_SEED_OSS})); - } - { - auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja"); - std::vector end_tokens{ "" }; - - assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_NEMOTRON_V2})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_NEMOTRON_V2})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_NEMOTRON_V2} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?\n", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* expect_grammar_triggered= */ true - ); - } - { - auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja"); - std::vector end_tokens{ "<|end▁of▁sentence|>" }; - - for (const auto & inputs : { inputs_no_tools, inputs_tools }) { - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format); - assert_equals(true, params.thinking_forced_open); - } - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - assert_msg_equals( - simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - // variant: thinking forced open, reasoning_format none - assert_msg_equals( - simple_assist_msg("REASONINGok", ""), - test_chat_parse( - "REASONINGok", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: happy path for when it works as the model card says it should - assert_msg_equals( - simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ true, - })); - // variant: simple + thinking open - assert_msg_equals( - simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: simple + multiple tool calls - common_chat_msg message_assist_multiple_calls; - message_assist_multiple_calls.role = "assistant"; - message_assist_multiple_calls.content = "CONTENT"; - message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""}); - message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""}); - assert_msg_equals( - message_assist_multiple_calls, - test_chat_parse( - "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ true, - })); - // variant: thinking forced open + tool call in reasoning content - assert_msg_equals( - simple_assist_msg("", "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: thinking forced open + tool call in reasoning content + no closing think + not partial - // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting - // to make tool calls in reasoning content according to the model card, but it does sometimes, so - // add the reasoning content as regular content and parse the tool calls. - assert_msg_equals( - simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: thinking forced open + tool call in reasoning content + no closing think + partial - assert_msg_equals( - simple_assist_msg("", "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", "", ""), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ true, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: thinking not forced open + missing reasoning + no tool calls - assert_msg_equals( - simple_assist_msg("CONTENT", ""), - test_chat_parse( - "CONTENT", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ true, - })); - } - { - auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja"); - std::vector end_tokens{ "<|assistant_end|>" }; - - assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_APERTUS})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_APERTUS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_APERTUS})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_APERTUS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_APERTUS} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_APERTUS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>", - /* expect_grammar_triggered= */ true - ); - - // TODO @ngxson : not sure why this fails, but not very important for now - // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get())); - } - { - // LFM2 format tests - auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs { - common_chat_templates_inputs inputs; - inputs.messages = { - std::invoke([&]() -> common_chat_msg { - common_chat_msg msg; - msg.role = "system"; - msg.content = "force json schema.\n"; - return msg; - }), - message_user, - }; - inputs.tools = {special_function_tool}; - return inputs; - }); - - { - auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format); - assert_equals(false, params.grammar_lazy); - assert_equals(std::string(R"(<|im_start|>user -Hey there!<|im_end|> -<|im_start|>assistant -)"), params.prompt); - } - - { - auto params = common_chat_templates_apply(tmpls.get(), inputs_tools); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format); - assert_equals(false, params.grammar_lazy); - assert_equals(std::string(R"(<|im_start|>system -List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|> -<|im_start|>user -Hey there!<|im_end|> -<|im_start|>assistant -)"), params.prompt); - assert_equals(true, params.grammar.empty()); - } - - { - auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema); - assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format); - assert_equals(true, params.grammar_lazy); - assert_equals(std::string(R"(<|im_start|>system -List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|> -<|im_start|>user -Hey there!<|im_end|> -<|im_start|>assistant -)"), params.prompt); - assert_equals(false, params.grammar.empty()); - } - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test single tool call with JSON format - common_chat_msg msg_single_tool_call; - msg_single_tool_call.role = "assistant"; - msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""}); - assert_msg_equals( - msg_single_tool_call, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with string argument - common_chat_msg msg_tool_call_string; - msg_tool_call_string.role = "assistant"; - msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_tool_call_string, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with multiple arguments - common_chat_msg msg_multi_args; - msg_multi_args.role = "assistant"; - msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""}); - assert_msg_equals( - msg_multi_args, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test multiple tool calls in single array - common_chat_msg msg_multiple_tools; - msg_multiple_tools.role = "assistant"; - msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""}); - assert_msg_equals( - msg_multiple_tools, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with content before - common_chat_msg msg_content_before_tool; - msg_content_before_tool.role = "assistant"; - msg_content_before_tool.content = "Let me check the weather for you."; - msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_content_before_tool, - test_chat_parse( - "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with content after - common_chat_msg msg_content_after_tool; - msg_content_after_tool.role = "assistant"; - msg_content_after_tool.content = "Here's the result."; - msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_content_after_tool, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with newlines (common in LLM output) - common_chat_msg msg_tool_call_newlines; - msg_tool_call_newlines.role = "assistant"; - msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_tool_call_newlines, - test_chat_parse( - "<|tool_call_start|>[{\n \"name\": \"get_current_time\",\n \"arguments\": {\n \"location\": \"Paris\"\n }\n}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}] - // Unlike other formats, LFM2 template does not render tool calls in conversation history, - // so we don't use test_templates() for tool call generation. Instead, the parsing tests - // above verify edge cases and format variations for the tool call output format. - } - - { - auto tmpls = read_templates("models/templates/MiniMax-M2.jinja"); - std::vector end_tokens{ "[e~[" }; - - assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_MINIMAX_M2})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "1", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_MINIMAX_M2})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking1", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "1Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_MINIMAX_M2} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "I'm\nthinking1Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test streaming - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\nHello, world!\nWhat's up?\n1", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_thoughts_unparsed, - "I'm\nthinking\n\n1", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\n\n\nHello, world!\nWhat's up?\n\n\n\n1\n\n\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_withopt, - "\n\n1\n2\n\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "\n\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE, - /* ignore_whitespace_differences= */ true - ); - - // Test template generation for tools with optional parameters - test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools, - "\n\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE, - /* ignore_whitespace_differences= */ true - ); - test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools, - "\n\n1\n2\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE, - /* ignore_whitespace_differences= */ true - ); - } - - { - auto tmpls = read_templates("models/templates/GLM-4.6.jinja"); - std::vector end_tokens{ "<|assistant|>", "<|observation|>" }; - - assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GLM_4_5})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "\nI'm\nthinking\nHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - }), true); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "\nspecial_function\narg1\n1\n", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GLM_4_5}), true); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "\nI'm\nthinking\nspecial_function\narg1\n1\n", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }), true); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "\nspecial_function\narg1\n1\nHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GLM_4_5} - ), true); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "\nI'm\nthinkingHello, world!\nWhat's up?\nspecial_function\narg1\n1\n", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }), true); - - // Test streaming - test_parser_with_streaming(message_assist_call_thoughts_content, - "\nI'm\nthinkingHello, world!\nWhat's up?\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_thoughts_unparsed, - "\nI'm\nthinking\n\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(message_assist_call_withopt, - "\n\nspecial_function_with_opt\narg1\n1\narg2\n2\n\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "complex_function\n" - "name\n" - "John Doe\n" - "age\n" - "30\n" - "active\n" - "true\n" - "score\n" - "95.5\n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); }); - test_parser_with_streaming( - simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"), - "web_search\n" - "query\n" - "\"From Zero\" Linkin Park album tracklist complete songs\n" - "limit\n" - "3\n" - "type\n" - "text\n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); }); - - // Test interleaved thinking - test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"), - "\nI'm\nthinkingHello, world!\nThinking2What's up?\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(simple_assist_msg("\nI'm\nthinkingHello, world!\nThinking2What's up?", "", "special_function", "{\"arg1\": 1}"), - "\nI'm\nthinkingHello, world!\nThinking2What's up?\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "\n\nHello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "\n\nspecial_function\narg1\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ false, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - - // Test template generation for tools with optional parameters - test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools, - "\n\nspecial_function_with_opt\narg1\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ false, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools, - "\n\nspecial_function_with_opt\narg1\n1\narg2\n2\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ false, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - } - - { - auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_KIMI_K2})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_KIMI_K2})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_KIMI_K2} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "I'm\nthinking<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test streaming - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\nHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_thoughts_unparsed, - "I'm\nthinking\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\n\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_withopt, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"), - "I'm\nthinkingHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"), - "I'm\nthinkingHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"), - "I'm\nthinkingHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - test_parser_with_streaming( - simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"), - "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>" - "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - test_parser_with_streaming( - simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"), - "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>" - "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - test_parser_with_streaming( - simple_assist_msg( - "Let me start by examining the relevant files to understand the current implementation.", "", - "read_file", - "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"), - "Let me start by examining the relevant files to understand the current implementation." - "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>" - "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking."); - multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" }); - multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" }); - multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" }); - multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }); - test_parser_with_streaming(multi_tool_msg, - "I'm thinking.Let me call multiple tools." - "<|tool_calls_section_begin|>" - "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>" - "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}" - "<|tool_call_end|>" - "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>" - "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}" - "<|tool_call_end|>" - "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|>" - "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>" - "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}" - "<|tool_call_end|>" - "<|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|><|tool_calls_section_end|>I'm still thinkingHello", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - - // Test template rendering - common_chat_templates_inputs conversation_with_tools = inputs_tools; - conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}")); - conversation_with_tools.messages.push_back({ - "tool", - "Tool response 1", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "complex_function", - /* .tool_call_id = */ "", - }); - conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}")); - conversation_with_tools.messages.push_back({ - "tool", - "Tool response 2", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "web_search", - /* .tool_call_id = */ "", - }); - conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}")); - conversation_with_tools.messages.push_back({ - "tool", - "Tool response 3", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "read_file", - /* .tool_call_id = */ "", - }); - assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|>Think firstLet's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|>Think nextContinue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|>Think lastCC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>")); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - - // Test template generation for tools with optional parameters - test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - } - - // Test Qwen3-Coder XML format - { - // Basic XML tool call parsing - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " \n" - " \n" - " 1\n" - " \n" - " \n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_QWEN3_CODER_XML})); - - // Multiple parameters with different types - common_chat_msg expected_multi_param; - expected_multi_param.role = "assistant"; - expected_multi_param.tool_calls = { - { "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" } - }; - - test_parser_with_streaming(expected_multi_param, - "\n" - " \n" - " \n" - " John Doe\n" - " \n" - " \n" - " 30\n" - " \n" - " \n" - " true\n" - " \n" - " \n" - " 95.5\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Special characters and Unicode - common_chat_msg expected_special_chars; - expected_special_chars.role = "assistant"; - expected_special_chars.tool_calls = { - { "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" } - }; - - test_parser_with_streaming(expected_special_chars, - "\n" - " \n" - " \n" - " Hello 世界! 🌍 Special chars: @#$%^&*()\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Multiline content with newlines and indentation - common_chat_msg expected_multiline; - expected_multiline.role = "assistant"; - expected_multiline.tool_calls = { - { "code_function", "{\"code\":\"def hello():\\n print(\\\"Hello, World!\\\")\\n return True\"}", "" } - }; - - test_parser_with_streaming(expected_multiline, - "\n" - " \n" - " \n" - "def hello():\n" - " print(\"Hello, World!\")\n" - " return True\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // JSON object as parameter value - common_chat_msg expected_json_param; - expected_json_param.role = "assistant"; - expected_json_param.tool_calls = { - { "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" } - }; - - test_parser_with_streaming( - expected_json_param, - "\n" - " \n" - " \n" - " {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Array as parameter value - common_chat_msg expected_array_param; - expected_array_param.role = "assistant"; - expected_array_param.tool_calls = { - { "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" } - }; - - test_parser_with_streaming( - expected_array_param, - "\n" - " \n" - " \n" - " [\"apple\", \"banana\", \"cherry\"]\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Empty parameter - common_chat_msg expected_empty_param; - expected_empty_param.role = "assistant"; - expected_empty_param.tool_calls = { - { "empty_function", "{\"empty_param\":\"\"}", "" } - }; - - test_parser_with_streaming( - expected_empty_param, - "\n" - " \n" - " \n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Boolean values (true/false) - common_chat_msg expected_boolean; - expected_boolean.role = "assistant"; - expected_boolean.tool_calls = { - { "boolean_function", "{\"enabled\":true,\"debug\":false}", "" } - }; - - test_parser_with_streaming( - expected_boolean, - "\n" - " \n" - " \n" - " true\n" - " \n" - " \n" - " false\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Null value - common_chat_msg expected_null; - expected_null.role = "assistant"; - expected_null.tool_calls = { - { "null_function", "{\"optional_param\":null}", "" } - }; - - test_parser_with_streaming( - expected_null, - "\n" - " \n" - " \n" - " null\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Negative numbers and scientific notation - common_chat_msg expected_numbers; - expected_numbers.role = "assistant"; - expected_numbers.tool_calls = { - { "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" } - }; - - test_parser_with_streaming( - expected_numbers, - "\n" - " \n" - " \n" - " -42\n" - " \n" - " \n" - " -3.14\n" - " \n" - " \n" - " 1.23e-4\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // XML-like content in parameters (should be escaped) - common_chat_msg expected_xml_content; - expected_xml_content.role = "assistant"; - expected_xml_content.tool_calls = { - { "xml_function", "{\"xml_content\":\"value\"}", "" } - }; - - test_parser_with_streaming( - expected_xml_content, - "\n" - " \n" - " \n" - " value\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Quotes and escape characters - common_chat_msg expected_quotes; - expected_quotes.role = "assistant"; - expected_quotes.tool_calls = { - { "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" } - }; - - test_parser_with_streaming( - expected_quotes, - "\n" - " \n" - " \n" - " She said \"Hello!\" and left.\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Long parameter value (simplified) - std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data."; - - common_chat_msg expected_long_text; - expected_long_text.role = "assistant"; - expected_long_text.tool_calls = { - { "long_function", "{\"long_text\":\"" + long_text + "\"}", "" } - }; - - test_parser_with_streaming( - expected_long_text, - "\n" - " \n" - " \n" - " " + long_text + "\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Mixed content with text before and after tool call - common_chat_msg expected_mixed_content; - expected_mixed_content.role = "assistant"; - expected_mixed_content.content = "I'll help you search for products. "; - expected_mixed_content.tool_calls = { - { "search_function", "{\"query\":\"laptops\"}", "" } - }; - - test_parser_with_streaming( - expected_mixed_content, - "I'll help you search for products. \n" - " \n" - " \n" - " laptops\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Compact format (no extra whitespace) - common_chat_msg expected_compact; - expected_compact.role = "assistant"; - expected_compact.tool_calls = { - { "compact_function", "{\"param\":\"value\"}", "" } - }; - - test_parser_with_streaming( - expected_compact, - "value", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Function name with underscores and numbers - common_chat_msg expected_complex_name; - expected_complex_name.role = "assistant"; - expected_complex_name.tool_calls = { - { "get_user_data_v2", "{\"user_id\":12345}", "" } - }; - - test_parser_with_streaming( - expected_complex_name, - "\n" - " \n" - " \n" - " 12345\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Parameter names with underscores and numbers - common_chat_msg expected_complex_params; - expected_complex_params.role = "assistant"; - expected_complex_params.tool_calls = { - { "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" } - }; - - test_parser_with_streaming( - expected_complex_params, - "\n" - " \n" - " \n" - " value1\n" - " \n" - " \n" - " value2\n" - " \n" - " \n" - " 123\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Very deeply nested XML content in parameter - common_chat_msg expected_deep_xml; - expected_deep_xml.role = "assistant"; - expected_deep_xml.tool_calls = { - { "xml_parser", "{\"xml\":\"deep content\"}", "" } - }; - - test_parser_with_streaming( - expected_deep_xml, - "\n" - " \n" - " \n" - " deep content\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Parameter with only whitespace - common_chat_msg expected_whitespace_param; - expected_whitespace_param.role = "assistant"; - expected_whitespace_param.tool_calls = { - { "whitespace_function", "{\"spaces\":\"\"}", "" } - }; - - test_parser_with_streaming( - expected_whitespace_param, - "\n" - " \n" - " \n" - " \n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Parameter with tabs and mixed whitespace - common_chat_msg expected_mixed_whitespace; - expected_mixed_whitespace.role = "assistant"; - expected_mixed_whitespace.tool_calls = { - { "tab_function", "{\"content\":\"line1\\n\\tindented line\\n spaces\"}", "" } - }; - - test_parser_with_streaming( - expected_mixed_whitespace, - "\n" - " \n" - " \n" - "line1\n" - "\tindented line\n" - " spaces\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Control characters and special Unicode - common_chat_msg expected_control_chars; - expected_control_chars.role = "assistant"; - expected_control_chars.tool_calls = { - { "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" } - }; - - test_parser_with_streaming( - expected_control_chars, - "\n" - " \n" - " \n" - "Line1\nLine2\tTabbed\rCarriage return\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Emoji and extended Unicode characters - common_chat_msg expected_emoji; - expected_emoji.role = "assistant"; - expected_emoji.tool_calls = { - { "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" } - }; - - test_parser_with_streaming( - expected_emoji, - "\n" - " \n" - " \n" - " Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Mathematical expressions and formulas - common_chat_msg expected_math; - expected_math.role = "assistant"; - expected_math.tool_calls = { - { "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" } - }; - - test_parser_with_streaming( - expected_math, - "\n" - " \n" - " \n" - " E = mc² and ∫f(x)dx = F(x) + C\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // SQL injection-like content (should be safely escaped) - common_chat_msg expected_sql; - expected_sql.role = "assistant"; - expected_sql.tool_calls = { - { "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" } - }; - - test_parser_with_streaming( - expected_sql, - "\n" - " \n" - " \n" - " SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // HTML/XML injection content - common_chat_msg expected_html; - expected_html.role = "assistant"; - expected_html.tool_calls = { - { "html_function", "{\"content\":\"\"}", "" } - }; - - test_parser_with_streaming( - expected_html, - "\n" - " \n" - " \n" - " \n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Binary-like content (base64) - common_chat_msg expected_binary; - expected_binary.role = "assistant"; - expected_binary.tool_calls = { - { "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" } - }; - - test_parser_with_streaming( - expected_binary, - "\n" - " \n" - " \n" - " SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Very large numbers (should be parsed as scientific notation) - common_chat_msg expected_large_numbers; - expected_large_numbers.role = "assistant"; - expected_large_numbers.tool_calls = { - { "number_function", "{\"big_int\":1e+60}", "" } // Large number becomes scientific notation - }; - - test_parser_with_streaming( - expected_large_numbers, - "\n" - " \n" - " \n" - " 999999999999999999999999999999999999999999999999999999999999\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - } - - { - // Qwen3-Coder template - auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja"); - common_chat_templates_inputs inputs; - inputs.messages = { message_user }; - - common_chat_tool qwen_union_tool { - /* .name = */ "qwen_union", - /* .description = */ "Test tool for union/anyOf handling", - /* .parameters = */ R"({ - "type": "object", - "properties": { - "priority": { "type": ["number", "null"] }, - "maybe_text": { "anyOf": [ { "type": "string" } ] }, - "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] } - }, - "required": [] - })", - }; - inputs.tools = { qwen_union_tool }; - - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format); - assert_equals(false, params.grammar.empty()); - - // Grammar should compile successfully - auto grammar = build_grammar(params.grammar); - GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types"); - } -} - -static void test_template_output_peg_parsers() { - printf("[%s]\n", __func__); +static void test_template_output_peg_parsers(bool detailed_debug) { + LOG_DBG("%s\n", __func__); // JSON schemas const char * invoice_schema = R"({ @@ -3569,368 +1172,1203 @@ static void test_template_output_peg_parsers() { { // Ministral-3-14B-Reasoning-2512 - auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja"); + auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug); - // Test basic message - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "Hello, world!\nWhat's up?"; - t.expect = message_assist; - }); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - // Test basic message and reasoning with reasoning_format = none - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?"; - t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?"; - }); + tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?") + .expect_content("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?") + .run(); - // Test basic message and reasoning with reasoning_format = auto - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); - t.expect = message_assist_thoughts; - }); + tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); - // Test tool call - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; + tst.test( + "[THINK]I'm\nthinking[/THINK]" + R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); - t.expect = message_assist_call; - }); + tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})" + R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); - // Test tool call with reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I'm\nthinking[/THINK]" - R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; - - t.expect = message_assist_call_thoughts; - }); - - // Test parallel tool calls - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})" - R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.parallel_tool_calls = true; - t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; - - t.expect.tool_calls = {{ - /* .name = */ "special_function", - /* .arguments = */ R"({"arg1": 1})", - /* .id = */ {}, - }, { - /* .name = */ "special_function_with_opt", - /* .arguments = */ R"({"arg1": 1, "arg2": 2})", - /* .id = */ {}, - }}; - }); - - // Test response format - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I need to output the invoice details in JSON[/THINK]" - "```json\n" - R"({"amount": 123.45, "date": "2025-12-03"})" - "\n```"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.json_schema = invoice_schema; - - t.expect.reasoning_content = "I need to output the invoice details in JSON"; - t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})"; - }); + tst.test( + "[THINK]I need to output the invoice details in JSON[/THINK]" + "```json\n" + R"({"amount": 123.45, "date": "2025-12-03"})" + "\n```") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .json_schema(invoice_schema) + .expect_reasoning("I need to output the invoice details in JSON") + .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})") + .run(); } { // NVIDIA Nemotron-3 Nano - auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja"); + auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug); - // Test basic message - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "Hello, world!\nWhat's up?"; - t.expect = message_assist; - }); + tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run(); - // Test basic message and reasoning with reasoning_format = none - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "I'm\nthinking\n\nHello, world!\nWhat's up?"; - t.expect.content = "I'm\nthinking\n\nHello, world!\nWhat's up?"; - }); + tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_NONE) + .expect_content("I'm\nthinking\n\nHello, world!\nWhat's up?") + .run(); - // Test basic message and reasoning with reasoning_format = auto - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "I'm\nthinking\n\nHello, world!\nWhat's up?"; - t.params.enable_thinking = true; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); - t.expect = message_assist_thoughts; - }); + tst.test( + "\n" + "\n" + "\n1\n\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); - // Test tool call - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; + tst.test( + "I'm\nthinking\n\n" + "\n" + "\n" + "\n1\n\n" + "\n" + "") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); - t.expect = message_assist_call; - }); + tst.test( + "\n" + "\n" + "\n1\n\n" + "\n" + "\n" + "\n" + "\n" + "\n1\n\n" + "\n2\n\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); - // Test tool call with reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "I'm\nthinking\n\n" - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - ""; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; + tst.test( + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ + python_tool + }) + .expect_tool_calls({ + { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} }, + }) + .run(); - t.expect = message_assist_call_thoughts; - }); - - // Test parallel tool calls - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - "2\n" - "\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.parallel_tool_calls = true; - t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; - - t.expect.tool_calls = {{ - /* .name = */ "special_function", - /* .arguments = */ R"({"arg1": 1})", - /* .id = */ {}, - }, { - /* .name = */ "special_function_with_opt", - /* .arguments = */ R"({"arg1": 1, "arg2": 2})", - /* .id = */ {}, - }}; - }); - - // Test tool call with string parameter - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "def hello():\n" - " print(\"Hello, world!\")\n" - "\n" - "hello()\n" - "\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {python_tool}; - - t.expect.tool_calls = {{ - /* .name = */ "python", - /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", - /* .id = */ {}, - }}; - }); - - // Test tool call with string parameter and no closing tag - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "def hello():\n" - " print(\"Hello, world!\")\n" - "\n" - "hello()\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {python_tool}; - - t.expect.tool_calls = {{ - /* .name = */ "python", - /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", - /* .id = */ {}, - }}; - }); - - // Test response format - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "I need to output the invoice details in JSON\n" - "\n" - R"({"amount": 123.45, "date": "2025-12-03"})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.json_schema = invoice_schema; - - t.expect.reasoning_content = "I need to output the invoice details in JSON"; - t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})"; - }); + tst.test( + "I need to output the invoice details in JSON\n" + "\n" + R"({"amount": 123.45, "date": "2025-12-03"})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .json_schema(invoice_schema) + .expect_reasoning("I need to output the invoice details in JSON") + .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})") + .run(); } { - // Solar-Open-100B - auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja"); + // CohereForAI Command-R 7B (2024-tool_use) + auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug); - // Test basic message - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|content|>Hello, world!\nWhat's up?"; - t.expect = message_assist; - }); + tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run(); - // Test basic message and reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?"; - t.expect = message_assist_thoughts; - }); + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); - // Test basic message and reasoning_effort = low - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|content|>Hello, world!\nWhat's up?"; - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.expect = message_assist; - }); + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>") + .expect(message_assist_thoughts_unparsed_r7b) + .run(); - // Test tool call - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|tool_calls|>" - "<|tool_call:begin|>123456789" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_thoughts_call_idx) + .run(); - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.params.tools = {special_function_tool}; - t.expect = message_assist_call_id; - }); + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", ") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .is_partial(true) + .expect(message_assist_thoughts_partial_call) + .run(); - // Test tool call with reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|>" - "<|begin|>assistant<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; + tst.test( + "<|START_THINKING|><|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_call_idx) + .run(); + } - t.params.tools = {special_function_tool}; - t.expect = message_assist_thoughts_call_idx; - }); + { + // Google Gemma 2 2B - does not support tool calling + auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja"); - // Test tool call with reasoning and tool_choice = required - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|>" - "<|begin|>assistant<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; + tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run(); - t.params.tools = {special_function_tool}; - t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED; - t.expect = message_assist_thoughts_call_idx; - }); + tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).run(); + } - // Test tool call without reasoning and tool_choice = required - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; + { + // Qwen-QwQ-32B (reasoning model) + auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja"); - t.params.tools = {special_function_tool}; - t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED; - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.expect = message_assist_call_idx; - }); + // QwQ always has thinking forced open - input starts after the \n in the prompt + tst.test("Let me think about this...\n\nThe answer is 42.") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(simple_assist_msg("The answer is 42.", "Let me think about this...")) + .run(); - // Test parallel tool calls - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|>" - "<|begin|>assistant<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>" - "<|tool_call:begin|>1" - "<|tool_call:name|>special_function_with_opt" - "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}" - "<|tool_call:end|>"; + tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run(); + } + { + // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models) + auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug); - t.params.parallel_tool_calls = true; - t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; + tst.test( + "\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); - t.expect.reasoning_content = "I'm\nthinking"; - t.expect.tool_calls = {{ - /* .name = */ "special_function", - /* .arguments = */ R"({"arg1": 1})", - /* .id = */ "0", - }, { - /* .name = */ "special_function_with_opt", - /* .arguments = */ R"({"arg1": 1, "arg2": 2})", - /* .id = */ "1", - }}; - }); + tst.test( + "Hello, world!\nWhat's up?\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call_content) + .run(); - // Test response format - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I need to output the invoice details in JSON<|end|>" - "<|begin|>assistant<|content|>" - R"({"amount": 123.45, "date": "2025-12-03"})"; + // Note: Hermes template doesn't support thinking/reasoning natively + // Note: We only support one tool calling format per template, no alternate formats + } + { + // Test simple content-only template + auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug); - t.params.json_schema = invoice_schema; + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + } + { + // IBM Granite (reasoning and tool calling model) + auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug); - t.expect.reasoning_content = "I need to output the invoice details in JSON"; - t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})"; - }); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - // Test response format no reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|content|>" - R"({"amount": 123.45, "date": "2025-12-03"})"; + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.params.json_schema = invoice_schema; + // TODO: pending support for WRAPPED_WITH_REASONING + // tst.test("I'm\nthinkingHello, world!\nWhat's up?") + // .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + // .expect(message_assist_thoughts) + // .run(); + } + + { + // ByteDance-Seed-OSS (reasoning and tool calling model) + auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug); + + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + + tst.test("I'm thinking about the answerHello, world!") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer")) + .run(); + + tst.test( + "\n" + "\n" + "1\n" + "\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + tst.test( + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "2\n" + "\n" + "") + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + tst.test( + "\n" + "\n" + "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n" + "\n" + "") + .tools({ + todo_list + }) + .expect_tool_calls({ + { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} }, + }) + .run(); + + // single-quote normalization + tst.test( + "\n" + "\n" + "[{'item': 'Check stuff', 'selected': false}, {'item': 'Prepare stuff', 'selected': true}]\n" + "\n" + "") + .tools({ + todo_list + }) + .expect_tool_calls({ + { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} }, + }) + .run(); + + // tool call with inside quotes + tst.test( + "\n" + "\n" + "\n" + "foo.cpp\n" + "\n" + "" + "def foo(arg = \"14\"):\n" + " return arg + \"bar\"\n" + "\n" + "\n" + "" + "def foo(arg = \"15\"):\n" + " pass\n" + "\n" + "\n" + "\n" + "") + .tools({ + edit_tool + }) + .expect_tool_calls({ + { "edit", "{\"filename\": \"foo.cpp\", " + "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", " + "\"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}", {} + } + }) + .run(); + } + + { + // Qwen3-Coder (tool calling with XML-style format) + auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug); + + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + + tst.test( + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + tst.test( + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "2\n" + "\n" + "\n" + "") + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + // Test with code content (multiline) + tst.test( + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "") + .tools({ + python_tool + }) + .expect_tool_calls({ + { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} }, + }) + .run(); + + // Test with HTML tag content + tst.test( + "\n" + "\n" + "\n" + "\n" + " \n" + " Hello!\n" + " \n" + "\n" + "\n" + "\n" + "") + .tools({ + html_tool + }) + .expect_tool_calls({ + { "html", "{\"markup\": \"\\n \\n Hello!\\n \\n\"}", {} }, + }) + .run(); + + // Test with TODO list (array of objects) + tst.test( + "\n" + "\n" + "\n" + "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n" + "\n" + "\n" + "") + .tools({ + todo_list + }) + .expect_tool_calls({ + { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} }, + }) + .run(); + } + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test( + "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " + "\"XYZCITY\"}<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ get_time_tool }) + .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) + .run(); + } + + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test( + "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " + "\"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ get_time_tool }) + .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING")) + .run(); + } + + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test( + "REASONINGCONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " + "\"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": " + "\"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ + get_time_tool, get_weather_tool + }) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .parallel_tool_calls(true) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "REASONING", "CONTENT", + { { "get_time", "{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } })) + .run(); + } + + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test("REASONING\nCONTENT") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(simple_assist_msg("CONTENT", "REASONING\n")) + .run(); + } + + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run(); + } + + // GLM-4.6 tests - format: function_name\n...\n...\n + { + auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug); + tst.test( + "special_function\n" + "arg1\n1\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // GLM-4.7-Flash tests - format: function_name...... + // Note: Template uses forced-open thinking mode (prompt ends with ) + { + auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug); + + // Pure content (no reasoning) + tst.test("Hello, world!\nWhat's up?") + .enable_thinking(false) + .expect(message_assist) + .run(); + + // Reasoning with content (forced-open mode - input starts after ) + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + + // Tool call without reasoning + tst.test( + "special_function" + "arg11" + "") + .enable_thinking(false) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with reasoning (forced-open mode) + tst.test( + "I'm\nthinking" + "special_function" + "arg11" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // String argument starting with '[' - should NOT be treated as JSON array + // This tests the fix for Godot scene files and similar content + tst.test( + "html" + "markup[gd_scene load_steps=3 format=3]" + "") + .enable_thinking(false) + .tools({ html_tool }) + .expect_tool_calls({ + { "html", "{\"markup\": \"[gd_scene load_steps=3 format=3]\"}", {} }, + }) + .run(); + + // Multiple tool calls + // Note: Parallel tool calls streaming test skipped - the KEY_VALUE_TAGS format has + // partial parsing edge cases when function names share common prefixes (special_function vs special_function_with_opt) + // The grammar and full parsing work correctly, but incremental streaming detection needs more work. + } + + // Kimi-K2-Thinking tests - FUNC_PREFIXED_INDEXED format + { + auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug); + tst.test( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>" + "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format + // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|> + { + auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug); + tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // MiniMax-M2 tests - XML invoke format with parameter tags + // Format: value + { + auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug); + tst.test( + "\n\n1\n\n") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // NVIDIA-Nemotron-Nano-v2 tests - ... format + // Format: [{"name": "func", "arguments": {...}}] + { + auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug); + tst.test("[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION) + { + auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug); + tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run(); + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_thoughts_call_idx) + .run(); + } + // CohereForAI-c4ai-command-r-plus (uses markdown code block format) + { + auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug); + tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run(); + // Tool calls: Action: followed by JSON code block + tst.test( + "Action:\n" + "```json\n" + "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n" + "```") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // mistralai-Mistral-Nemo-Instruct-2407.jinja + { + auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]") + .tools({ special_function_tool }) + .expect(message_assist_call_id) + .run(); + } + { + auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + // Functionary v3.2 - recipient-based format: >>>recipient\n{content} + { + auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug); + tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run(); + tst.test(">>>special_function\n{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // FireFunction + { + auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking) + // Note: Template uses forced-open mode (prompt ends with ), so input shouldn't include opening tag + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?") + .enable_thinking(true) // Forced open + .expect(message_assist) + .run(); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + } + // llama-cpp DeepSeek R1 template (always forced-open thinking) + { + auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + tst.test( + "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" + "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ special_function_tool }) + .parallel_tool_calls(true) + .expect(message_assist_call) + .run(); + } + // DeepSeek R1 Distill Qwen 32B - reasoning tests only (forced open thinking) + // Note: Template uses forced-open mode (prompt ends with ), so input shouldn't include opening tag + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run(); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + tst.test( + "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" + "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + // Kimi-K2 (moonshotai) - FUNC_PREFIXED_INDEXED format + { + auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>" + "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + // Kimi-K2-Instruct - FUNC_PREFIXED_INDEXED format + { + auto tst = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>" + "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // MiMo-VL / Hermes 3 / Qwen 2.5 (Common JSON format) + for (const auto & path : + { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja", + "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) { + auto tst = peg_tester(path, detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // Apriel 1.5 + { + auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // Apriel 1.6 Thinker (reasoning-only support) + { + auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Implicit reasoning start (forced open) + tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); + + // Reasoning + Tool calls + tst.test( + "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n[{\"name\": \"special_function\", \"arguments\": " + "{\"arg1\": 1}}]") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + } + + // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...} + { + auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call_id) + .run(); + } + // Devstral - FUNC_BRACKET_TAG format (no ID marker): [TOOL_CALLS]func_name[ARGS]{...} + { + auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call_content) + .run(); + } + + { + // Llama 3.1 + auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run(); + } + + { + // Llama 3.2 + auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run(); + } + + { + // Llama 3.3 + auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).run(); + } + + // GPT-OSS format tests + { + auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug); + + // Basic content only - final channel + tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Basic content only - commentary channel + tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Analysis channel (reasoning) with final channel (content) + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's " + "up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); + + // Analysis channel only (partial) - still works when reasoning format is set + tst.test("<|channel|>analysis<|message|>I'm\nthinking") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .is_partial(true) + .expect_reasoning("I'm\nthinking") + .run(); + + // Reasoning format none - reasoning stays in content + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's " + "up?") + .reasoning_format(COMMON_REASONING_FORMAT_NONE) + .expect_content( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?") + .run(); + + // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON" + tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON" + tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON" + tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call in commentary channel (channel header variant) + tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with reasoning + content (analysis first, then tool call) + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n" + "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Tool calling with extra channel before + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary" + " to=functions.special_function <|message|>{\"arg1\": 1}") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Reasoning after final channel + // Tool calling after final channel + tst.test( + "<|channel|>final<|message|><|end|>" + "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..." + ) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect_reasoning("Thinking about edit...") + .expect_content("") + .run(); + + // Tool calling after final channel + tst.test( + "<|channel|>final<|message|><|end|>" + "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>" + "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json" + "<|message|>{\"filePath\": \"file.js\", \"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}" + ) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ + { + /* .name = */ "edit", + /* .description = */ "Edit a file", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "oldString": { + "type": "string", + "description": "Old string to replace." + }, + "newString": { + "type": "string", + "description": "New replacement string." + }, + "replaceAll": { + "type": "boolean", + "description": "Whether to replace all occurences." + } + }, + "required": ["oldString", "newString"] + })", + } + }) + .expect_reasoning("Thinking about edit...") + .expect_tool_calls({ + { "edit", R"({"filePath": "file.js", "oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} } + }) + .run(); + + // Parallel tool calls + tst.test( + " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n" + "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, " + "\"arg2\": 2}") + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + } + + { + auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug); + tst.test("I was thinkingNow I'm not."). + enable_thinking(true). + reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK). + expect_reasoning("I was thinking"). + expect_content("Now I'm not.") + .run(); + + // Test that numeric-looking string values are coerced to strings per the schema + tst.test( + "Let me call the magic tool\n" + "\n" + "\n" + "\n" + "\nfooBar\n\n" + "\n5123123\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_tool }) + .expect_reasoning("Let me call the magic tool") + .expect_tool_calls({ + { "magic", R"({"name": "fooBar", "ref": "5123123"})", {} }, + }) + .run(); + + // Test that numeric values are correctly interpreted as numbers when schema calls for number + tst.test( + "Let me call the special function\n" + "\n" + "\n" + "\n" + "\n42555916\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect_reasoning("Let me call the special function") + .expect_tool_calls({ + { "special_function", R"({"arg1": 42555916})", {} }, + }) + .run(); + + tst.test( + "Let me call the special function with opt\n" + "\n" + "\n" + "\n" + "\n42555916\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool_with_optional_param }) + .expect_reasoning("Let me call the special function with opt") + .expect_tool_calls({ + { "special_function_with_opt", R"({"arg1": 42555916})", {} }, + }) + .run(); + + tst.test( + "Let me call the magic_int function\n" + "\n" + "\n" + "\n" + "\n42555916\n\n" + "\nbaz\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_int_tool }) + .expect_reasoning("Let me call the magic_int function") + .expect_tool_calls({ + { "magic_int", R"({"ref": 42555916, "name": "baz"})", {} }, + }) + .run(); + + tst.test( + "Call string_param with empty text\n" + "\n" + "\n" + "\n" + "\n\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ string_param_tool }) + .expect_reasoning("Call string_param with empty text") + .expect_tool_calls({ + { "string_param", R"({"text": ""})", {} }, + }) + .run(); + + tst.test( + "Test simple quoted unquoted\n" + "\n" + "\n" + "\n" + "\n\"foo\"\n\n" + "\nfoo\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ quoted_unquoted_tool }) + .expect_reasoning("Test simple quoted unquoted") + .expect_tool_calls({ + { "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} }, + }) + .run(); + + tst.test( + "Test complex quoted unquoted\n" + "\n" + "\n" + "\n" + "\n\"printf(\\\"foo\\\");\"\n\n" + "\nprintf(\"foo\");\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ quoted_unquoted_tool }) + .expect_reasoning("Test complex quoted unquoted") + .expect_tool_calls({ + { "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} } + }) + .run(); + + tst.test( + "Test negative number\n" + "\n" + "\n" + "\n" + "\n-14\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_int_tool }) + .expect_reasoning("Test negative number") + .expect_tool_calls({ + { "magic_int", R"({ "ref" : -14 })", {} } + }) + .run(); + + tst.test( + "Test decimal number\n" + "\n" + "\n" + "\n" + "\n3.14\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ amount_tool }) + .expect_reasoning("Test decimal number") + .expect_tool_calls({ + { "amount", R"({ "orig" : 3.14 })", {} } + }) + .run(); + + tst.test( + "Test imaginary number\n" + "\n" + "\n" + "\n" + "\n" + "{ \"real\": 3.14, \"imaginary\": 2.71 }\n" + "\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ imaginary_number_tool }) + .expect_reasoning("Test imaginary number") + .expect_tool_calls({ + { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} } + }) + .run(); - t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})"; - }); } } static void test_msg_diffs_compute() { - printf("[%s]\n", __func__); + LOG_DBG("%s\n", __func__); { common_chat_msg msg1; @@ -3940,9 +2378,7 @@ static void test_msg_diffs_compute() { common_chat_msg_diff diff; diff.content_delta = "Hello, world!"; - assert_equals( - {diff}, - common_chat_msg_diff::compute_diffs(msg1, msg2)); + assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2)); } { common_chat_msg msg1; @@ -3954,37 +2390,35 @@ static void test_msg_diffs_compute() { common_chat_msg_diff diff; diff.content_delta = " world!"; - assert_equals( - {diff}, - common_chat_msg_diff::compute_diffs(msg1, msg2)); + assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2)); } { common_chat_msg msg0; common_chat_msg msg1; - msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } }; + msg1.tool_calls = { + { "special_function", "{\"ar", /* .id = */ "123" } + }; common_chat_msg msg2; - msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } }; + msg2.tool_calls = { + { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } + }; common_chat_msg_diff diff01; - diff01.tool_call_index = 0; - diff01.tool_call_delta.name = "special_function"; - diff01.tool_call_delta.id = "123"; + diff01.tool_call_index = 0; + diff01.tool_call_delta.name = "special_function"; + diff01.tool_call_delta.id = "123"; diff01.tool_call_delta.arguments = "{\"ar"; - assert_equals( - {diff01}, - common_chat_msg_diff::compute_diffs(msg0, msg1)); + assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1)); common_chat_msg_diff diff12; - diff12.tool_call_index = 0; + diff12.tool_call_index = 0; // Note: neither id nor name change here. diff12.tool_call_delta.arguments = "g1\": 1}"; - assert_equals( - {diff12}, - common_chat_msg_diff::compute_diffs(msg1, msg2)); + assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2)); } { common_chat_msg msg0; @@ -3996,68 +2430,81 @@ static void test_msg_diffs_compute() { }; common_chat_msg_diff diff1; - diff1.tool_call_index = 0; - diff1.tool_call_delta.name = "f1"; - diff1.tool_call_delta.id = "123"; + diff1.tool_call_index = 0; + diff1.tool_call_delta.name = "f1"; + diff1.tool_call_delta.id = "123"; diff1.tool_call_delta.arguments = "{\"arg1\": 1}"; common_chat_msg_diff diff2; - diff2.tool_call_index = 1; - diff2.tool_call_delta.name = "f2"; - diff2.tool_call_delta.id = "222"; + diff2.tool_call_index = 1; + diff2.tool_call_delta.name = "f2"; + diff2.tool_call_delta.id = "222"; diff2.tool_call_delta.arguments = "{\"arg2\": 2}"; - assert_equals( - {diff1, diff2}, - common_chat_msg_diff::compute_diffs(msg0, msg2)); + assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2)); } } int main(int argc, char ** argv) { - common_log_set_verbosity_thold(999); + bool detailed_debug = false; + bool only_run_filtered = false; - // try { -#ifndef _WIN32 - if (argc > 1) { - common_chat_templates_inputs inputs; - common_chat_msg msg; - msg.role = "user"; - msg.content = "Hey"; - inputs.messages = {msg}; - inputs.tools = { special_function_tool }; - - std::cout << "| Template | Format |\n"; - std::cout << "|----------|--------|\n"; - - for (int i = 1; i < argc; i++) { - try { - std::string path = argv[i]; - if (path.rfind(".jinja") != path.size() - 6) { - std::cerr << "Skipping non-jinja file: " << path << '\n'; - continue; - } - auto tmpls = read_templates(path); - auto parts = string_split(path, "/"); - auto name = parts[parts.size() - 1]; - auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format); - std::cout << "| " << name << " | " << format << " |\n"; - } catch (const std::exception & e) { - std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n'; - } - } - } else -#endif - { - test_msg_diffs_compute(); - test_msgs_oaicompat_json_conversion(); - test_tools_oaicompat_json_conversion(); - test_template_output_parsers(); - test_template_output_peg_parsers(); - std::cout << "\n[chat] All tests passed!" << '\n'; + // Check for --template flag + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "--template" && i + 1 < argc) { + g_template_filter = argv[++i]; + // Only run PEG parser tests with the filter + only_run_filtered = true; } + if (arg == "--detailed") { + detailed_debug = true; + common_log_set_verbosity_thold(999); + } + } + + if (only_run_filtered) { + test_template_output_peg_parsers(detailed_debug); + std::cout << "\n[chat] All template tests passed!" << '\n'; return 0; - // } catch (const std::exception & e) { - // std::cerr << "Error: " << e.what() << '\n'; - // return 1; - // } + } + +#ifndef _WIN32 + if (argc > 1) { + common_chat_templates_inputs inputs; + common_chat_msg msg; + msg.role = "user"; + msg.content = "Hey"; + inputs.messages = { msg }; + inputs.tools = { special_function_tool }; + + std::cout << "| Template | Format |\n"; + std::cout << "|----------|--------|\n"; + + for (int i = 1; i < argc; i++) { + try { + std::string path = argv[i]; + if (path.rfind(".jinja") != path.size() - 6) { + std::cerr << "Skipping non-jinja file: " << path << '\n'; + continue; + } + auto tmpls = read_templates(path); + auto parts = string_split(path, "/"); + const auto & name = parts[parts.size() - 1]; + const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format); + std::cout << "| " << name << " | " << format << " |\n"; + } catch (const std::exception & e) { + std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n'; + } + } + } else +#endif + { + test_msg_diffs_compute(); + test_msgs_oaicompat_json_conversion(); + test_tools_oaicompat_json_conversion(); + test_template_output_peg_parsers(detailed_debug); + std::cout << "\n[chat] All tests passed!" << '\n'; + } + return 0; } diff --git a/tests/test-peg-parser.cpp b/tests/test-peg-parser.cpp index 220745d029..7d22d77612 100644 --- a/tests/test-peg-parser.cpp +++ b/tests/test-peg-parser.cpp @@ -20,6 +20,7 @@ int main(int argc, char *argv[]) { t.test("json", test_json_parser); t.test("gbnf", test_gbnf_generation); t.test("serialization", test_json_serialization); + t.test("python-dict", test_python_dict_parser); return t.summary(); } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 518f8b9ae7..7c63b3aae5 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -26,6 +26,7 @@ else() add_subdirectory(server) endif() add_subdirectory(tokenize) + add_subdirectory(parser) add_subdirectory(tts) add_subdirectory(mtmd) if (GGML_RPC) diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt new file mode 100644 index 0000000000..55e0c63437 --- /dev/null +++ b/tools/parser/CMakeLists.txt @@ -0,0 +1,20 @@ +if (NOT WIN32 OR NOT BUILD_SHARED_LIBS) + # this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API + set(TARGET llama-debug-template-parser) + add_executable(${TARGET} debug-template-parser.cpp) + target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) + target_compile_features(${TARGET} PRIVATE cxx_std_17) + + if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} RUNTIME) + endif() +endif() + +set(TARGET llama-template-analysis) +add_executable(${TARGET} template-analysis.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) + +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} RUNTIME) +endif() diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp new file mode 100644 index 0000000000..17cbdfea18 --- /dev/null +++ b/tools/parser/debug-template-parser.cpp @@ -0,0 +1,488 @@ +#include "../src/llama-grammar.h" +#include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" +#include "chat.h" +#include "common.h" +#include "gguf.h" +#include "jinja/runtime.h" +#include "log.h" + +#include +#include +#include +#include + +#include "nlohmann/json.hpp" +#include "peg-parser.h" + +using json = nlohmann::ordered_json; + +enum class output_mode { + ANALYSIS, // Only output analysis results (default) + TEMPLATE, // Only output rendered template + BOTH // Output both +}; + +enum class input_message_type { + NONE, // Don't render any message scenarios (only analysis) + CONTENT_ONLY, // Simple assistant message with content + REASONING_CONTENT, // Message with reasoning_content + content + TOOL_CALL_ONLY, // Message with tool_calls only + CONTENT_TOOL_CALL, // Message with content + tool_calls + REASONING_TOOL_CALL, // Message with reasoning_content + tool_calls + CONTENT_FAKE_TOOL_CALL, // Message with content but no actual tool_calls (for testing) + ALL // Render all scenarios +}; + +struct debug_options { + std::string template_path; + bool with_tools = true; + bool generation_prompt = true; + bool enable_reasoning = true; + bool debug_jinja = false; + output_mode mode = output_mode::BOTH; + input_message_type input_message = input_message_type::NONE; +}; + +static std::string read_file(const std::string & path) { + std::ifstream fin(path, std::ios::binary); + if (!fin.is_open()) { + throw std::runtime_error("Could not open file: " + path); + } + std::ostringstream buf; + buf << fin.rdbuf(); + return buf.str(); +} + +static std::string read_gguf_chat_template(const std::string & path) { + struct gguf_init_params params = { /*no_alloc =*/true, // We only need metadata, not tensor data + /*ctx=*/nullptr }; + + struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params); + if (ctx == nullptr) { + throw std::runtime_error("Could not open GGUF file: " + path); + } + + const char * key = "tokenizer.chat_template"; + int64_t key_id = gguf_find_key(ctx, key); + + if (key_id == -1) { + gguf_free(ctx); + throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key)); + } + + const char * template_str = gguf_get_val_str(ctx, key_id); + if (template_str == nullptr) { + gguf_free(ctx); + throw std::runtime_error("GGUF file contains chat template key but value is null"); + } + + std::string result = template_str; + gguf_free(ctx); + return result; +} + +static void print_usage(const char * program_name) { + LOG_ERR("Usage: %s [options]\n", program_name); + LOG_ERR("\nOptions:\n"); + LOG_ERR(" --no-tools Disable tool definitions\n"); + LOG_ERR(" --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n"); + LOG_ERR(" --enable-reasoning=0|1 Enable reasoning parsing (default: 1)\n"); + LOG_ERR(" --output=MODE Output mode: analysis, template, both (default: both)\n"); + LOG_ERR(" --debug-jinja Enable Jinja fine-grained debug\n"); + LOG_ERR(" --input-message=TYPE Message type to render:\n"); + LOG_ERR(" content_only, reasoning_content, tool_call_only,\n"); + LOG_ERR(" content_tool_call, reasoning_tool_call,\n"); + LOG_ERR(" content_fake_tool_call, all\n"); + LOG_ERR("\nExamples:\n"); + LOG_ERR(" %s template.jinja --input-message=all --generation-prompt=1\n", program_name); + LOG_ERR(" %s template.jinja --output=template --input-message=tool_call_only\n", program_name); +} + +static bool parse_bool_option(const std::string & value) { + return value == "1" || value == "true" || value == "yes"; +} + +static bool parse_options(int argc, char ** argv, debug_options & opts) { + if (argc < 2) { + print_usage(argv[0]); + return false; + } + + opts.template_path = argv[1]; + + for (int i = 2; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "--debug-jinja") { + opts.debug_jinja = true; + } else if (arg == "--no-tools") { + opts.with_tools = false; + } else if (arg.rfind("--generation-prompt=", 0) == 0) { + opts.generation_prompt = parse_bool_option(arg.substr(20)); + } else if (arg.rfind("--enable-reasoning=", 0) == 0) { + opts.enable_reasoning = parse_bool_option(arg.substr(19)); + } else if (arg.rfind("--output=", 0) == 0) { + std::string mode = arg.substr(9); + if (mode == "analysis") { + opts.mode = output_mode::ANALYSIS; + } else if (mode == "template") { + opts.mode = output_mode::TEMPLATE; + } else if (mode == "both") { + opts.mode = output_mode::BOTH; + } else { + LOG_ERR("Unknown output mode: %s\n", mode.c_str()); + return false; + } + } else if (arg.rfind("--input-message=", 0) == 0) { + std::string type = arg.substr(16); + if (type == "content_only") { + opts.input_message = input_message_type::CONTENT_ONLY; + } else if (type == "reasoning_content") { + opts.input_message = input_message_type::REASONING_CONTENT; + } else if (type == "tool_call_only") { + opts.input_message = input_message_type::TOOL_CALL_ONLY; + } else if (type == "content_tool_call") { + opts.input_message = input_message_type::CONTENT_TOOL_CALL; + } else if (type == "reasoning_tool_call") { + opts.input_message = input_message_type::REASONING_TOOL_CALL; + } else if (type == "content_fake_tool_call") { + opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL; + } else if (type == "all") { + opts.input_message = input_message_type::ALL; + } else { + LOG_ERR("Unknown input message type: %s\n", type.c_str()); + return false; + } + } else { + LOG_ERR("Unknown option: %s\n", arg.c_str()); + print_usage(argv[0]); + return false; + } + } + + return true; +} + +static json build_user_message() { + return json{ + { "role", "user" }, + { "content", "Hello, please help me with a task." } + }; +} + +static json build_content_only_message() { + return json{ + { "role", "assistant" }, + { "content", "Hello! I'm here to help you with your task." } + }; +} + +static json build_reasoning_content_message() { + return json{ + { "role", "assistant" }, + { "content", "Hello! I'm here to help you with your task." }, + { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." } + }; +} + +static json build_tool_call_only_message() { + return json{ + { "role", "assistant" }, + { "content", nullptr }, + { "tool_calls", + json::array({ json{ + { "type", "function" }, + { "function", json{ { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } }, + { "id", "123456789" } } }) } + }; +} + +static json build_content_tool_call_message() { + return json{ + { "role", "assistant" }, + { "content", "I'll help you by calling a function." }, + { "tool_calls", + json::array({ json{ + { "type", "function" }, + { "function", + json{ { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; +} + +static json build_reasoning_tool_call_message() { + return json{ + { "role", "assistant" }, + { "content", nullptr }, + { "reasoning_content", "I need to call a function to help with this task." }, + { "tool_calls", + json::array({ json{ + { "type", "function" }, + { "function", + json{ { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; +} + +static json build_content_fake_tool_call_message() { + // This message has content but NO tool_calls field + // It's used to test if a template renders tool definitions but not tool calls + return json{ + { "role", "assistant" }, + { "content", "I'll help you by calling a function." } + }; +} + +static json build_tools_definition() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["param1"] = json::object({ + { "type", "string" }, + { "description", "First parameter" } + }); + parameters_schema["properties"]["param2"] = json::object({ + { "type", "string" }, + { "description", "Second parameter" } + }); + parameters_schema["required"] = json::array({ "param1" }); + + return json::array({ + json{ { "type", "function" }, + { "function", json{ { "name", "test_function_name" }, + { "description", "A test function for debugging" }, + { "parameters", parameters_schema } } } } + }); +} + +static void render_scenario(const common_chat_template & tmpl, + const std::string & scenario_name, + const json & messages, + const json & tools, + bool add_generation_prompt, + bool enable_thinking) { + LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str()); + LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false", + enable_thinking ? "true" : "false"); + + // When add_generation_prompt is true, add a trailing user message to trigger the prompt + json final_messages = messages; + if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") { + final_messages.push_back(json{ + { "role", "user" }, + { "content", "Now please continue with another response." } + }); + } + + LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str()); + + try { + autoparser::templates_params inputs; + inputs.messages = final_messages; + inputs.add_generation_prompt = add_generation_prompt; + inputs.extra_context["enable_thinking"] = enable_thinking; + + if (!tools.is_null() && tools.is_array() && !tools.empty()) { + inputs.tools = tools; + } + + std::string output = common_chat_template_direct_apply(tmpl, inputs); + + LOG_ERR("\n--- Rendered Output ---\n"); + LOG_ERR("%s\n", output.c_str()); + LOG_ERR("--- End Output (length: %zu) ---\n", output.length()); + } catch (const std::exception & e) { + LOG_ERR("Rendering failed: %s\n", e.what()); + } +} + +static void render_all_scenarios(const common_chat_template & tmpl, + const json & tools, + bool add_generation_prompt, + bool enable_thinking, + input_message_type message_type) { + json user_msg = build_user_message(); + + auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) { + if (message_type == input_message_type::ALL || message_type == type) { + json messages = json::array({ user_msg, assistant_msg }); + render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking); + } + }; + + render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message()); + render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message()); + render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message()); + render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message()); + render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message()); + render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call", + build_content_fake_tool_call_message()); + + // Also render with add_generation_prompt=true to show the prompt ending + if (message_type == input_message_type::ALL) { + LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n"); + + json prompt_messages = json::array({ user_msg }); + render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking); + + // With enable_thinking toggled + render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false); + } +} + +template +static std::string mode_to_str(T mode) { + std::ostringstream os; + os << mode; + return os.str(); +} + +int main(int argc, char ** argv) { + // Set log level to most verbose to capture all debug output + common_log_set_verbosity_thold(99); + + debug_options opts; + if (!parse_options(argc, argv, opts)) { + return 1; + } + + if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) { + jinja::enable_debug(true); + } + + std::string template_source; + try { + // Check if the file is a GGUF file + if (opts.template_path.size() >= 5 && + opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) { + template_source = read_gguf_chat_template(opts.template_path); + } else { + template_source = read_file(opts.template_path); + } + } catch (const std::exception & e) { + LOG_ERR("Error reading template: %s\n", e.what()); + return 1; + } + + LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str()); + LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false", + opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false"); + + try { + common_chat_template chat_template(template_source, "", ""); + + // Build tools definition + json tools = opts.with_tools ? build_tools_definition() : json(); + + // Render template scenarios if requested + if (opts.input_message != input_message_type::NONE && + (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) { + LOG_ERR("\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR(" TEMPLATE RENDERING OUTPUT\n"); + LOG_ERR("================================================================================\n"); + + render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning, + opts.input_message); + } + + // Output analysis if requested + if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) { + LOG_ERR("\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR(" TEMPLATE ANALYSIS\n"); + LOG_ERR("================================================================================\n"); + + autoparser::analyze_template analysis(chat_template); + + // Generate Parser + autoparser::templates_params params; + params.messages = json::array(); + params.reasoning_format = + opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE; + params.enable_thinking = opts.enable_reasoning; + params.add_generation_prompt = opts.generation_prompt; + + if (opts.with_tools) { + params.tools = tools; + params.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + } else { + params.tools = json(); + params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE; + } + params.parallel_tool_calls = false; + + auto parser_data = autoparser::universal_peg_generator::generate_parser(chat_template, params, analysis); + + LOG_ERR("\n=== Differential Analysis Results ===\n"); + + LOG_ERR("\n--- Reasoning & Content Structure ---\n"); + LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning.mode).c_str()); + LOG_ERR("reasoning_start: '%s'\n", analysis.reasoning.start.c_str()); + LOG_ERR("reasoning_end: '%s'\n", analysis.reasoning.end.c_str()); + LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content.mode).c_str()); + LOG_ERR("content_start: '%s'\n", analysis.content.start.c_str()); + LOG_ERR("content_end: '%s'\n", analysis.content.end.c_str()); + + LOG_ERR("\n--- Tool Call Structure ---\n"); + LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools.format.mode).c_str()); + LOG_ERR("supports_tools: %s\n", analysis.jinja_caps.supports_tools ? "true" : "false"); + LOG_ERR("supports_parallel_calls: %s\n", analysis.jinja_caps.supports_parallel_tool_calls ? "true" : "false"); + LOG_ERR("tool_section_start: '%s'\n", analysis.tools.format.section_start.c_str()); + LOG_ERR("tool_section_end: '%s'\n", analysis.tools.format.section_end.c_str()); + LOG_ERR("per_call_start: '%s'\n", analysis.tools.format.per_call_start.c_str()); + LOG_ERR("per_call_end: '%s'\n", analysis.tools.format.per_call_end.c_str()); + LOG_ERR("func_name_prefix: '%s'\n", analysis.tools.function.name_prefix.c_str()); + LOG_ERR("func_name_suffix: '%s'\n", analysis.tools.function.name_suffix.c_str()); + LOG_ERR("func_close: '%s'\n", analysis.tools.function.close.c_str()); + LOG_ERR("arg_name_prefix: '%s'\n", analysis.tools.arguments.name_prefix.c_str()); + LOG_ERR("arg_name_suffix: '%s'\n", analysis.tools.arguments.name_suffix.c_str()); + LOG_ERR("arg_value_prefix: '%s'\n", analysis.tools.arguments.value_prefix.c_str()); + LOG_ERR("arg_value_suffix: '%s'\n", analysis.tools.arguments.value_suffix.c_str()); + LOG_ERR("name_field: '%s'\n", analysis.tools.format.name_field.c_str()); + LOG_ERR("args_field: '%s'\n", analysis.tools.format.args_field.c_str()); + LOG_ERR("id_field: '%s'\n", analysis.tools.format.id_field.c_str()); + LOG_ERR("gen_id_field: '%s'\n", analysis.tools.format.gen_id_field.c_str()); + LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.tools.format.parameter_order.begin(), analysis.tools.format.parameter_order.end(), + std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; } + ).c_str()); + + LOG_ERR("\n=== Generated Parser ===\n"); + common_peg_arena arena; + arena.load(parser_data.parser); + LOG_ERR("%s\n", arena.dump(arena.root()).c_str()); + + LOG_ERR("\n=== Generated Grammar ===\n"); + LOG_ERR("%s\n", parser_data.grammar.c_str()); + + LOG_ERR("\n=== Generated Lazy Grammar ===\n"); + LOG_ERR("%d\n", parser_data.grammar_lazy); + + LOG_ERR("\n=== Generated Grammar Triggers ===\n"); + for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) { + LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str()); + } + + LOG_ERR("\n=== Preserved Tokens ===\n"); + for (const std::string & token : parser_data.preserved_tokens) { + LOG_ERR(" '%s'\n", token.c_str()); + } + + if (!parser_data.grammar.empty()) { + LOG_ERR("\n=== Verifying created grammar ===\n"); + auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root", + parser_data.grammar_lazy, nullptr, 0, nullptr, 0); + if (grammar != nullptr) { + LOG_ERR("\n=== Grammar successfully created ===\n"); + } + } + } + } catch (const std::exception & e) { + LOG_ERR("Analysis failed: %s\n", e.what()); + return 1; + } + + return 0; +} diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp new file mode 100644 index 0000000000..a92e104ac0 --- /dev/null +++ b/tools/parser/template-analysis.cpp @@ -0,0 +1,611 @@ +#include "chat-auto-parser.h" +#include "chat-auto-parser-helpers.h" +#include "chat.h" +#include "log.h" +#include "jinja/caps.h" +#include "jinja/runtime.h" + +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + +using json = nlohmann::ordered_json; + +// ANSI color codes - using 256-color palette for brighter colors (all bold) +#define ANSI_RESET "\033[0m" +#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" // Bold bright purple for main headers +#define ANSI_CYAN "\033[1m\x1b[38;5;81m" // Bold bright cyan for section headers +#define ANSI_BLUE "\033[1m\x1b[38;5;12m" // Bold bright blue for labels +#define ANSI_ORANGE "\033[1m\x1b[38;5;209m" // Bold orange for right differences +#define ANSI_GREEN "\033[1m\x1b[38;5;83m" // Bold bright green for left differences +#define ANSI_GRAY "\033[1m\x1b[38;5;240m" // Bold gray (used for "no variables" message) +#define ANSI_BOLD "\033[1m" // Standalone bold +#define ANSI_PREFIX "\033[1m\x1b[38;5;176m" // Bold color for common prefix +#define ANSI_SUFFIX "\033[1m\x1b[38;5;61m" // Bold color for common suffix + +// All template paths extracted from tests/test-chat.cpp +static const std::vector ALL_TEMPLATE_PATHS = { + "models/templates/Apertus-8B-Instruct.jinja", + "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", + "models/templates/ByteDance-Seed-OSS.jinja", + "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", + "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", + "models/templates/GLM-4.6.jinja", + "models/templates/GLM-4.7-Flash.jinja", + "models/templates/Kimi-K2-Instruct.jinja", + "models/templates/Kimi-K2-Thinking.jinja", + "models/templates/MiMo-VL.jinja", + "models/templates/MiniMax-M2.jinja", + "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", + "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", + "models/templates/NVIDIA-Nemotron-Nano-v2.jinja", + "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", + "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja", + "models/templates/Qwen-QwQ-32B.jinja", + "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja", + "models/templates/Qwen3-Coder.jinja", + "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", + "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", + "models/templates/deepseek-ai-DeepSeek-V3.1.jinja", + "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", + "models/templates/google-gemma-2-2b-it.jinja", + "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", + "models/templates/llama-cpp-deepseek-r1.jinja", + "models/templates/meetkai-functionary-medium-v3.1.jinja", + "models/templates/meetkai-functionary-medium-v3.2.jinja", + "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", + "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", + "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", + "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", + "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", + "models/templates/moonshotai-Kimi-K2.jinja", + "models/templates/openai-gpt-oss-120b.jinja", + "models/templates/unsloth-Apriel-1.5.jinja", + "models/templates/unsloth-mistral-Devstral-Small-2507.jinja", +}; + +struct analysis_options { + std::vector template_paths; + bool analyze_all = false; +}; + +static std::string read_file(const std::string & path) { + std::ifstream fin(path, std::ios::binary); + if (!fin.is_open()) { + throw std::runtime_error("Could not open file: " + path); + } + std::ostringstream buf; + buf << fin.rdbuf(); + return buf.str(); +} + +static void print_usage(const char * program_name) { + LOG_ERR("Usage: %s [options]\n", program_name); + LOG_ERR("\nOptions:\n"); + LOG_ERR(" --template Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n"); + LOG_ERR(" --template-file Analyze custom template file\n"); + LOG_ERR(" --all Analyze all templates from test suite\n"); + LOG_ERR("\nExamples:\n"); + LOG_ERR(" %s --all\n", program_name); + LOG_ERR(" %s --template deepseek\n", program_name); + LOG_ERR(" %s --template-file my-template.jinja\n", program_name); +} + +static bool parse_options(int argc, char ** argv, analysis_options & opts) { + if (argc < 2) { + print_usage(argv[0]); + return false; + } + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "--all") { + opts.analyze_all = true; + } else if (arg == "--template") { + if (i + 1 >= argc) { + LOG_ERR("--template requires an argument\n"); + return false; + } + std::string pattern = argv[++i]; + std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower); + + // Find matching templates + bool found = false; + for (const auto & path : ALL_TEMPLATE_PATHS) { + std::string path_lower = path; + std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower); + if (path_lower.find(pattern) != std::string::npos) { + opts.template_paths.push_back(path); + found = true; + } + } + + if (!found) { + LOG_ERR("No templates found matching: %s\n", pattern.c_str()); + return false; + } + } else if (arg == "--template-file") { + if (i + 1 >= argc) { + LOG_ERR("--template-file requires an argument\n"); + return false; + } + opts.template_paths.push_back(argv[++i]); + } else { + LOG_ERR("Unknown option: %s\n", arg.c_str()); + print_usage(argv[0]); + return false; + } + } + + if (opts.analyze_all) { + opts.template_paths = ALL_TEMPLATE_PATHS; + } + + if (opts.template_paths.empty()) { + LOG_ERR("No templates specified\n"); + print_usage(argv[0]); + return false; + } + + return true; +} + +static json build_tools_definition() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["param1"] = json::object({ + { "type", "string" }, + { "description", "First parameter" } + }); + parameters_schema["properties"]["param2"] = json::object({ + { "type", "string" }, + { "description", "Second parameter" } + }); + parameters_schema["required"] = json::array({ "param1", "param2" }); + + return json::array({ + json{ { "type", "function" }, + { "function", json{ { "name", "test_function_name" }, + { "description", "A test function for debugging" }, + { "parameters", parameters_schema } } } } + }); +} + +// Helper to create a tool call with arguments as JSON object +static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") { + return json{ + {"id", id}, + {"type", "function"}, + {"function", json{ + {"name", name}, + {"arguments", args_object} // Pass as JSON object, not serialized string + }} + }; +} + +// Helper functions to create repeating message definitions +static json make_user_msg() { + return json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; +} + +static json make_user_msg2() { + return json{ + {"role", "user"}, + {"content", "Thank you."} + }; +} + +static json make_user_msg2_continue() { + return json{ + {"role", "user"}, + {"content", "Continue."} + }; +} + +static json make_assistant_no_tool() { + return json{ + {"role", "assistant"}, + {"content", "Let me help you."} + }; +} + +static json make_assistant_one_tool() { + return json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; +} + +static json make_assistant_two_tools() { + return json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})), + build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002") + })} + }; +} + +static json make_assistant_no_reasoning() { + return json{ + {"role", "assistant"}, + {"content", "I can help you with that."} + }; +} + +static json make_assistant_with_reasoning() { + return json{ + {"role", "assistant"}, + {"content", "I can help you with that."}, + {"reasoning_content", "The user is asking for help. I should respond positively."} + }; +} + +static json make_assistant_one_tool_with_reasoning() { + return json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })}, + {"reasoning_content", "I need to call the tool first."} + }; +} + +static void print_diff_split(const std::string & title, const diff_split & diff) { + LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET); + LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str()); + LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str()); + LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str()); + LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str()); +} + +static void check_reasoning_variables(const common_chat_template & tmpl) { + LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET); + + try { + // Create a list of candidate reasoning/thinking variable names to probe + std::vector candidate_vars = { + "enable_reasoning", + "use_reasoning", + "reasoning_enabled", + "has_reasoning", + "reasoning_mode", + "reasoning_format", + "reasoning_active", + "with_reasoning", + "use_thinking", + "thinking_enabled", + "has_thinking", + "thinking_mode", + "thinking_format", + "thinking_active", + "with_thinking", + "enable_reason", + "reason_enabled", + "enable_think", + "think_enabled", + }; + + jinja::context ctx; + ctx.is_get_stats = true; + + json messages = json::array({ + json{ + {"role", "user"}, + {"content", "Test message"} + }, + json{ + {"role", "assistant"}, + {"content", "Response"}, + {"reasoning_content", "Some reasoning"} + } + }); + + // Set up base context + jinja::global_from_json(ctx, json{ + {"messages", messages}, + {"tools", json::array()}, + {"bos_token", ""}, + {"eos_token", ""}, + {"add_generation_prompt", false}, + {"enable_thinking", true} // Already passed, so we'll exclude this from results + }, true); + + // Add candidate variables as undefined to probe which ones are accessed + for (const auto & var_name : candidate_vars) { + ctx.set_val(var_name, jinja::mk_val(var_name)); + } + + try { + jinja::runtime runtime(ctx); + runtime.execute(tmpl.prog); + } catch (const std::exception & e) { + // Execution may fail, that's okay - we just want to see what variables were accessed + } + + // Check which candidate variables were accessed (stats.used = true) + std::vector accessed_vars; + for (const auto & var_name : candidate_vars) { + auto val = ctx.get_val(var_name); + if (!val->is_undefined()) { + // Variable was overwritten, skip it + continue; + } + if (val->stats.used) { + accessed_vars.push_back(var_name); + } + } + + if (accessed_vars.empty()) { + LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET); + } else { + LOG_ERR("Template queries the following reasoning/thinking-related variables:\n"); + for (const auto & var : accessed_vars) { + LOG_ERR(" %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET); + } + } + + } catch (const std::exception & e) { + LOG_ERR("Error checking reasoning variables: %s\n", e.what()); + } +} + +static void analyze_template(const std::string & template_path) { + LOG_ERR("\n"); + LOG_ERR("%s", ANSI_PURPLE); + LOG_ERR("================================================================================\n"); + LOG_ERR(" ANALYZING TEMPLATE: %s\n", template_path.c_str()); + LOG_ERR("================================================================================\n"); + LOG_ERR("%s", ANSI_RESET); + + std::string template_source; + try { + template_source = read_file(template_path); + } catch (const std::exception & e) { + LOG_ERR("Error reading template: %s\n", e.what()); + return; + } + + try { + common_chat_template chat_template(template_source, "", ""); + json tools = build_tools_definition(); + + // ===== CAPABILITIES ANALYSIS ===== + LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET); + auto caps = chat_template.original_caps(); + LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false"); + LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false"); + LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false"); + LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false"); + LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false"); + LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? "true" : "false"); + + // ===== DIFFERENTIAL ANALYSIS ===== + + // Test 1: With and without tools (single user message) + { + json user_msg = make_user_msg(); + + autoparser::templates_params params_no_tools; + params_no_tools.messages = json::array({ user_msg }); + params_no_tools.add_generation_prompt = false; + params_no_tools.tools = json::array(); + + autoparser::templates_params params_with_tools = params_no_tools; + params_with_tools.tools = tools; + + std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools); + std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools); + + auto diff = calculate_diff_split(output_no_tools, output_with_tools); + print_diff_split("Diff: With vs Without Tools (single user message)", diff); + } + + // Test 2: With and without add_generation_prompt (single user message) + { + json user_msg = make_user_msg(); + + autoparser::templates_params params_no_prompt; + params_no_prompt.messages = json::array({ user_msg }); + params_no_prompt.add_generation_prompt = false; + params_no_prompt.tools = json::array(); + + autoparser::templates_params params_with_prompt = params_no_prompt; + params_with_prompt.add_generation_prompt = true; + + std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt); + std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt); + + auto diff = calculate_diff_split(output_no_prompt, output_with_prompt); + print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff); + } + + // Test 3: Assistant with reasoning_content (user, assistant) + { + json user_msg = make_user_msg(); + + autoparser::templates_params params_no_reasoning; + params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() }); + params_no_reasoning.add_generation_prompt = false; + params_no_reasoning.enable_thinking = true; + + autoparser::templates_params params_with_reasoning = params_no_reasoning; + params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() }); + + std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); + std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning); + + auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning); + print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff); + } + + // Test 4: Assistant with reasoning_content (user, assistant, user) + { + json user_msg = make_user_msg(); + json user_msg2 = make_user_msg2(); + + autoparser::templates_params params_no_reasoning; + params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 }); + params_no_reasoning.add_generation_prompt = false; + params_no_reasoning.enable_thinking = true; + + autoparser::templates_params params_with_reasoning = params_no_reasoning; + params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 }); + + std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); + std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning); + + auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning); + print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff); + } + + // Test 5: Tool call in last assistant message (user, assistant) + { + json user_msg = make_user_msg(); + + autoparser::templates_params params_no_tool; + params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() }); + params_no_tool.add_generation_prompt = false; + params_no_tool.tools = tools; + + autoparser::templates_params params_with_tool = params_no_tool; + params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() }); + + std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool); + std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool); + + auto diff = calculate_diff_split(output_no_tool, output_with_tool); + print_diff_split("Diff: With vs Without tool call (user, assistant)", diff); + } + + // Test 6: Tool call in last assistant message (user, assistant, user) + { + json user_msg = make_user_msg(); + json user_msg2 = make_user_msg2_continue(); + + autoparser::templates_params params_no_tool; + params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 }); + params_no_tool.add_generation_prompt = false; + params_no_tool.tools = tools; + + autoparser::templates_params params_with_tool = params_no_tool; + params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 }); + + std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool); + std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool); + + auto diff = calculate_diff_split(output_no_tool, output_with_tool); + print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff); + } + + // Test 7: One vs two tool calls (user, assistant) + { + json user_msg = make_user_msg(); + + autoparser::templates_params params_one_tool; + params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() }); + params_one_tool.add_generation_prompt = false; + params_one_tool.tools = tools; + + autoparser::templates_params params_two_tools = params_one_tool; + params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() }); + + std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool); + std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools); + + auto diff = calculate_diff_split(output_one_tool, output_two_tools); + print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff); + } + + // Test 8: One vs two tool calls (user, assistant, user) + { + json user_msg = make_user_msg(); + json user_msg2 = make_user_msg2_continue(); + + autoparser::templates_params params_one_tool; + params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 }); + params_one_tool.add_generation_prompt = false; + params_one_tool.tools = tools; + + autoparser::templates_params params_two_tools = params_one_tool; + params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 }); + + std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool); + std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools); + + auto diff = calculate_diff_split(output_one_tool, output_two_tools); + print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff); + } + + // Test 9: Tool call with vs without reasoning_content (user, assistant) + { + json user_msg = make_user_msg(); + + autoparser::templates_params params_no_reasoning; + params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() }); + params_no_reasoning.add_generation_prompt = false; + params_no_reasoning.tools = tools; + params_no_reasoning.enable_thinking = true; + + autoparser::templates_params params_with_reasoning = params_no_reasoning; + params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() }); + + std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); + std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning); + + auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning); + print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff); + } + + // Check reasoning variables + check_reasoning_variables(chat_template); + + } catch (const std::exception & e) { + LOG_ERR("Analysis failed: %s\n", e.what()); + } +} + +int main(int argc, char ** argv) { + // Set log level to capture all output + common_log_set_verbosity_thold(99); + + analysis_options opts; + if (!parse_options(argc, argv, opts)) { + return 1; + } + + LOG_ERR("\n"); + LOG_ERR("%s", ANSI_PURPLE); + LOG_ERR("================================================================================\n"); + LOG_ERR(" TEMPLATE ANALYSIS TOOL\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR("%s", ANSI_RESET); + LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET); + + for (const auto & path : opts.template_paths) { + analyze_template(path); + } + + LOG_ERR("\n"); + LOG_ERR("%s", ANSI_GREEN); + LOG_ERR("================================================================================\n"); + LOG_ERR(" ANALYSIS COMPLETE\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR("%s", ANSI_RESET); + + return 0; +} diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index a137427c69..61e5ec5729 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1,12 +1,12 @@ -#include "server-common.h" #include "server-task.h" -#include "common.h" -#include "llama.h" #include "chat.h" +#include "common.h" +#include "json-schema-to-grammar.h" +#include "llama.h" #include "sampling.h" #include "speculative.h" -#include "json-schema-to-grammar.h" +#include "server-common.h" using json = nlohmann::ordered_json; @@ -18,8 +18,8 @@ json task_params::format_logit_bias(const std::vector & logit_ json data = json::array(); for (const auto & lb : logit_bias) { data.push_back(json{ - {"bias", lb.bias}, - {"token", lb.token}, + { "bias", lb.bias }, + { "token", lb.token }, }); } return data; @@ -34,41 +34,44 @@ json task_params::to_json(bool only_metrics) const { json lora = json::array(); for (auto & it : this->lora) { - lora.push_back({{"id", it.first}, {"scale", it.second}}); + lora.push_back({ + { "id", it.first }, + { "scale", it.second } + }); } if (only_metrics) { - return json { - {"seed", sampling.seed}, - {"temperature", sampling.temp}, - {"dynatemp_range", sampling.dynatemp_range}, - {"dynatemp_exponent", sampling.dynatemp_exponent}, - {"top_k", sampling.top_k}, - {"top_p", sampling.top_p}, - {"min_p", sampling.min_p}, - {"top_n_sigma", sampling.top_n_sigma}, - {"xtc_probability", sampling.xtc_probability}, - {"xtc_threshold", sampling.xtc_threshold}, - {"typical_p", sampling.typ_p}, - {"repeat_last_n", sampling.penalty_last_n}, - {"repeat_penalty", sampling.penalty_repeat}, - {"presence_penalty", sampling.penalty_present}, - {"frequency_penalty", sampling.penalty_freq}, - {"dry_multiplier", sampling.dry_multiplier}, - {"dry_base", sampling.dry_base}, - {"dry_allowed_length", sampling.dry_allowed_length}, - {"dry_penalty_last_n", sampling.dry_penalty_last_n}, - {"mirostat", sampling.mirostat}, - {"mirostat_tau", sampling.mirostat_tau}, - {"mirostat_eta", sampling.mirostat_eta}, - {"max_tokens", n_predict}, - {"n_predict", n_predict}, // TODO: deduplicate? - {"n_keep", n_keep}, - {"n_discard", n_discard}, - {"ignore_eos", sampling.ignore_eos}, - {"stream", stream}, - {"n_probs", sampling.n_probs}, - {"min_keep", sampling.min_keep}, + return json{ + { "seed", sampling.seed }, + { "temperature", sampling.temp }, + { "dynatemp_range", sampling.dynatemp_range }, + { "dynatemp_exponent", sampling.dynatemp_exponent }, + { "top_k", sampling.top_k }, + { "top_p", sampling.top_p }, + { "min_p", sampling.min_p }, + { "top_n_sigma", sampling.top_n_sigma }, + { "xtc_probability", sampling.xtc_probability }, + { "xtc_threshold", sampling.xtc_threshold }, + { "typical_p", sampling.typ_p }, + { "repeat_last_n", sampling.penalty_last_n }, + { "repeat_penalty", sampling.penalty_repeat }, + { "presence_penalty", sampling.penalty_present }, + { "frequency_penalty", sampling.penalty_freq }, + { "dry_multiplier", sampling.dry_multiplier }, + { "dry_base", sampling.dry_base }, + { "dry_allowed_length", sampling.dry_allowed_length }, + { "dry_penalty_last_n", sampling.dry_penalty_last_n }, + { "mirostat", sampling.mirostat }, + { "mirostat_tau", sampling.mirostat_tau }, + { "mirostat_eta", sampling.mirostat_eta }, + { "max_tokens", n_predict }, + { "n_predict", n_predict }, // TODO: deduplicate? + { "n_keep", n_keep }, + { "n_discard", n_discard }, + { "ignore_eos", sampling.ignore_eos }, + { "stream", stream }, + { "n_probs", sampling.n_probs }, + { "min_keep", sampling.min_keep }, {"chat_format", common_chat_format_name(chat_parser_params.format)}, {"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)}, {"reasoning_in_content", chat_parser_params.reasoning_in_content}, @@ -94,44 +97,44 @@ json task_params::to_json(bool only_metrics) const { grammar_triggers.push_back(ct.to_json()); } - return json { - {"seed", sampling.seed}, - {"temperature", sampling.temp}, - {"dynatemp_range", sampling.dynatemp_range}, - {"dynatemp_exponent", sampling.dynatemp_exponent}, - {"top_k", sampling.top_k}, - {"top_p", sampling.top_p}, - {"min_p", sampling.min_p}, - {"top_n_sigma", sampling.top_n_sigma}, - {"xtc_probability", sampling.xtc_probability}, - {"xtc_threshold", sampling.xtc_threshold}, - {"typical_p", sampling.typ_p}, - {"repeat_last_n", sampling.penalty_last_n}, - {"repeat_penalty", sampling.penalty_repeat}, - {"presence_penalty", sampling.penalty_present}, - {"frequency_penalty", sampling.penalty_freq}, - {"dry_multiplier", sampling.dry_multiplier}, - {"dry_base", sampling.dry_base}, - {"dry_allowed_length", sampling.dry_allowed_length}, - {"dry_penalty_last_n", sampling.dry_penalty_last_n}, - {"dry_sequence_breakers", sampling.dry_sequence_breakers}, - {"mirostat", sampling.mirostat}, - {"mirostat_tau", sampling.mirostat_tau}, - {"mirostat_eta", sampling.mirostat_eta}, - {"stop", antiprompt}, - {"max_tokens", n_predict}, - {"n_predict", n_predict}, // TODO: deduplicate? - {"n_keep", n_keep}, - {"n_discard", n_discard}, - {"ignore_eos", sampling.ignore_eos}, - {"stream", stream}, - {"logit_bias", format_logit_bias(sampling.logit_bias)}, - {"n_probs", sampling.n_probs}, - {"min_keep", sampling.min_keep}, - {"grammar", sampling.grammar}, - {"grammar_lazy", sampling.grammar_lazy}, - {"grammar_triggers", grammar_triggers}, - {"preserved_tokens", sampling.preserved_tokens}, + return json{ + { "seed", sampling.seed }, + { "temperature", sampling.temp }, + { "dynatemp_range", sampling.dynatemp_range }, + { "dynatemp_exponent", sampling.dynatemp_exponent }, + { "top_k", sampling.top_k }, + { "top_p", sampling.top_p }, + { "min_p", sampling.min_p }, + { "top_n_sigma", sampling.top_n_sigma }, + { "xtc_probability", sampling.xtc_probability }, + { "xtc_threshold", sampling.xtc_threshold }, + { "typical_p", sampling.typ_p }, + { "repeat_last_n", sampling.penalty_last_n }, + { "repeat_penalty", sampling.penalty_repeat }, + { "presence_penalty", sampling.penalty_present }, + { "frequency_penalty", sampling.penalty_freq }, + { "dry_multiplier", sampling.dry_multiplier }, + { "dry_base", sampling.dry_base }, + { "dry_allowed_length", sampling.dry_allowed_length }, + { "dry_penalty_last_n", sampling.dry_penalty_last_n }, + { "dry_sequence_breakers", sampling.dry_sequence_breakers }, + { "mirostat", sampling.mirostat }, + { "mirostat_tau", sampling.mirostat_tau }, + { "mirostat_eta", sampling.mirostat_eta }, + { "stop", antiprompt }, + { "max_tokens", n_predict }, + { "n_predict", n_predict }, // TODO: deduplicate? + { "n_keep", n_keep }, + { "n_discard", n_discard }, + { "ignore_eos", sampling.ignore_eos }, + { "stream", stream }, + { "logit_bias", format_logit_bias(sampling.logit_bias) }, + { "n_probs", sampling.n_probs }, + { "min_keep", sampling.min_keep }, + { "grammar", sampling.grammar }, + { "grammar_lazy", sampling.grammar_lazy }, + { "grammar_triggers", grammar_triggers }, + { "preserved_tokens", sampling.preserved_tokens }, {"chat_format", common_chat_format_name(chat_parser_params.format)}, {"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)}, {"reasoning_in_content", chat_parser_params.reasoning_in_content}, @@ -154,21 +157,75 @@ json task_params::to_json(bool only_metrics) const { // // task_result_state // -common_chat_msg task_result_state::update_chat_msg( - const std::string & text_added, - bool is_partial, - std::vector & diffs) { +common_chat_msg task_result_state::update_chat_msg(const std::string & text_added, + bool is_partial, + std::vector & diffs, + bool filter_tool_calls) { generated_text += text_added; auto msg_prv_copy = chat_msg; SRV_DBG("Parsing chat message: %s\n", generated_text.c_str()); - auto new_msg = common_chat_parse( - generated_text, - is_partial, - chat_parser_params); + auto new_msg = common_chat_parse(generated_text, is_partial, chat_parser_params); if (!new_msg.empty()) { new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id); - chat_msg = new_msg; - diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg); + chat_msg = new_msg; + auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg); + + if (!filter_tool_calls) { + diffs = std::move(all_diffs); + } else { + for (auto & d : all_diffs) { + // If this is a new type of delta, flush all currently pending tool call names + for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) { + if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) { + continue; + } + if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) { + common_chat_msg_diff header; + header.tool_call_index = i; + header.tool_call_delta.id = chat_msg.tool_calls[i].id; + header.tool_call_delta.name = chat_msg.tool_calls[i].name; + diffs.push_back(std::move(header)); + sent_tool_call_names.insert(i); + } + } + + if (d.tool_call_index == std::string::npos) { + diffs.push_back(std::move(d)); + } else { + size_t i = d.tool_call_index; + if (sent_tool_call_names.count(i)) { + if (!d.tool_call_delta.arguments.empty()) { + d.tool_call_delta.name = ""; + d.tool_call_delta.id = ""; + diffs.push_back(std::move(d)); + } + } else { + // Not sent yet. + if (!d.tool_call_delta.arguments.empty() || !is_partial) { + d.tool_call_delta.name = chat_msg.tool_calls[i].name; + d.tool_call_delta.id = chat_msg.tool_calls[i].id; + diffs.push_back(std::move(d)); + sent_tool_call_names.insert(i); + } else { + // Suppress + } + } + } + } + // Final check at EOF + if (!is_partial) { + for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) { + if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) { + common_chat_msg_diff header; + header.tool_call_index = i; + header.tool_call_delta.id = chat_msg.tool_calls[i].id; + header.tool_call_delta.name = chat_msg.tool_calls[i].name; + diffs.push_back(std::move(header)); + sent_tool_call_names.insert(i); + } + } + } + } } return chat_msg; } @@ -177,11 +234,10 @@ common_chat_msg task_result_state::update_chat_msg( // server_task // -task_params server_task::params_from_json_cmpl( - const llama_vocab * vocab, - const common_params & params_base, - const int n_ctx_slot, - const json & data) { +task_params server_task::params_from_json_cmpl(const llama_vocab * vocab, + const common_params & params_base, + const int n_ctx_slot, + const json & data) { task_params params; // Sampling parameter defaults are loaded from the global server context (but individual requests can still them) @@ -211,8 +267,8 @@ task_params server_task::params_from_json_cmpl( params.n_cmpl = json_value(data, "n_cmpl", json_value(data, "n", 1)); params.n_cache_reuse = json_value(data, "n_cache_reuse", defaults.n_cache_reuse); //params.t_max_prompt_ms = json_value(data, "t_max_prompt_ms", defaults.t_max_prompt_ms); // TODO: implement - params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms); - params.response_fields = json_value(data, "response_fields", std::vector()); + params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms); + params.response_fields = json_value(data, "response_fields", std::vector()); params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); @@ -262,7 +318,7 @@ task_params server_task::params_from_json_cmpl( params.speculative.ngram_min_hits = std::max(std::min(1, (int) params.speculative.ngram_min_hits), 1024); // Use OpenAI API logprobs only if n_probs wasn't provided - if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs){ + if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs) { params.sampling.n_probs = json_value(data, "logprobs", defaults.sampling.n_probs); } @@ -305,7 +361,8 @@ task_params server_task::params_from_json_cmpl( // Ref: https://github.com/oobabooga/text-generation-webui/blob/d1af7a41ade7bd3c3a463bfa640725edb818ebaf/extensions/openai/typing.py#L39 if (data.contains("dry_sequence_breakers")) { - params.sampling.dry_sequence_breakers = json_value(data, "dry_sequence_breakers", std::vector()); + params.sampling.dry_sequence_breakers = + json_value(data, "dry_sequence_breakers", std::vector()); if (params.sampling.dry_sequence_breakers.empty()) { throw std::runtime_error("Error: dry_sequence_breakers must be a non-empty array of strings"); } @@ -315,15 +372,15 @@ task_params server_task::params_from_json_cmpl( // process "json_schema" and "grammar" if (data.contains("json_schema") && !data.contains("grammar")) { try { - auto schema = json_value(data, "json_schema", json::object()); + auto schema = json_value(data, "json_schema", json::object()); SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str()); - params.sampling.grammar = json_schema_to_grammar(schema); + params.sampling.grammar = json_schema_to_grammar(schema); SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str()); } catch (const std::exception & e) { throw std::runtime_error(std::string("\"json_schema\": ") + e.what()); } } else { - params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); + params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str()); params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy); SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false"); @@ -342,9 +399,10 @@ task_params server_task::params_from_json_cmpl( reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get()); } params.chat_parser_params.reasoning_format = reasoning_format; - params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY); + params.chat_parser_params.reasoning_in_content = + params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY); params.chat_parser_params.thinking_forced_open = json_value(data, "thinking_forced_open", false); - params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false); + params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false); if (data.contains("chat_parser")) { params.chat_parser_params.parser.load(data.at("chat_parser").get()); } @@ -354,7 +412,8 @@ task_params server_task::params_from_json_cmpl( const auto preserved_tokens = data.find("preserved_tokens"); if (preserved_tokens != data.end()) { for (const auto & t : *preserved_tokens) { - auto ids = common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); + auto ids = + common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); if (ids.size() == 1) { SRV_DBG("Preserved token: %d\n", ids[0]); params.sampling.preserved_tokens.insert(ids[0]); @@ -373,18 +432,20 @@ task_params server_task::params_from_json_cmpl( auto ids = common_tokenize(vocab, word, /* add_special= */ false, /* parse_special= */ true); if (ids.size() == 1) { auto token = ids[0]; - if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) { - throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + word); + if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), + (llama_token) token) == params.sampling.preserved_tokens.end()) { + throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + + word); } SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str()); common_grammar_trigger trigger; - trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN; + trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN; trigger.value = word; trigger.token = token; params.sampling.grammar_triggers.push_back(std::move(trigger)); } else { SRV_DBG("Grammar trigger word: `%s`\n", word.c_str()); - params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word}); + params.sampling.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word }); } } else { if (ct.value.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN) { @@ -424,12 +485,12 @@ task_params server_task::params_from_json_cmpl( if (el[0].is_number_integer()) { llama_token tok = el[0].get(); if (tok >= 0 && tok < n_vocab) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } else if (el[0].is_string()) { auto toks = common_tokenize(vocab, el[0].get(), false); for (auto tok : toks) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } } @@ -437,8 +498,8 @@ task_params server_task::params_from_json_cmpl( } else if (logit_bias != data.end() && logit_bias->is_object()) { const int n_vocab = llama_vocab_n_tokens(vocab); for (const auto & el : logit_bias->items()) { - float bias; - const auto & key = el.key(); + float bias; + const auto & key = el.key(); const auto & value = el.value(); if (value.is_number()) { bias = value.get(); @@ -448,16 +509,16 @@ task_params server_task::params_from_json_cmpl( continue; } - char *end; + char * end; llama_token tok = strtol(key.c_str(), &end, 10); if (*end == 0) { if (tok >= 0 && tok < n_vocab) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } else { auto toks = common_tokenize(vocab, key, false); for (auto tok : toks) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } } @@ -465,9 +526,9 @@ task_params server_task::params_from_json_cmpl( params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos); if (params.sampling.ignore_eos) { - params.sampling.logit_bias.insert( - params.sampling.logit_bias.end(), - defaults.sampling.logit_bias_eog.begin(), defaults.sampling.logit_bias_eog.end()); + params.sampling.logit_bias.insert(params.sampling.logit_bias.end(), + defaults.sampling.logit_bias_eog.begin(), + defaults.sampling.logit_bias_eog.end()); } } @@ -493,7 +554,7 @@ task_params server_task::params_from_json_cmpl( if (samplers != data.end()) { if (samplers->is_array()) { params.sampling.samplers = common_sampler_types_from_names(*samplers, false); - } else if (samplers->is_string()){ + } else if (samplers->is_string()) { params.sampling.samplers = common_sampler_types_from_chars(samplers->get()); } } else { @@ -514,21 +575,21 @@ task_params server_task::params_from_json_cmpl( json result_timings::to_json() const { json base = { - {"cache_n", cache_n}, + { "cache_n", cache_n }, - {"prompt_n", prompt_n}, - {"prompt_ms", prompt_ms}, - {"prompt_per_token_ms", prompt_per_token_ms}, - {"prompt_per_second", prompt_per_second}, + { "prompt_n", prompt_n }, + { "prompt_ms", prompt_ms }, + { "prompt_per_token_ms", prompt_per_token_ms }, + { "prompt_per_second", prompt_per_second }, - {"predicted_n", predicted_n}, - {"predicted_ms", predicted_ms}, - {"predicted_per_token_ms", predicted_per_token_ms}, - {"predicted_per_second", predicted_per_second}, + { "predicted_n", predicted_n }, + { "predicted_ms", predicted_ms }, + { "predicted_per_token_ms", predicted_per_token_ms }, + { "predicted_per_second", predicted_per_second }, }; if (draft_n > 0) { - base["draft_n"] = draft_n; + base["draft_n"] = draft_n; base["draft_n_accepted"] = draft_n_accepted; } @@ -539,20 +600,24 @@ json result_timings::to_json() const { // result_prompt_progress // json result_prompt_progress::to_json() const { - return json { - {"total", total}, - {"cache", cache}, - {"processed", processed}, - {"time_ms", time_ms}, + return json{ + { "total", total }, + { "cache", cache }, + { "processed", processed }, + { "time_ms", time_ms }, }; } static inline std::string stop_type_to_str(stop_type type) { switch (type) { - case STOP_TYPE_EOS: return "eos"; - case STOP_TYPE_WORD: return "word"; - case STOP_TYPE_LIMIT: return "limit"; - default: return "none"; + case STOP_TYPE_EOS: + return "eos"; + case STOP_TYPE_WORD: + return "word"; + case STOP_TYPE_LIMIT: + return "limit"; + default: + return "none"; } } @@ -565,36 +630,28 @@ json completion_token_output::to_json(bool post_sampling_probs) const { for (const auto & p : probs) { std::string txt(p.txt); txt.resize(validate_utf8(txt)); - probs_for_token.push_back(json { - {"id", p.tok}, - {"token", txt}, - {"bytes", str_to_bytes(p.txt)}, - { - post_sampling_probs ? "prob" : "logprob", - post_sampling_probs ? p.prob : logarithm(p.prob) - }, + probs_for_token.push_back(json{ + { "id", p.tok }, + { "token", txt }, + { "bytes", str_to_bytes(p.txt) }, + { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? p.prob : logarithm(p.prob) }, }); } return probs_for_token; } -json completion_token_output::probs_vector_to_json(const std::vector & probs, bool post_sampling_probs) { +json completion_token_output::probs_vector_to_json(const std::vector & probs, + bool post_sampling_probs) { json out = json::array(); for (const auto & p : probs) { std::string txt(p.text_to_send); txt.resize(validate_utf8(txt)); - out.push_back(json { - {"id", p.tok}, - {"token", txt}, - {"bytes", str_to_bytes(p.text_to_send)}, - { - post_sampling_probs ? "prob" : "logprob", - post_sampling_probs ? p.prob : logarithm(p.prob) - }, - { - post_sampling_probs ? "top_probs" : "top_logprobs", - p.to_json(post_sampling_probs) - }, + out.push_back(json{ + { "id", p.tok }, + { "token", txt }, + { "bytes", str_to_bytes(p.text_to_send) }, + { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? p.prob : logarithm(p.prob) }, + { post_sampling_probs ? "top_probs" : "top_logprobs", p.to_json(post_sampling_probs) }, }); } return out; @@ -635,61 +692,58 @@ json server_task_result_cmpl_final::to_json() { } json server_task_result_cmpl_final::to_json_non_oaicompat() { - json res = json { - {"index", index}, - {"content", content}, - {"tokens", tokens}, - {"id_slot", id_slot}, - {"stop", true}, - {"model", oaicompat_model}, - {"tokens_predicted", n_decoded}, - {"tokens_evaluated", n_prompt_tokens}, - {"generation_settings", generation_params.to_json()}, - {"prompt", prompt}, - {"has_new_line", has_new_line}, - {"truncated", truncated}, - {"stop_type", stop_type_to_str(stop)}, - {"stopping_word", stopping_word}, - {"tokens_cached", n_tokens_cached}, - {"timings", timings.to_json()}, + json res = json{ + { "index", index }, + { "content", content }, + { "tokens", tokens }, + { "id_slot", id_slot }, + { "stop", true }, + { "model", oaicompat_model }, + { "tokens_predicted", n_decoded }, + { "tokens_evaluated", n_prompt_tokens }, + { "generation_settings", generation_params.to_json() }, + { "prompt", prompt }, + { "has_new_line", has_new_line }, + { "truncated", truncated }, + { "stop_type", stop_type_to_str(stop) }, + { "stopping_word", stopping_word }, + { "tokens_cached", n_tokens_cached }, + { "timings", timings.to_json() }, }; if (!stream && !probs_output.empty()) { - res["completion_probabilities"] = completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs); + res["completion_probabilities"] = + completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs); } return response_fields.empty() ? res : json_get_nested_values(response_fields, res); } json server_task_result_cmpl_final::to_json_oaicompat() { - std::time_t t = std::time(0); - json logprobs = json(nullptr); // OAI default to null + std::time_t t = std::time(0); + json logprobs = json(nullptr); // OAI default to null if (!stream && probs_output.size() > 0) { logprobs = json{ - {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)}, + { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) }, }; } json finish_reason = "length"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { finish_reason = "stop"; } - json res = json { - {"choices", json::array({ - json{ - {"text", content}, - {"index", index}, - {"logprobs", logprobs}, - {"finish_reason", finish_reason}, - } - })}, - {"created", t}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "text_completion"}, - {"usage", json { - {"completion_tokens", n_decoded}, - {"prompt_tokens", n_prompt_tokens}, - {"total_tokens", n_decoded + n_prompt_tokens} - }}, - {"id", oaicompat_cmpl_id} + json res = json{ + { "choices", json::array({ json{ + { "text", content }, + { "index", index }, + { "logprobs", logprobs }, + { "finish_reason", finish_reason }, + } }) }, + { "created", t }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "text_completion" }, + { "usage", json{ { "completion_tokens", n_decoded }, + { "prompt_tokens", n_prompt_tokens }, + { "total_tokens", n_decoded + n_prompt_tokens } } }, + { "id", oaicompat_cmpl_id } }; // extra fields for debugging purposes @@ -697,19 +751,19 @@ json server_task_result_cmpl_final::to_json_oaicompat() { res["__verbose"] = to_json_non_oaicompat(); } if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } return res; } json server_task_result_cmpl_final::to_json_oaicompat_chat() { - std::string finish_reason = "length"; + std::string finish_reason = "length"; common_chat_msg msg; if (!oaicompat_msg.empty()) { msg = oaicompat_msg; } else { - msg.role = "assistant"; + msg.role = "assistant"; msg.content = content; } if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -724,24 +778,22 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() { if (!stream && probs_output.size() > 0) { choice["logprobs"] = json{ - {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)}, + { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) }, }; } std::time_t t = std::time(0); - json res = json { - {"choices", json::array({choice})}, - {"created", t}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion"}, - {"usage", json { - {"completion_tokens", n_decoded}, - {"prompt_tokens", n_prompt_tokens}, - {"total_tokens", n_decoded + n_prompt_tokens} - }}, - {"id", oaicompat_cmpl_id} + json res = json{ + { "choices", json::array({ choice }) }, + { "created", t }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion" }, + { "usage", json{ { "completion_tokens", n_decoded }, + { "prompt_tokens", n_prompt_tokens }, + { "total_tokens", n_decoded + n_prompt_tokens } } }, + { "id", oaicompat_cmpl_id } }; // extra fields for debugging purposes @@ -749,14 +801,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() { res["__verbose"] = to_json_non_oaicompat(); } if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } return res; } json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { - std::time_t t = std::time(0); + std::time_t t = std::time(0); std::string finish_reason = "length"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { finish_reason = oaicompat_msg.tool_calls.empty() ? "stop" : "tool_calls"; @@ -781,40 +833,41 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { } deltas.push_back({ - {"choices", json::array({ - json { - {"finish_reason", finish_reason}, - {"index", 0}, - {"delta", json::object()}, - }, - })}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion.chunk"}, + { "choices", json::array({ + json{ + { "finish_reason", finish_reason }, + { "index", 0 }, + { "delta", json::object() }, + }, + }) }, + { "created", t }, + { "id", oaicompat_cmpl_id }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion.chunk" }, }); if (include_usage) { // OpenAI API spec for chat.completion.chunks specifies an empty `choices` array for the last chunk when including usage // https://platform.openai.com/docs/api-reference/chat_streaming/streaming#chat_streaming/streaming-choices deltas.push_back({ - {"choices", json::array()}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion.chunk"}, - {"usage", json { - {"completion_tokens", n_decoded}, - {"prompt_tokens", n_prompt_tokens}, - {"total_tokens", n_decoded + n_prompt_tokens}, - }}, + { "choices", json::array() }, + { "created", t }, + { "id", oaicompat_cmpl_id }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion.chunk" }, + { "usage", + json{ + { "completion_tokens", n_decoded }, + { "prompt_tokens", n_prompt_tokens }, + { "total_tokens", n_decoded + n_prompt_tokens }, + } }, }); } if (timings.prompt_n >= 0) { - deltas.back().push_back({"timings", timings.to_json()}); + deltas.back().push_back({ "timings", timings.to_json() }); } // extra fields for debugging purposes @@ -1017,7 +1070,7 @@ json server_task_result_cmpl_final::to_json_anthropic() { if (!oaicompat_msg.empty()) { msg = oaicompat_msg; } else { - msg.role = "assistant"; + msg.role = "assistant"; msg.content = content; } @@ -1032,16 +1085,16 @@ json server_task_result_cmpl_final::to_json_anthropic() { if (!msg.content.empty()) { content_blocks.push_back({ - {"type", "text"}, - {"text", msg.content} + { "type", "text" }, + { "text", msg.content } }); } for (const auto & tool_call : msg.tool_calls) { json tool_use_block = { - {"type", "tool_use"}, - {"id", tool_call.id}, - {"name", tool_call.name} + { "type", "tool_use" }, + { "id", tool_call.id }, + { "name", tool_call.name } }; try { @@ -1054,17 +1107,14 @@ json server_task_result_cmpl_final::to_json_anthropic() { } json res = { - {"id", oaicompat_cmpl_id}, - {"type", "message"}, - {"role", "assistant"}, - {"content", content_blocks}, - {"model", oaicompat_model}, - {"stop_reason", stop_reason}, - {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)}, - {"usage", { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded} - }} + { "id", oaicompat_cmpl_id }, + { "type", "message" }, + { "role", "assistant" }, + { "content", content_blocks }, + { "model", oaicompat_model }, + { "stop_reason", stop_reason }, + { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word) }, + { "usage", { { "input_tokens", n_prompt_tokens }, { "output_tokens", n_decoded } } } }; return res; @@ -1159,31 +1209,27 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() { const auto & full_tool_call = oaicompat_msg.tool_calls[diff.tool_call_index]; events.push_back({ - {"event", "content_block_start"}, - {"data", { - {"type", "content_block_start"}, - {"index", content_block_index}, - {"content_block", { - {"type", "tool_use"}, - {"id", full_tool_call.id}, - {"name", full_tool_call.name} - }} - }} + { "event", "content_block_start" }, + { "data", + { { "type", "content_block_start" }, + { "index", content_block_index }, + { "content_block", + { { "type", "tool_use" }, + { "id", full_tool_call.id }, + { "name", full_tool_call.name } } } } } }); tool_calls_started.insert(diff.tool_call_index); } if (!diff.tool_call_delta.arguments.empty()) { events.push_back({ - {"event", "content_block_delta"}, - {"data", { - {"type", "content_block_delta"}, - {"index", content_block_index}, - {"delta", { - {"type", "input_json_delta"}, - {"partial_json", diff.tool_call_delta.arguments} - }} - }} + { "event", "content_block_delta" }, + { "data", + { { "type", "content_block_delta" }, + { "index", content_block_index }, + { "delta", + { { "type", "input_json_delta" }, + { "partial_json", diff.tool_call_delta.arguments } } } } } }); } } @@ -1226,33 +1272,24 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() { for (size_t i = 0; i < num_tool_calls; i++) { size_t content_block_index = (has_thinking ? 1 : 0) + (has_text ? 1 : 0) + i; events.push_back({ - {"event", "content_block_stop"}, - {"data", { - {"type", "content_block_stop"}, - {"index", content_block_index} - }} + { "event", "content_block_stop" }, + { "data", { { "type", "content_block_stop" }, { "index", content_block_index } } } }); } events.push_back({ - {"event", "message_delta"}, - {"data", { - {"type", "message_delta"}, - {"delta", { - {"stop_reason", stop_reason}, - {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)} - }}, - {"usage", { - {"output_tokens", n_decoded} - }} - }} + { "event", "message_delta" }, + { "data", + { { "type", "message_delta" }, + { "delta", + { { "stop_reason", stop_reason }, + { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word) } } }, + { "usage", { { "output_tokens", n_decoded } } } } } }); events.push_back({ - {"event", "message_stop"}, - {"data", { - {"type", "message_stop"} - }} + { "event", "message_stop" }, + { "data", { { "type", "message_stop" } } } }); return events; @@ -1311,50 +1348,49 @@ json server_task_result_cmpl_partial::to_json() { json server_task_result_cmpl_partial::to_json_non_oaicompat() { // non-OAI-compat JSON - json res = json { - {"index", index}, - {"content", content}, - {"tokens", tokens}, - {"stop", false}, - {"id_slot", id_slot}, - {"tokens_predicted", n_decoded}, - {"tokens_evaluated", n_prompt_tokens}, + json res = json{ + { "index", index }, + { "content", content }, + { "tokens", tokens }, + { "stop", false }, + { "id_slot", id_slot }, + { "tokens_predicted", n_decoded }, + { "tokens_evaluated", n_prompt_tokens }, }; // populate the timings object when needed (usually for the last response or with timings_per_token enabled) if (timings.prompt_n > 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } if (is_progress) { - res.push_back({"prompt_progress", progress.to_json()}); + res.push_back({ "prompt_progress", progress.to_json() }); } if (!prob_output.probs.empty()) { - res["completion_probabilities"] = completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs); + res["completion_probabilities"] = + completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs); } return res; } json server_task_result_cmpl_partial::to_json_oaicompat() { - std::time_t t = std::time(0); - json logprobs = json(nullptr); // OAI default to null + std::time_t t = std::time(0); + json logprobs = json(nullptr); // OAI default to null if (prob_output.probs.size() > 0) { logprobs = json{ - {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)}, + { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) }, }; } - json res = json { - {"choices", json::array({ - json{ - {"text", content}, - {"index", index}, - {"logprobs", logprobs}, - {"finish_reason", nullptr}, - } - })}, - {"created", t}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "text_completion"}, - {"id", oaicompat_cmpl_id} + json res = json{ + { "choices", json::array({ json{ + { "text", content }, + { "index", index }, + { "logprobs", logprobs }, + { "finish_reason", nullptr }, + } }) }, + { "created", t }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "text_completion" }, + { "id", oaicompat_cmpl_id } }; // extra fields for debugging purposes @@ -1362,42 +1398,42 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { res["__verbose"] = to_json_non_oaicompat(); } if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } if (is_progress) { - res.push_back({"prompt_progress", progress.to_json()}); + res.push_back({ "prompt_progress", progress.to_json() }); } return res; } json server_task_result_cmpl_partial::to_json_oaicompat_chat() { - bool first = n_decoded == 1; - std::time_t t = std::time(0); - json choices; + bool first = n_decoded == 1; + std::time_t t = std::time(0); + json choices; std::vector deltas; - auto add_delta = [&](const json & delta) { + auto add_delta = [&](const json & delta) { deltas.push_back({ - {"choices", json::array({ - json { - {"finish_reason", nullptr}, - {"index", index}, - {"delta", delta}, - }, - })}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion.chunk"}, + { "choices", json::array({ + json{ + { "finish_reason", nullptr }, + { "index", index }, + { "delta", delta }, + }, + }) }, + { "created", t }, + { "id", oaicompat_cmpl_id }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion.chunk" }, }); }; // We have to send an initial update to conform to openai behavior if (first || is_progress) { add_delta({ - {"role", "assistant"}, - {"content", nullptr}, + { "role", "assistant" }, + { "content", nullptr }, }); } @@ -1410,16 +1446,16 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { GGML_ASSERT(last_json.at("choices").size() >= 1); if (prob_output.probs.size() > 0) { - last_json.at("choices").at(0)["logprobs"] = json { - {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)}, + last_json.at("choices").at(0)["logprobs"] = json{ + { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) }, }; } if (timings.prompt_n >= 0) { - last_json.push_back({"timings", timings.to_json()}); + last_json.push_back({ "timings", timings.to_json() }); } if (is_progress) { - last_json.push_back({"prompt_progress", progress.to_json()}); + last_json.push_back({ "prompt_progress", progress.to_json() }); } } @@ -1560,23 +1596,18 @@ json server_task_result_cmpl_partial::to_json_anthropic() { if (first) { events.push_back({ - {"event", "message_start"}, - {"data", { - {"type", "message_start"}, - {"message", { - {"id", oaicompat_cmpl_id}, - {"type", "message"}, - {"role", "assistant"}, - {"content", json::array()}, - {"model", oaicompat_model}, - {"stop_reason", nullptr}, - {"stop_sequence", nullptr}, - {"usage", { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", 0} - }} - }} - }} + { "event", "message_start" }, + { "data", + { { "type", "message_start" }, + { "message", + { { "id", oaicompat_cmpl_id }, + { "type", "message" }, + { "role", "assistant" }, + { "content", json::array() }, + { "model", oaicompat_model }, + { "stop_reason", nullptr }, + { "stop_sequence", nullptr }, + { "usage", { { "input_tokens", n_prompt_tokens }, { "output_tokens", 0 } } } } } } } }); } @@ -1658,30 +1689,26 @@ json server_task_result_cmpl_partial::to_json_anthropic() { if (!diff.tool_call_delta.name.empty()) { events.push_back({ - {"event", "content_block_start"}, - {"data", { - {"type", "content_block_start"}, - {"index", content_block_index}, - {"content_block", { - {"type", "tool_use"}, - {"id", diff.tool_call_delta.id}, - {"name", diff.tool_call_delta.name} - }} - }} + { "event", "content_block_start" }, + { "data", + { { "type", "content_block_start" }, + { "index", content_block_index }, + { "content_block", + { { "type", "tool_use" }, + { "id", diff.tool_call_delta.id }, + { "name", diff.tool_call_delta.name } } } } } }); } if (!diff.tool_call_delta.arguments.empty()) { events.push_back({ - {"event", "content_block_delta"}, - {"data", { - {"type", "content_block_delta"}, - {"index", content_block_index}, - {"delta", { - {"type", "input_json_delta"}, - {"partial_json", diff.tool_call_delta.arguments} - }} - }} + { "event", "content_block_delta" }, + { "data", + { { "type", "content_block_delta" }, + { "index", content_block_index }, + { "delta", + { { "type", "input_json_delta" }, + { "partial_json", diff.tool_call_delta.arguments } } } } } }); } } @@ -1741,28 +1768,28 @@ json server_task_result_error::to_json() { // server_task_result_metrics // json server_task_result_metrics::to_json() { - return json { - { "idle", n_idle_slots }, - { "processing", n_processing_slots }, - { "deferred", n_tasks_deferred }, - { "t_start", t_start }, + return json{ + { "idle", n_idle_slots }, + { "processing", n_processing_slots }, + { "deferred", n_tasks_deferred }, + { "t_start", t_start }, { "n_prompt_tokens_processed_total", n_prompt_tokens_processed_total }, - { "t_tokens_generation_total", t_tokens_generation_total }, - { "n_tokens_predicted_total", n_tokens_predicted_total }, - { "t_prompt_processing_total", t_prompt_processing_total }, + { "t_tokens_generation_total", t_tokens_generation_total }, + { "n_tokens_predicted_total", n_tokens_predicted_total }, + { "t_prompt_processing_total", t_prompt_processing_total }, - { "n_tokens_max", n_tokens_max }, + { "n_tokens_max", n_tokens_max }, - { "n_prompt_tokens_processed", n_prompt_tokens_processed }, - { "t_prompt_processing", t_prompt_processing }, - { "n_tokens_predicted", n_tokens_predicted }, - { "t_tokens_generation", t_tokens_generation }, + { "n_prompt_tokens_processed", n_prompt_tokens_processed }, + { "t_prompt_processing", t_prompt_processing }, + { "n_tokens_predicted", n_tokens_predicted }, + { "t_tokens_generation", t_tokens_generation }, - { "n_decode_total", n_decode_total }, - { "n_busy_slots_total", n_busy_slots_total }, + { "n_decode_total", n_decode_total }, + { "n_busy_slots_total", n_busy_slots_total }, - { "slots", slots_data }, + { "slots", slots_data }, }; } @@ -1771,25 +1798,21 @@ json server_task_result_metrics::to_json() { // json server_task_result_slot_save_load::to_json() { if (is_save) { - return json { - { "id_slot", id_slot }, - { "filename", filename }, - { "n_saved", n_tokens }, - { "n_written", n_bytes }, - { "timings", { - { "save_ms", t_ms } - }}, + return json{ + { "id_slot", id_slot }, + { "filename", filename }, + { "n_saved", n_tokens }, + { "n_written", n_bytes }, + { "timings", { { "save_ms", t_ms } } }, }; } - return json { - { "id_slot", id_slot }, - { "filename", filename }, - { "n_restored", n_tokens }, - { "n_read", n_bytes }, - { "timings", { - { "restore_ms", t_ms } - }}, + return json{ + { "id_slot", id_slot }, + { "filename", filename }, + { "n_restored", n_tokens }, + { "n_read", n_bytes }, + { "timings", { { "restore_ms", t_ms } } }, }; } @@ -1797,8 +1820,8 @@ json server_task_result_slot_save_load::to_json() { // server_task_result_slot_erase // json server_task_result_slot_erase::to_json() { - return json { - { "id_slot", id_slot }, + return json{ + { "id_slot", id_slot }, { "n_erased", n_erased }, }; } @@ -1810,13 +1833,13 @@ json server_task_result_slot_erase::to_json() { json server_task_result_get_lora::to_json() { json result = json::array(); for (size_t i = 0; i < loras.size(); ++i) { - auto & lora = loras[i]; - json entry = { - {"id", i}, - {"path", lora.info.path}, - {"scale", lora.info.scale}, - {"task_name", lora.info.task_name}, - {"prompt_prefix", lora.info.prompt_prefix}, + auto & lora = loras[i]; + json entry = { + { "id", i }, + { "path", lora.info.path }, + { "scale", lora.info.scale }, + { "task_name", lora.info.task_name }, + { "prompt_prefix", lora.info.prompt_prefix }, }; if (!lora.alora_invocation_tokens.empty()) { entry["alora_invocation_string"] = lora.alora_invocation_string; @@ -1832,7 +1855,9 @@ json server_task_result_get_lora::to_json() { // json server_task_result_apply_lora::to_json() { - return json {{ "success", true }}; + return json{ + { "success", true } + }; } // @@ -1890,7 +1915,7 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t } catch (const std::bad_alloc & e) { SRV_ERR("failed to allocate memory for prompt cache state: %s\n", e.what()); - limit_size = std::max(1, 0.4*size()); + limit_size = std::max(1, 0.4 * size()); SRV_WRN(" - cache size limit reduced to %.3f MiB\n", limit_size / (1024.0 * 1024.0)); @@ -1901,16 +1926,19 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t // TODO: for some reason we can't copy server_tokens, so we have to do this workaround auto & cur = states.emplace_back(); - cur = { - /*.tokens =*/ server_tokens(prompt.tokens.get_text_tokens(), false), - /*.data =*/ std::move(state_data), - /*.checkpoints =*/ prompt.checkpoints, + cur = { + /*.tokens =*/server_tokens(prompt.tokens.get_text_tokens(), false), + /*.data =*/std::move(state_data), + /*.checkpoints =*/prompt.checkpoints, }; return &cur; } -bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tokens_new, llama_context * ctx, int32_t id_slot) { +bool server_prompt_cache::load(server_prompt & prompt, + const server_tokens & tokens_new, + llama_context * ctx, + int32_t id_slot) { const int lcp_best = prompt.tokens.get_common_prefix(tokens_new); float f_keep_best = float(lcp_best) / prompt.tokens.size(); @@ -1944,7 +1972,7 @@ bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tok SRV_WRN(" - found better prompt with f_keep = %.3f, sim = %.3f\n", f_keep_best, sim_best); const size_t size = it_best->data.size(); - const size_t n = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0); + const size_t n = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0); if (n != size) { SRV_WRN("failed to restore state with size %zu\n", size); @@ -1970,7 +1998,8 @@ void server_prompt_cache::update() { break; } - SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n", states.front().size() / (1024.0 * 1024.0)); + SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n", + states.front().size() / (1024.0 * 1024.0)); states.pop_front(); } @@ -1980,7 +2009,8 @@ void server_prompt_cache::update() { const float size_per_token = std::max(1.0f, float(size()) / (std::max(1, n_tokens()))); // dynamically increase the token limit if it can fit in the memory limit - const size_t limit_tokens_cur = limit_size > 0 ? std::max(limit_tokens, limit_size/size_per_token) : limit_tokens; + const size_t limit_tokens_cur = + limit_size > 0 ? std::max(limit_tokens, limit_size / size_per_token) : limit_tokens; if (limit_tokens > 0) { while (states.size() > 1 && n_tokens() > limit_tokens_cur) { @@ -1995,11 +2025,11 @@ void server_prompt_cache::update() { } } - SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n", - states.size(), size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur); + SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n", states.size(), + size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur); for (const auto & state : states) { - SRV_WRN(" - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n", - (const void *)&state, state.n_tokens(), state.checkpoints.size(), state.size() / (1024.0 * 1024.0)); + SRV_WRN(" - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n", (const void *) &state, state.n_tokens(), + state.checkpoints.size(), state.size() / (1024.0 * 1024.0)); } } diff --git a/tools/server/server-task.h b/tools/server/server-task.h index a69e8f1a3d..7ccaf3c31b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -3,10 +3,10 @@ #include "common.h" #include "llama.h" -#include -#include #include #include +#include +#include // TODO: prevent including the whole server-common.h as we only use server_tokens #include "server-common.h" @@ -30,7 +30,7 @@ enum server_task_type { // TODO: change this to more generic "response_format" to replace the "format_response_*" in server-common enum task_response_type { - TASK_RESPONSE_TYPE_NONE, // llama.cpp native format + TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, TASK_RESPONSE_TYPE_OAI_RESP, @@ -48,22 +48,23 @@ enum stop_type { struct task_params { bool stream = true; bool include_usage = false; - bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt + bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt bool return_tokens = false; bool return_progress = false; - int32_t n_keep = 0; // number of tokens to keep from initial prompt - int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half - int32_t n_predict = -1; // new tokens to predict - int32_t n_indent = 0; // minimum line indentation for the generated text in number of whitespace characters - int32_t n_cmpl = 1; // number of completions to generate from this prompt + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_discard = + 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half + int32_t n_predict = -1; // new tokens to predict + int32_t n_indent = 0; // minimum line indentation for the generated text in number of whitespace characters + int32_t n_cmpl = 1; // number of completions to generate from this prompt - int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled) + int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled) - int64_t t_max_prompt_ms = -1; // TODO: implement - int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit + int64_t t_max_prompt_ms = -1; // TODO: implement + int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit - std::map lora; // mapping adapter ID -> scale + std::map lora; // mapping adapter ID -> scale std::vector antiprompt; std::vector response_fields; @@ -71,7 +72,7 @@ struct task_params { bool timings_per_token = false; bool post_sampling_probs = false; - struct common_params_sampling sampling; + struct common_params_sampling sampling; struct common_params_speculative speculative; // response formatting @@ -84,7 +85,7 @@ struct task_params { common_chat_parser_params chat_parser_params; // Embeddings - int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm) + int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm) json format_logit_bias(const std::vector & logit_bias) const; json to_json(bool only_metrics = false) const; @@ -95,9 +96,10 @@ struct task_result_state { // tracking diffs for partial tool calls std::vector diffs; common_chat_parser_params chat_parser_params; - common_chat_msg chat_msg; - std::string generated_text; // append new chunks of generated text here - std::vector generated_tool_call_ids; + common_chat_msg chat_msg; + std::string generated_text; // append new chunks of generated text here + std::vector generated_tool_call_ids; + std::unordered_set sent_tool_call_names; // for OpenAI Responses and Anthropic streaming API: // track output item / content block state across chunks @@ -117,17 +119,17 @@ struct task_result_state { , oai_resp_message_id("msg_" + random_string()) {} // parse partial tool calls and update the internal state - common_chat_msg update_chat_msg( - const std::string & text_added, - bool is_partial, - std::vector & diffs); + common_chat_msg update_chat_msg(const std::string & text_added, + bool is_partial, + std::vector & diffs, + bool filter_tool_calls = false); }; struct server_task { - int id = -1; // to be filled by server_queue + int id = -1; // to be filled by server_queue // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader - size_t index = 0; // used when there are multiple prompts (batch request) + size_t index = 0; // used when there are multiple prompts (batch request) // used by SERVER_TASK_TYPE_CANCEL int id_target = -1; @@ -157,13 +159,14 @@ struct server_task { std::string filename; std::string filepath; }; + slot_action slot_action; // used by SERVER_TASK_TYPE_METRICS bool metrics_reset_bucket = false; // used by SERVER_TASK_TYPE_SET_LORA - std::map set_lora; // mapping adapter ID -> scale + std::map set_lora; // mapping adapter ID -> scale server_task() = default; @@ -203,11 +206,10 @@ struct server_task { } } - static task_params params_from_json_cmpl( - const llama_vocab * vocab, - const common_params & params_base, - const int n_ctx_slot, - const json & data); + static task_params params_from_json_cmpl(const llama_vocab * vocab, + const common_params & params_base, + const int n_ctx_slot, + const json & data); // utility function static std::unordered_set get_list_id(const std::vector & tasks) { @@ -259,50 +261,53 @@ struct result_timings { int32_t cache_n = -1; int32_t prompt_n = -1; - double prompt_ms; - double prompt_per_token_ms; - double prompt_per_second; + double prompt_ms; + double prompt_per_token_ms; + double prompt_per_second; int32_t predicted_n = -1; - double predicted_ms; - double predicted_per_token_ms; - double predicted_per_second; + double predicted_ms; + double predicted_per_token_ms; + double predicted_per_second; // Optional speculative metrics - only included when > 0 - int32_t draft_n = 0; + int32_t draft_n = 0; int32_t draft_n_accepted = 0; json to_json() const; }; struct result_prompt_progress { - int32_t total = 0; - int32_t cache = 0; + int32_t total = 0; + int32_t cache = 0; int32_t processed = 0; - int64_t time_ms = 0; + int64_t time_ms = 0; json to_json() const; }; struct server_task_result { - int id = -1; - int id_slot = -1; + int id = -1; + int id_slot = -1; // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader - size_t index = 0; // to be used for batched tasks + size_t index = 0; // to be used for batched tasks virtual bool is_error() { // only used by server_task_result_error return false; } + virtual bool is_stop() { // only used by server_task_result_cmpl_* return true; } + virtual void update(task_result_state &) { // only used by server_task_result_cmpl_* } - virtual json to_json() = 0; + + virtual json to_json() = 0; virtual ~server_task_result() = default; }; @@ -311,13 +316,15 @@ using server_task_result_ptr = std::unique_ptr; struct completion_token_output { llama_token tok; - float prob; + float prob; std::string text_to_send; + struct prob_info { llama_token tok; std::string txt; - float prob; + float prob; }; + std::vector probs; json to_json(bool post_sampling_probs) const; @@ -327,29 +334,28 @@ struct completion_token_output { static float logarithm(float x); static std::vector str_to_bytes(const std::string & str); - }; struct server_task_result_cmpl_final : server_task_result { - std::string content; + std::string content; llama_tokens tokens; - bool stream; - bool include_usage; + bool stream; + bool include_usage; result_timings timings; - std::string prompt; + std::string prompt; - bool truncated; - int32_t n_decoded; - int32_t n_prompt_tokens; - int32_t n_tokens_cached; - bool has_new_line; + bool truncated; + int32_t n_decoded; + int32_t n_prompt_tokens; + int32_t n_tokens_cached; + bool has_new_line; std::string stopping_word; - stop_type stop = STOP_TYPE_NONE; + stop_type stop = STOP_TYPE_NONE; - bool post_sampling_probs; + bool post_sampling_probs; std::vector probs_output; - std::vector response_fields; + std::vector response_fields; task_params generation_params; @@ -358,7 +364,7 @@ struct server_task_result_cmpl_final : server_task_result { task_response_type res_type = TASK_RESPONSE_TYPE_NONE; std::string oaicompat_model; std::string oaicompat_cmpl_id; - common_chat_msg oaicompat_msg; // to be populated by update() + common_chat_msg oaicompat_msg; // to be populated by update() std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; @@ -369,7 +375,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string oai_resp_message_id; virtual bool is_stop() override { - return true; // in stream mode, final responses are considered stop + return true; // in stream mode, final responses are considered stop } virtual json to_json() override; @@ -407,11 +413,11 @@ struct server_task_result_cmpl_partial : server_task_result { int32_t n_decoded; int32_t n_prompt_tokens; - bool post_sampling_probs; - bool is_progress = false; + bool post_sampling_probs; + bool is_progress = false; completion_token_output prob_output; - result_timings timings; - result_prompt_progress progress; + result_timings timings; + result_prompt_progress progress; // response formatting bool verbose = false; @@ -435,7 +441,7 @@ struct server_task_result_cmpl_partial : server_task_result { bool anthropic_has_reasoning = false; virtual bool is_stop() override { - return false; // in stream mode, partial responses are not considered stop + return false; // in stream mode, partial responses are not considered stop } virtual void update(task_result_state & state) override; @@ -477,24 +483,22 @@ struct server_task_result_rerank : server_task_result { }; struct server_task_result_error : server_task_result { - error_type err_type = ERROR_TYPE_SERVER; + error_type err_type = ERROR_TYPE_SERVER; std::string err_msg; // for ERROR_TYPE_EXCEED_CONTEXT_SIZE int32_t n_prompt_tokens = 0; int32_t n_ctx = 0; - virtual bool is_error() override { - return true; - } + virtual bool is_error() override { return true; } virtual json to_json() override; }; struct server_task_result_metrics : server_task_result { - int n_idle_slots; - int n_processing_slots; - int n_tasks_deferred; + int n_idle_slots; + int n_processing_slots; + int n_tasks_deferred; int64_t t_start; // TODO: somehow reuse server_metrics in the future, instead of duplicating the fields @@ -523,7 +527,7 @@ struct server_task_result_metrics : server_task_result { struct server_task_result_slot_save_load : server_task_result { std::string filename; - bool is_save; // true = save, false = load + bool is_save; // true = save, false = load size_t n_tokens; size_t n_bytes; @@ -541,9 +545,10 @@ struct server_task_result_slot_erase : server_task_result { struct server_task_result_get_lora : server_task_result { struct lora { common_adapter_lora_info info; - std::string alora_invocation_string; - llama_tokens alora_invocation_tokens; + std::string alora_invocation_string; + llama_tokens alora_invocation_tokens; }; + std::vector loras; virtual json to_json() override; @@ -559,9 +564,7 @@ struct server_prompt_checkpoint { std::vector data; - size_t size() const { - return data.size(); - } + size_t size() const { return data.size(); } }; struct server_prompt { @@ -581,22 +584,14 @@ struct server_prompt { return res; } - int n_tokens() const { - return tokens.size(); - } + int n_tokens() const { return tokens.size(); } - server_prompt clone() const { - return server_prompt { - tokens.clone(), - data, - checkpoints - }; - } + server_prompt clone() const { return server_prompt{ tokens.clone(), data, checkpoints }; } }; struct server_prompt_cache { server_prompt_cache(int32_t limit_size_mib, size_t limit_tokens) { - this->limit_size = 1024ull*1024ull*(limit_size_mib < 0 ? 0 : limit_size_mib); + this->limit_size = 1024ull * 1024ull * (limit_size_mib < 0 ? 0 : limit_size_mib); this->limit_tokens = limit_tokens; }