251 lines
13 KiB
C++
251 lines
13 KiB
C++
#include "chat-auto-parser-helpers.h"
|
|
#include "chat-auto-parser.h"
|
|
#include "chat-peg-parser.h"
|
|
#include "chat.h"
|
|
#include "json-schema-to-grammar.h"
|
|
#include "log.h"
|
|
#include "nlohmann/json.hpp"
|
|
|
|
#include <optional>
|
|
|
|
using json = nlohmann::ordered_json;
|
|
|
|
common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis,
|
|
const common_chat_template & tmpl,
|
|
const struct templates_params & inputs) {
|
|
common_chat_params data;
|
|
|
|
try {
|
|
LOG_DBG("%s\n", __func__);
|
|
|
|
// Patch messages if template requires non-null content
|
|
// Some templates (e.g., iquest) render null as "None" when concatenating strings
|
|
std::optional<json> messages_override;
|
|
if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) {
|
|
LOG_DBG("Patching null content to empty string (template requires non-null content)\n");
|
|
json patched_messages = inputs.messages;
|
|
for (auto & msg : patched_messages) {
|
|
if (msg.contains("content") && msg["content"].is_null()) {
|
|
msg["content"] = "";
|
|
}
|
|
}
|
|
messages_override = patched_messages;
|
|
}
|
|
|
|
if (inputs.messages.empty()) {
|
|
// Some templates don't handle empty messages well - always leave something in
|
|
json message = {
|
|
{ { "role", "user" }, { "content", "Hello" } }
|
|
};
|
|
messages_override.emplace(message);
|
|
}
|
|
|
|
// Calculate prompt first to detect forced thinking
|
|
data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override);
|
|
|
|
// Determine if thinking is forced open based on prompt ending
|
|
bool thinking_forced_open = false;
|
|
if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) {
|
|
if (inputs.enable_thinking) {
|
|
thinking_forced_open = true;
|
|
LOG_DBG("Thinking forced open based on template analysis\n");
|
|
} else {
|
|
// Template ends with reasoning start marker but thinking is disabled
|
|
// Append the end marker to close it
|
|
data.prompt += analysis.content.reasoning_end;
|
|
LOG_DBG("Appended reasoning end marker since thinking is disabled\n");
|
|
}
|
|
}
|
|
data.thinking_forced_open = thinking_forced_open;
|
|
|
|
// Build the unified parser
|
|
auto arena = build_parser(analysis, tmpl, inputs, thinking_forced_open);
|
|
data.parser = arena.save();
|
|
|
|
// Determine format
|
|
bool has_tools =
|
|
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
|
|
|
if (has_tools && analysis.tools.supports_tools) {
|
|
// Unified format that handles both JSON and tagged tool calls
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n");
|
|
} else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) {
|
|
// Reasoning markers detected - use PEG parser to handle thinking blocks
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n");
|
|
} else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) {
|
|
// Content markers detected - use PEG parser to strip them even without tools
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n");
|
|
} else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
|
|
// Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content}
|
|
// Need PEG parser to handle recipient delimiter parsing
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n");
|
|
} else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
|
|
// Tag-with-name format (e.g., func_name\n{args} for Functionary)
|
|
// Need PEG parser to handle function name parsing
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n");
|
|
} else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
|
|
// Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2)
|
|
// Need PEG parser to handle bracket tag parsing
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser for bracket-tag format (format: PEG_NATIVE)\n");
|
|
} else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
|
|
// Prefixed-indexed format (e.g., Kimi-K2)
|
|
// Need PEG parser to handle namespace and indexed format
|
|
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
|
LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n");
|
|
} else {
|
|
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n");
|
|
}
|
|
|
|
// Determine trigger word for lazy grammar
|
|
std::string trigger_word;
|
|
if (!analysis.tools.tool_section_start.empty() ||
|
|
analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
|
|
trigger_word = analysis.tools.tool_section_start;
|
|
} else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
|
|
trigger_word = analysis.tools.function_prefix;
|
|
} else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
|
|
analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
|
|
// For formats with per-call markers, use per_call_start as trigger
|
|
trigger_word = analysis.tools.per_call_start;
|
|
}
|
|
|
|
// Build grammar for tool calls
|
|
data.grammar_lazy = analysis.tools.supports_tools && has_tools;
|
|
|
|
// For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar
|
|
// since there's no clear trigger word - constrain from the start
|
|
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
|
|
analysis.tools.function_prefix.empty()) {
|
|
data.grammar_lazy = false;
|
|
}
|
|
|
|
if (data.grammar_lazy) {
|
|
if (!trigger_word.empty()) {
|
|
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word });
|
|
}
|
|
}
|
|
|
|
// Build grammar
|
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
if (inputs.tools.is_array()) {
|
|
for (const auto & tool : inputs.tools) {
|
|
if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
|
|
continue;
|
|
}
|
|
const auto & function = tool.at("function");
|
|
if (function.contains("parameters")) {
|
|
auto params = function.at("parameters");
|
|
builder.resolve_refs(params);
|
|
}
|
|
}
|
|
}
|
|
arena.build_grammar(builder, data.grammar_lazy);
|
|
});
|
|
|
|
// Set preserved tokens from analysis
|
|
data.preserved_tokens = analysis.preserved_tokens;
|
|
|
|
LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n");
|
|
|
|
} catch (const std::exception & e) {
|
|
LOG_DBG("Unified parser generation failed: %s\n", e.what());
|
|
throw;
|
|
}
|
|
|
|
return data;
|
|
}
|
|
|
|
common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis,
|
|
const common_chat_template & tmpl,
|
|
const struct templates_params & inputs,
|
|
bool thinking_forced_open) {
|
|
GGML_UNUSED(tmpl);
|
|
|
|
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
|
|
// Build reasoning block using ContentStructure
|
|
auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, thinking_forced_open);
|
|
|
|
// Build content block using ContentStructure
|
|
// Note: we don't pass tool_section_start here because content-before-tools handling
|
|
// is done inline in each branch below with p.content(p.until(marker))
|
|
auto content = p.build_content_block(analysis.content, inputs.reasoning_format);
|
|
|
|
// Build tool section using ToolCallStructure (if applicable)
|
|
bool has_tools =
|
|
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
|
|
|
if (has_tools && analysis.tools.supports_tools) {
|
|
bool force_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
auto tool_section =
|
|
p.build_tool_section(analysis.tools, inputs.tools, inputs.parallel_tool_calls, force_calls);
|
|
|
|
// Compose: reasoning -> content before tools -> tool_section -> trailing content
|
|
// When thinking is forced open, the reasoning block expects </think>.
|
|
// For tool-only messages (no thinking content), the model may output tools directly
|
|
// without the </think> tag, so we need to make reasoning optional in that case.
|
|
// But if reasoning_format is NONE, the reasoning block is already eps() - don't wrap it
|
|
// in optional() as that would generate invalid grammar.
|
|
auto reasoning_for_tools =
|
|
(thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE) ?
|
|
p.optional(reasoning) :
|
|
reasoning;
|
|
|
|
if (!analysis.tools.tool_section_start.empty()) {
|
|
// With section markers: look for start marker to delimit content
|
|
auto content_before_tools = p.content(p.until(analysis.tools.tool_section_start));
|
|
return p.sequence({ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section,
|
|
p.space(), p.optional(p.content(p.rest())), p.end() });
|
|
}
|
|
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
|
|
!analysis.tools.function_prefix.empty()) {
|
|
// Tag-with-name format (e.g., >>>func_name): content stops at function prefix
|
|
auto content_before_tools = p.content(p.until(analysis.tools.function_prefix));
|
|
return p.sequence(
|
|
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
|
|
}
|
|
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
|
|
// Functionary-style format: tool call starts immediately (e.g., func_name\n{args})
|
|
// No content before tools in this format - the entire output is the tool call
|
|
return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
|
|
}
|
|
if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
|
|
analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
|
|
// Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2) format:
|
|
// Tool calls start with per_call_start marker (e.g., [TOOL_CALLS], <|tool_call_begin|>)
|
|
if (!analysis.tools.per_call_start.empty()) {
|
|
auto content_before_tools = p.content(p.until(analysis.tools.per_call_start));
|
|
return p.sequence(
|
|
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
|
|
}
|
|
// Fallback: no content before tools
|
|
return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
|
|
}
|
|
if (analysis.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK &&
|
|
!analysis.tools.code_block_marker.empty()) {
|
|
// Markdown code block format (Cohere Command-R Plus):
|
|
// Content stops at the code_block_marker (e.g., "Action:")
|
|
auto content_before_tools = p.content(p.until(analysis.tools.code_block_marker));
|
|
return p.sequence(
|
|
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
|
|
}
|
|
// No section markers (raw JSON format): content must stop at JSON object start
|
|
// Tool calls start with "{", so use that as a delimiter
|
|
auto content_before_tools = p.content(p.until("{"));
|
|
return p.sequence(
|
|
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
|
|
}
|
|
|
|
// No tools - just reasoning (if any) followed by content
|
|
return p.sequence({ reasoning, p.space(), content, p.end() });
|
|
});
|
|
|
|
return parser;
|
|
}
|