// llama.cpp/common/chat-auto-parser-generator.cpp
//
// 251 lines
// 13 KiB
// C++

#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include <optional>
using json = nlohmann::ordered_json;
// Produces the complete common_chat_params for a template from its analysis result.
//
// Steps, in order:
//   1. optionally patch the messages (null content -> "", or inject a placeholder
//      message when the conversation is empty) and render the prompt;
//   2. decide whether the reasoning block is forced open, closing it in the prompt
//      when thinking is disabled;
//   3. build the unified PEG parser and store its serialized form;
//   4. select the chat format (PEG_NATIVE vs CONTENT_ONLY);
//   5. pick the lazy-grammar trigger word and build the grammar.
//
// Parameters:
//   analysis - template analysis (reasoning/content structure, tool-call structure,
//              preserved tokens)
//   tmpl     - chat template used to render the prompt
//   inputs   - request parameters (messages, tools, tool_choice, enable_thinking, ...)
//
// Returns the populated common_chat_params.
// Any std::exception raised during generation is logged at debug level and rethrown.
common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis,
                                                            const common_chat_template &     tmpl,
                                                            const struct templates_params &  inputs) {
    common_chat_params data;
    try {
        LOG_DBG("%s\n", __func__);

        // Patch messages if template requires non-null content
        // Some templates (e.g., iquest) render null as "None" when concatenating strings
        std::optional<json> messages_override;
        if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) {
            LOG_DBG("Patching null content to empty string (template requires non-null content)\n");
            json patched_messages = inputs.messages;
            for (auto & msg : patched_messages) {
                if (msg.contains("content") && msg["content"].is_null()) {
                    msg["content"] = "";
                }
            }
            messages_override = patched_messages;
        }
        if (inputs.messages.empty()) {
            // Some templates don't handle empty messages well - always leave something in
            json message = {
                { { "role", "user" }, { "content", "Hello" } }
            };
            messages_override.emplace(message);
        }

        // Calculate prompt first to detect forced thinking
        data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override);

        // Determine if thinking is forced open based on the template analysis
        bool thinking_forced_open = false;
        if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) {
            if (inputs.enable_thinking) {
                thinking_forced_open = true;
                LOG_DBG("Thinking forced open based on template analysis\n");
            } else {
                // Template ends with reasoning start marker but thinking is disabled
                // Append the end marker to close it
                data.prompt += analysis.content.reasoning_end;
                LOG_DBG("Appended reasoning end marker since thinking is disabled\n");
            }
        }
        data.thinking_forced_open = thinking_forced_open;

        // Build the unified parser
        auto arena  = build_parser(analysis, tmpl, inputs, thinking_forced_open);
        data.parser = arena.save();

        // Determine format. Every branch except the last selects PEG_NATIVE; the
        // branches are kept separate so the debug log states why it was selected.
        const bool has_tools =
            inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
        if (has_tools && analysis.tools.supports_tools) {
            // Unified format that handles both JSON and tagged tool calls
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n");
        } else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) {
            // Reasoning markers detected - use PEG parser to handle thinking blocks
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n");
        } else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) {
            // Content markers detected - use PEG parser to strip them even without tools
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n");
        } else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
            // Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content}
            // Need PEG parser to handle recipient delimiter parsing
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n");
        } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
            // Tag-with-name format (e.g., func_name\n{args} for Functionary)
            // Need PEG parser to handle function name parsing
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n");
        } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
            // Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2)
            // Need PEG parser to handle bracket tag parsing
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser for bracket-tag format (format: PEG_NATIVE)\n");
        } else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
            // Prefixed-indexed format (e.g., Kimi-K2)
            // Need PEG parser to handle namespace and indexed format
            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
            LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n");
        } else {
            data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
            LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n");
        }

        // Determine trigger word for lazy grammar
        std::string trigger_word;
        if (!analysis.tools.tool_section_start.empty() ||
            analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
            trigger_word = analysis.tools.tool_section_start;
        } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
            trigger_word = analysis.tools.function_prefix;
        } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
                   analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
            // For formats with per-call markers, use per_call_start as trigger
            trigger_word = analysis.tools.per_call_start;
        }

        // Lazy grammar is only appropriate while tool calls are optional. With
        // tool_choice == REQUIRED the parser is built with forced calls, so the grammar
        // must constrain from the start as well; a lazy grammar would stay inactive
        // until the trigger word shows up and let the model answer without any call.
        data.grammar_lazy = analysis.tools.supports_tools && has_tools &&
                            inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;

        // For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar
        // since there's no clear trigger word - constrain from the start
        if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
            analysis.tools.function_prefix.empty()) {
            data.grammar_lazy = false;
        }

        // NOTE(review): when the grammar is lazy but trigger_word is empty, no trigger is
        // registered here - confirm that consumers of grammar_triggers accept that.
        if (data.grammar_lazy && !trigger_word.empty()) {
            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word });
        }

        // Build grammar
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            if (inputs.tools.is_array()) {
                // Resolve $ref entries in every function tool's parameter schema;
                // entries that are not well-formed function tools are skipped.
                for (const auto & tool : inputs.tools) {
                    if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
                        continue;
                    }
                    const auto & function = tool.at("function");
                    if (function.contains("parameters")) {
                        // resolve_refs works on a local copy of the schema
                        auto params = function.at("parameters");
                        builder.resolve_refs(params);
                    }
                }
            }
            arena.build_grammar(builder, data.grammar_lazy);
        });

        // Set preserved tokens from analysis
        data.preserved_tokens = analysis.preserved_tokens;
        LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n");
    } catch (const std::exception & e) {
        // Log for diagnostics, then let the caller handle the failure
        LOG_DBG("Unified parser generation failed: %s\n", e.what());
        throw;
    }
    return data;
}
// Assembles the unified PEG parser (reasoning, content and - when applicable - tool
// calls) for the analysed template.
//
// Parameters:
//   analysis             - template analysis describing content and tool-call structure
//   tmpl                 - unused here
//   inputs               - request parameters (tools, tool_choice, reasoning_format, ...)
//   thinking_forced_open - whether the reasoning block is forced open for this request
//
// Returns the arena produced by build_chat_peg_unified_parser.
common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis,
                                                       const common_chat_template &     tmpl,
                                                       const struct templates_params &  inputs,
                                                       bool                             thinking_forced_open) {
    GGML_UNUSED(tmpl);

    return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
        const auto & tools = analysis.tools;

        // Reasoning and plain-content blocks come from the content structure. The
        // content block is only used on the no-tools path: with tools, the text in
        // front of a call is captured separately via p.content(p.until(marker)).
        auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, thinking_forced_open);
        auto content   = p.build_content_block(analysis.content, inputs.reasoning_format);

        const bool tools_requested =
            inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
        if (!tools_requested || !tools.supports_tools) {
            // Without tools: reasoning (if any) followed by content
            return p.sequence({ reasoning, p.space(), content, p.end() });
        }

        const bool must_call = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        auto tool_section    = p.build_tool_section(tools, inputs.tools, inputs.parallel_tool_calls, must_call);

        // A forced-open reasoning block expects its closing tag, yet a tool-only reply
        // may jump straight to the call, so reasoning becomes optional in that case.
        // With reasoning_format NONE the block is already eps(); wrapping that in
        // optional() would generate invalid grammar, so it is left untouched.
        const bool reasoning_skippable =
            thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
        auto reasoning_prefix = reasoning_skippable ? p.optional(reasoning) : reasoning;

        // reasoning -> content up to `marker` -> tool calls -> end
        const auto content_then_tools = [&](const auto & marker) {
            auto leading_content = p.content(p.until(marker));
            return p.sequence(
                { reasoning_prefix, p.space(), leading_content, p.space(), tool_section, p.end() });
        };
        // reasoning -> tool calls -> end (nothing allowed in front of the call)
        const auto tools_only = [&]() {
            return p.sequence({ reasoning_prefix, p.space(), tool_section, p.end() });
        };

        if (!tools.tool_section_start.empty()) {
            // Section markers present: content runs up to the start marker, and
            // trailing content after the tool section is accepted as well.
            auto leading_content = p.content(p.until(tools.tool_section_start));
            return p.sequence({ reasoning_prefix, p.space(), leading_content, p.space(), tool_section,
                                p.space(), p.optional(p.content(p.rest())), p.end() });
        }

        const auto format = tools.function_format;

        if (format == tool_call_structure::FUNC_TAG_WITH_NAME) {
            // With a prefix (e.g., >>>func_name) content stops at that prefix; without
            // one (Functionary-style func_name\n{args}) the output is the tool call.
            return tools.function_prefix.empty() ? tools_only() : content_then_tools(tools.function_prefix);
        }

        if (format == tool_call_structure::FUNC_BRACKET_TAG ||
            format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
            // Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2): calls begin
            // with per_call_start (e.g., [TOOL_CALLS], <|tool_call_begin|>). Without
            // such a marker no content is allowed before the tools.
            return tools.per_call_start.empty() ? tools_only() : content_then_tools(tools.per_call_start);
        }

        if (format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK && !tools.code_block_marker.empty()) {
            // Markdown code block (Cohere Command-R Plus): content stops at the
            // code_block_marker (e.g., "Action:")
            return content_then_tools(tools.code_block_marker);
        }

        // Raw JSON without section markers: the tool call starts with "{", so that
        // character delimits the leading content.
        return content_then_tools("{");
    });
}