This commit is contained in:
Piotr Wilkin 2026-03-15 02:39:51 +01:00
parent ba6410ff74
commit a5b51007c1
20 changed files with 315 additions and 345 deletions

View File

@ -1,5 +1,5 @@
#include "chat-auto-parser.h"
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "common.h"
@ -24,13 +24,13 @@ static void foreach_function(const json & tools, const std::function<void(const
namespace autoparser {
parser_build_context::parser_build_context(common_chat_peg_builder & p, const templates_params & inputs) :
parser_build_context::parser_build_context(common_chat_peg_builder & p, const generation_params & inputs) :
p(p),
inputs(inputs),
reasoning_parser(p.eps()) {}
common_chat_params peg_generator::generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs) {
const struct generation_params & inputs) {
// Run differential analysis to extract template structure
struct autoparser autoparser;
autoparser.analyze_template(tmpl);
@ -38,7 +38,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
}
common_chat_params peg_generator::generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs,
const struct generation_params & inputs,
const autoparser & autoparser) {
// Create the result structure
common_chat_params data;
@ -46,62 +46,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = autoparser.preserved_tokens;
// Extract what the template appends when add_generation_prompt=true (the generation prompt suffix).
std::string gen_prompt_suffix;
{
template_params tparams;
tparams.messages = json::array({ json{ {"role", "user"}, {"content", "x"} } });
tparams.add_generation_prompt = false;
tparams.enable_thinking = inputs.enable_thinking;
auto result = compare_variants(tmpl, tparams, [](template_params & p) {
p.add_generation_prompt = true;
});
if (result) {
gen_prompt_suffix = result->diff.right;
}
}
// Fallback for templates that ignore add_generation_prompt: search the rendered prompt.
// Excluded for TOOLS_ONLY: the start tag there is model-generated and may appear in prior turns.
const std::string & prompt_to_search =
(gen_prompt_suffix.empty() && autoparser.reasoning.mode != reasoning_mode::TOOLS_ONLY)
? data.prompt
: gen_prompt_suffix;
bool clear_reasoning_start = false;
if (inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
autoparser.reasoning.mode != reasoning_mode::NONE &&
!autoparser.reasoning.end.empty()) {
const auto & r_start = autoparser.reasoning.start;
const auto & r_end = autoparser.reasoning.end;
auto r_end_t = trim_trailing_whitespace(r_end);
auto r_start_t = trim_trailing_whitespace(r_start);
if (!r_start_t.empty()) {
auto start_pos = prompt_to_search.rfind(r_start_t);
if (start_pos != std::string::npos) {
std::string from_start = prompt_to_search.substr(start_pos);
auto fs_trimmed = trim_trailing_whitespace(from_start);
if (string_ends_with(fs_trimmed, r_end_t)) {
data.prefill = r_start + r_end;
} else if (string_ends_with(fs_trimmed, r_start_t)) {
data.prefill = from_start;
} else {
clear_reasoning_start = true;
}
}
}
}
common_peg_arena parser;
if (clear_reasoning_start) {
struct autoparser modified = autoparser;
modified.reasoning.start.clear();
parser = modified.build_parser(inputs);
} else {
parser = autoparser.build_parser(inputs);
}
auto parser = autoparser.build_parser(inputs);
data.parser = parser.save();
// Build grammar if tools are present
@ -137,18 +82,11 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
return data;
}
common_peg_arena autoparser::build_parser(const templates_params & inputs) const {
common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
if (!analysis_complete) {
throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
}
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
// If the template uses Python dict format (single-quoted strings in JSON structures),
// pre-register a json-string rule that accepts both quote styles. This must happen
// before any call to p.json() so that all JSON parsing inherits the flexible rule.
if (tools.format.uses_python_dicts) {
p.rule("json-string", p.quoted_string());
}
parser_build_context ctx(p, inputs);
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
@ -158,22 +96,24 @@ common_peg_arena autoparser::build_parser(const templates_params & inputs) const
// Build reasoning parser
ctx.reasoning_parser = reasoning.build_parser(ctx);
auto parser = p.eps();
bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
if (has_response_format) {
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
return ctx.reasoning_parser + p.space() + p.choice({
parser = ctx.reasoning_parser + p.space() + p.choice({
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
response_format
}) + p.end();
} else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
parser = tools.build_parser(ctx);
} else {
parser = content.build_parser(ctx);
}
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
return tools.build_parser(ctx);
}
return content.build_parser(ctx);
parser = wrap_for_generation_prompt(p, parser, inputs, reasoning);
return parser;
});
}
@ -188,10 +128,10 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co
if (!end.empty()) {
if (!start.empty()) {
// Standard tag-based: optional(<think>reasoning</think>)
return p.optional(start + p.reasoning(p.until(end)) + end);
return p.optional(start + p.reasoning(p.until(end)) + end + p.space());
}
// Delimiter-style (empty start)
return p.optional(p.reasoning(p.until(end)) + end);
return p.optional(p.reasoning(p.until(end)) + end + p.space());
}
}
@ -380,7 +320,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
"tool-" + name + "-arg-" + param_name + "-schema",
param_schema, true)) :
p.tool_arg_json_value(p.schema(
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
p.space()) +
p.tool_arg_close(p.literal(arguments.value_suffix)));

View File

@ -1,9 +1,11 @@
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include "peg-parser.h"
#include <cctype>
#include <numeric>
@ -291,10 +293,26 @@ std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segm
return result;
}
// Wrap a parser so that it first consumes the template's generation prompt
// (the text appended when add_generation_prompt=true, e.g. an assistant header).
//
// If the generation prompt itself contains the reasoning start tag (some
// templates pre-open the thinking block inside the prompt), only the portion
// before that tag is consumed here so the reasoning parser can match the tag.
//
// @param p         builder used to create the literal prefix parser
// @param prs       the parser to wrap
// @param inputs    generation params carrying the extracted generation prompt
// @param reasoning reasoning analysis providing the (possibly empty) start tag
// @return prs, optionally prefixed with a literal for the generation prompt
common_peg_parser wrap_for_generation_prompt(common_chat_peg_builder & p,
                                             const common_peg_parser & prs,
                                             const autoparser::generation_params & inputs,
                                             const autoparser::analyze_reasoning & reasoning) {
    auto parser = prs;
    if (!inputs.generation_prompt.empty()) {
        size_t end_pos = inputs.generation_prompt.size();
        if (!reasoning.start.empty()) {
            // Compute the tag position once (the original looked it up twice:
            // once for the presence check and once for the cut position).
            const size_t tag_pos = inputs.generation_prompt.find(reasoning.start);
            if (tag_pos != std::string::npos) {
                end_pos = tag_pos;
            }
        }
        std::string cut_genprompt = inputs.generation_prompt.substr(0, end_pos);
        parser = p.literal(cut_genprompt) + parser;
    }
    return parser;
}
namespace autoparser {
std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
templates_params tmpl_params;
generation_params tmpl_params;
tmpl_params.messages = params.messages;
tmpl_params.tools = params.tools;
tmpl_params.add_generation_prompt = params.add_generation_prompt;

View File

@ -1,6 +1,7 @@
#pragma once
#include "chat-auto-parser.h"
#include "peg-parser.h"
#include <functional>
#include <optional>
#include <string>
@ -57,6 +58,11 @@ std::vector<segment> segmentize_markers(const std::string & text);
// (MARKER, "</function>"), (MARKER, "</tool_call>") ]
std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);
// Wrap parser with generation prompt parser
common_peg_parser wrap_for_generation_prompt(common_chat_peg_builder & p,
const common_peg_parser & prs,
const autoparser::generation_params & inputs,
const autoparser::analyze_reasoning & reasoning);
namespace autoparser {
// Apply a template with the given parameters, returning the rendered string (empty on failure)

View File

@ -50,7 +50,7 @@ namespace autoparser {
// High-level params for parser generation
// ============================================================================
struct templates_params {
struct generation_params {
json messages;
json tools;
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
@ -62,6 +62,7 @@ struct templates_params {
bool add_generation_prompt = false;
bool enable_thinking = true;
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
std::string generation_prompt;
json extra_context;
bool add_bos = false;
bool add_eos = false;
@ -174,7 +175,6 @@ struct tool_format_analysis {
bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
bool uses_python_dicts = false; // Tool call args use Python dict format (single-quoted strings)
std::string function_field = "function";
std::string name_field = "name";
@ -215,12 +215,12 @@ struct analyze_content;
struct parser_build_context {
common_chat_peg_builder & p;
const templates_params & inputs;
const generation_params & inputs;
common_peg_parser reasoning_parser;
bool extracting_reasoning = false;
const analyze_content * content = nullptr;
parser_build_context(common_chat_peg_builder & p, const templates_params & inputs);
parser_build_context(common_chat_peg_builder & p, const generation_params & inputs);
};
// ============================================================================
@ -250,6 +250,7 @@ struct analyze_reasoning : analyze_base {
analyze_reasoning() = default;
analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
analyze_reasoning(std::string start_, std::string end_) : start(std::move(start_)), end(std::move(end_)) {}
common_peg_parser build_parser(parser_build_context & ctx) const override;
@ -371,7 +372,7 @@ struct autoparser {
void analyze_template(const common_chat_template & tmpl);
// Build the PEG parser for this template
common_peg_arena build_parser(const templates_params & inputs) const;
common_peg_arena build_parser(const generation_params & inputs) const;
private:
// Collect tokens from entire analysis to preserve
@ -385,10 +386,10 @@ struct autoparser {
class peg_generator {
public:
static common_chat_params generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs);
const struct generation_params & inputs);
static common_chat_params generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs,
const struct generation_params & inputs,
const autoparser & autoparser);
};

View File

@ -2,6 +2,7 @@
#include "chat-auto-parser-helpers.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "common.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include "peg-parser.h"
@ -31,6 +32,7 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
tmpl.src.find("reasoning_content") == std::string::npos &&
tmpl.src.find("<SPECIAL_12>") == std::string::npos &&
analysis.reasoning.mode == reasoning_mode::NONE) {
analysis.reasoning.mode = reasoning_mode::TAG_BASED;
analysis.reasoning.start = "<think>";
@ -185,7 +187,6 @@ void autoparser::analyze_template(const common_chat_template & tmpl) {
LOG_DBG("func_name_prefix: '%s'\n", tools.function.name_prefix.c_str());
LOG_DBG("func_name_suffix: '%s'\n", tools.function.name_suffix.c_str());
LOG_DBG("func_close: '%s'\n", tools.function.close.c_str());
LOG_DBG("python_dict_format: %s\n", tools.format.uses_python_dicts ? "true" : "false");
LOG_DBG("arg_name_prefix: '%s'\n", tools.arguments.name_prefix.c_str());
LOG_DBG("arg_name_suffix: '%s'\n", tools.arguments.name_suffix.c_str());
LOG_DBG("arg_value_prefix: '%s'\n", tools.arguments.value_prefix.c_str());
@ -297,10 +298,10 @@ void analyze_reasoning::compare_reasoning_presence() {
if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
mode = reasoning_mode::TAG_BASED;
start = trim_whitespace(result.tags["pre"]);
end = result.tags["post"];
end = trim_trailing_whitespace(result.tags["post"]);
} else if (!result.tags["post"].empty()) {
mode = reasoning_mode::TAG_BASED;
end = result.tags["post"];
end = trim_trailing_whitespace(result.tags["post"]);
}
}
}
@ -327,54 +328,31 @@ void analyze_reasoning::compare_thinking_enabled() {
const auto & diff = comparison->diff;
std::string left_trimmed = trim_whitespace(diff.left);
std::string right_trimmed = trim_whitespace(diff.right);
if (left_trimmed.empty() && !diff.right.empty()) {
std::string right_trimmed = trim_whitespace(diff.right);
if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
if (start.empty()) {
start = right_trimmed;
mode = reasoning_mode::TAG_BASED;
}
}
} else if (right_trimmed.empty() && !diff.left.empty()) {
if (!left_trimmed.empty() && string_ends_with(comparison->output_A, left_trimmed)) {
if (end.empty()) {
auto seg = prune_whitespace_segments(segmentize_markers(comparison->output_A));
if (seg.size() >= 2 && seg[seg.size() - 1].value == left_trimmed && seg[seg.size() - 2].type == segment_type::MARKER) {
start = seg[seg.size() - 2].value;
}
end = left_trimmed;
mode = reasoning_mode::TAG_BASED;
}
}
}
if (mode == reasoning_mode::NONE && start.empty() && !end.empty()) {
mode = reasoning_mode::TAG_BASED;
}
// Check for start+end pattern: when enable_thinking=false produces both start and end markers,
// but enable_thinking=true produces only the start marker. Both cases are TAG_BASED.
if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
return p.literal(start) + p.space() + p.literal(end) + p.rest();
});
auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest();
});
if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
mode = reasoning_mode::TAG_BASED;
} else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
if (result.result.success()) {
start = result.tags["pre"];
mode = reasoning_mode::TAG_BASED;
}
}
}
if (start.empty() && end.empty()) { // we might still have the case of "just open" and "just close"
if (!diff.left.empty() && !diff.right.empty()) {
auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
if (seg_A.size() == 1 && seg_B.size() == 1) {
mode = reasoning_mode::TAG_BASED;
start = seg_B[0].value;
end = seg_A[0].value;
}
}
}
}
void analyze_reasoning::compare_reasoning_scope() {
@ -422,14 +400,14 @@ void analyze_reasoning::compare_reasoning_scope() {
auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
if (result.result.success()) {
start = result.tags["pre"];
end = result.tags["post"];
end = trim_trailing_whitespace(result.tags["post"]);
} else {
auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())));
});
result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
if (result.result.success()) {
end = result.tags["post"];
end = trim_trailing_whitespace(result.tags["post"]);
} else {
LOG_DBG(ANSI_ORANGE "%s: Unable to extracft reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
mode = reasoning_mode::NONE;
@ -596,33 +574,23 @@ void analyze_tools::analyze_tool_call_format(const std::string & haystack,
return;
}
enum class json_quote_style { NONE, DOUBLE_QUOTES, SINGLE_QUOTES };
auto in_json_haystack = [&haystack](const std::string & needle) -> json_quote_style {
auto in_json_haystack = [&haystack](const std::string & needle) -> bool {
auto parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
return p.choice({ p.literal("{"), p.literal(":") }) << p.choice({
p.tag("sq", p.literal("'") + p.literal(needle) + p.literal("'")),
p.tag("dq", p.literal("\"") + p.literal(needle) + p.literal("\"")) });
});
auto result = parser.parse_anywhere_and_extract(haystack);
if (!result.result.success()) {
return json_quote_style::NONE;
}
return result.tags.count("sq") && !result.tags["sq"].empty()
? json_quote_style::SINGLE_QUOTES
: json_quote_style::DOUBLE_QUOTES;
return result.result.success();
};
auto fun_quote = in_json_haystack(fun_name_needle);
auto arg_quote = in_json_haystack(arg_name_needle);
if (fun_quote != json_quote_style::NONE) {
if (fun_quote) {
// no need to check further, we're in JSON land
format.mode = tool_format::JSON_NATIVE;
format.uses_python_dicts = (fun_quote == json_quote_style::SINGLE_QUOTES);
} else if (arg_quote != json_quote_style::NONE) {
} else if (arg_quote) {
format.mode = tool_format::TAG_WITH_JSON;
format.uses_python_dicts = (arg_quote == json_quote_style::SINGLE_QUOTES);
} else {
format.mode = tool_format::TAG_WITH_TAGGED;
}

View File

@ -1,5 +1,6 @@
#include "chat.h"
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "common.h"
@ -22,6 +23,7 @@
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
using json = nlohmann::ordered_json;
@ -760,7 +762,7 @@ static void foreach_parameter(const json &
std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
const autoparser::templates_params & inputs,
const autoparser::generation_params & inputs,
const std::optional<json> & messages_override,
const std::optional<json> & tools_override,
const std::optional<json> & additional_context) {
@ -811,7 +813,7 @@ std::string common_chat_template_direct_apply(
}
static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl,
const autoparser::templates_params & inputs) {
const autoparser::generation_params & inputs) {
common_chat_params data;
// Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@ -928,7 +930,7 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
}
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl,
const autoparser::templates_params & inputs) {
const autoparser::generation_params & inputs) {
common_chat_params data;
// Copy reasoning to the "thinking" field as expected by the gpt-oss template
@ -1074,7 +1076,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl,
const autoparser::templates_params & inputs) {
const autoparser::generation_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
@ -1095,13 +1097,13 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
// Build content parser for >>>all\n{content}
// When tools are present, content stops before the next ">>>" (tool call)
// When no tools, content goes until end
auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>"));
auto content_until_end = p.literal(">>>all\n") + p.content(p.rest());
auto content_until_tool = p.literal("all\n") + p.content(p.until(">>>"));
auto content_until_end = p.literal("all\n") + p.content(p.rest());
// If no tools or tool_choice is NONE, just parse content
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
// When no tools, just match the prefix and capture everything after
return content_until_end + p.end();
return wrap_for_generation_prompt(p, content_until_end + p.end(), inputs, autoparser::analyze_reasoning());
}
// Build tool call parsers for each available function
@ -1113,7 +1115,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
// Tool format: >>>function_name\n{json_args}
auto tool_parser = p.tool(
p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) +
p.tool_open(p.tool_name(p.literal(name)) + p.literal("\n")) +
p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
);
@ -1124,17 +1126,20 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
auto tools_only = p.trigger_rule("tools", p.one_or_more(tool_choice));
auto content_and_tools = content_until_tool + tools_only;
auto ret = p.eps();
if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
if (inputs.parallel_tool_calls) {
return p.choice({ content_and_tools, tools_only }) + p.end();
ret = p.choice({ content_and_tools, tools_only }) + p.end();
} else {
ret = p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
}
return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
} else if (inputs.parallel_tool_calls) {
ret = p.choice({ content_and_tools, content_only, tools_only }) + p.end();
} else {
auto content_and_tool = content_until_tool + tool_choice;
ret = p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
}
if (inputs.parallel_tool_calls) {
return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
}
auto content_and_tool = content_until_tool + tool_choice;
return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
return wrap_for_generation_prompt(p, ret, inputs, autoparser::analyze_reasoning());
});
data.parser = parser.save();
@ -1164,14 +1169,12 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
// Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
// The ID contains both the function name and an incrementing counter
static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl,
const autoparser::templates_params & inputs) {
const autoparser::generation_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.thinking_start_tag = "<think>";
data.thinking_end_tag = "</think>";
data.preserved_tokens = {
"<|tool_calls_section_begin|>",
"<|tool_calls_section_end|>",
@ -1186,6 +1189,18 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
const std::string SECTION_END = "<|tool_calls_section_end|>";
const std::string CALL_BEGIN = "<|tool_call_begin|>";
const std::string ARGS_BEGIN = "<|tool_call_argument_begin|>";
const std::string CALL_END = "<|tool_call_end|>";
const std::string THINK_START = "<think>";
const std::string THINK_END = "</think>";
data.thinking_start_tag = THINK_START;
data.thinking_end_tag = THINK_END;
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
// Kimi K2 Thinking format:
// - Reasoning: <think>{reasoning}</think>
@ -1197,16 +1212,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
// <|tool_calls_section_end|>
// The ID format is: functions.<function_name>:<counter> where counter is 0, 1, 2, ...
// Tool call markers
const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
const std::string SECTION_END = "<|tool_calls_section_end|>";
const std::string CALL_BEGIN = "<|tool_call_begin|>";
const std::string ARGS_BEGIN = "<|tool_call_argument_begin|>";
const std::string CALL_END = "<|tool_call_end|>";
const std::string THINK_START = "<think>";
const std::string THINK_END = "</think>";
// Tool call markers
auto end = p.end();
// Note: this model is CRAZY. It can diverge from its supposed tool calling pattern in so many ways it's not funny.
@ -1218,7 +1224,8 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
// Content only parser (no tools)
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
return reasoning + p.content(p.rest()) + end;
return wrap_for_generation_prompt(p, reasoning + p.content(p.rest()) + end,
inputs, autoparser::analyze_reasoning(THINK_START, THINK_END));
}
// Build tool call parsers for each available function
@ -1254,7 +1261,8 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
auto content_before_tools = p.content(p.until_one_of({ SECTION_BEGIN, CALL_BEGIN }));
return reasoning + content_before_tools + tool_calls + end;
return wrap_for_generation_prompt(p, reasoning + content_before_tools + tool_calls + end,
inputs, autoparser::analyze_reasoning(THINK_START, THINK_END));
});
data.parser = parser.save();
@ -1284,7 +1292,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
// - Tool calls: <|tool_call_start|>[function_name(arg1="value1", arg2="value2")]<|tool_call_end|>
// Tool calls can appear multiple times (parallel tool calls)
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl,
const autoparser::templates_params & inputs) {
const autoparser::generation_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
@ -1303,13 +1311,15 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
const std::string TOOL_CALL_START = "<|tool_call_start|>";
const std::string TOOL_CALL_END = "<|tool_call_end|>";
const std::string THINK_START = "<think>";
const std::string THINK_END = "</think>";
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
data.thinking_start_tag = THINK_START;
data.thinking_end_tag = THINK_END;
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto end = p.end();
auto reasoning = p.eps();
@ -1318,7 +1328,8 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
}
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
return reasoning + p.content(p.rest()) + end;
return wrap_for_generation_prompt(p, reasoning + p.content(p.rest()) + end, inputs,
autoparser::analyze_reasoning(THINK_START, THINK_END));
}
auto tool_calls = p.rule("tool-calls",
@ -1330,7 +1341,8 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
auto content = p.content(p.until(TOOL_CALL_START));
return reasoning + content + tool_calls + end;
return wrap_for_generation_prompt(p, reasoning + content + tool_calls + end, inputs,
autoparser::analyze_reasoning(THINK_START, THINK_END));
});
data.parser = parser.save();
@ -1356,7 +1368,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
static common_chat_params common_chat_params_init_gigachat_v3(
const common_chat_template & tmpl,
const autoparser::templates_params & inputs) {
const autoparser::generation_params & inputs) {
common_chat_params data;
@ -1373,6 +1385,7 @@ static common_chat_params common_chat_params_init_gigachat_v3(
auto tool_call_start_prefix = "<|message_sep|>\n\nfunction call<|role_sep|>\n";
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto ret = p.eps();
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
// Build a choice of all available tools
auto tool_choice = p.choice();
@ -1395,13 +1408,14 @@ static common_chat_params common_chat_params_init_gigachat_v3(
auto tool_call = p.rule("tool-call", p.literal(tool_call_start_prefix) + tool_choice);
auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
return p.content(p.until("<|message_sep|>\n\n")) << tool_calls;
ret = p.content(p.until("<|message_sep|>\n\n")) << tool_calls;
} else {
// Content only parser
include_grammar = false;
ret = p.content(p.rest());
}
// Content only parser
include_grammar = false;
return p.content(p.rest());
return wrap_for_generation_prompt(p, ret, inputs, autoparser::analyze_reasoning());
});
data.parser = parser.save();
@ -1498,22 +1512,20 @@ static json common_chat_extra_context() {
static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs) {
autoparser::templates_params params;
autoparser::generation_params params;
params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
? *tmpls->template_tool_use
: *tmpls->template_default;
const auto & src = tmpl.source();
const auto & caps = tmpl.original_caps();
params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
params.add_generation_prompt = inputs.add_generation_prompt;
params.tool_choice = inputs.tool_choice;
const auto & tmpl =
params.tools.is_array() && tmpls->template_tool_use ? *tmpls->template_tool_use : *tmpls->template_default;
const auto & src = tmpl.source();
const auto & caps = tmpl.original_caps();
params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
params.tool_choice = inputs.tool_choice;
params.reasoning_format = inputs.reasoning_format;
params.enable_thinking = inputs.enable_thinking;
params.grammar = inputs.grammar;
params.now = inputs.now;
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;
params.enable_thinking = inputs.enable_thinking;
params.grammar = inputs.grammar;
params.now = inputs.now;
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;
if (src.find("<|channel|>") == std::string::npos) {
// map developer to system for all models except for GPT-OSS
@ -1532,6 +1544,15 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
workaround::requires_non_null_content(params.messages);
}
params.add_generation_prompt = false;
std::string no_gen_prompt = common_chat_template_direct_apply(tmpl, params);
params.add_generation_prompt = true;
std::string gen_prompt = common_chat_template_direct_apply(tmpl, params);
auto diff = calculate_diff_split(no_gen_prompt, gen_prompt);
params.generation_prompt = diff.right;
params.add_generation_prompt = inputs.add_generation_prompt;
params.extra_context = common_chat_extra_context();
for (auto el : inputs.chat_template_kwargs) {
params.extra_context[el.first] = json::parse(el.second);
@ -1541,12 +1562,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
params.json_schema = json::parse(inputs.json_schema);
}
// if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
// LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
// params.parallel_tool_calls = false;
// } else {
params.parallel_tool_calls = inputs.parallel_tool_calls;
//}
if (params.tools.is_array()) {
if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
@ -1559,25 +1575,27 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
}
}
common_chat_params early_return;
// Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
// Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
return common_chat_params_init_ministral_3(tmpl, params);
early_return = common_chat_params_init_ministral_3(tmpl, params);
}
// GPT-OSS - has unique channel-based structure that needs dedicated handler
if (src.find("<|channel|>") != std::string::npos) {
LOG_DBG("Using specialized template: GPT-OSS\n");
return common_chat_params_init_gpt_oss(tmpl, params);
early_return = common_chat_params_init_gpt_oss(tmpl, params);
}
// Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
// Detection: template has ">>>all" for content and ">>>" prefix for tool calls
if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
LOG_DBG("Using specialized template: Functionary v3.2\n");
return common_chat_params_init_functionary_v3_2(tmpl, params);
early_return = common_chat_params_init_functionary_v3_2(tmpl, params);
}
// Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
@ -1585,7 +1603,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
src.find("<|tool_call_begin|>") != std::string::npos) {
LOG_DBG("Using specialized template: Kimi K2 Thinking\n");
return common_chat_params_init_kimi_k2(tmpl, params);
early_return = common_chat_params_init_kimi_k2(tmpl, params);
}
// LFM2 - uses <|tool_list_start|>/<|tool_list_end|> markers and <|tool_call_start|>[name(args)]<|tool_call_end|> format
@ -1593,7 +1611,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
if (src.find("<|tool_list_start|>") != std::string::npos &&
src.find("<|tool_list_end|>") != std::string::npos) {
LOG_DBG("Using specialized template: LFM2\n");
return common_chat_params_init_lfm2(tmpl, params);
early_return = common_chat_params_init_lfm2(tmpl, params);
}
// GigaChatV3 format detection
@ -1602,7 +1620,12 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
src.find("<|function_call|>") == std::string::npos
) {
LOG_DBG("Using specialized template: GigaChatV3\n");
return common_chat_params_init_gigachat_v3(tmpl, params);
early_return = common_chat_params_init_gigachat_v3(tmpl, params);
}
if (!early_return.parser.empty()) {
early_return.generation_prompt = params.generation_prompt;
return early_return;
}
try {
@ -1615,6 +1638,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
auto_params.thinking_start_tag = autoparser.reasoning.start;
auto_params.thinking_end_tag = autoparser.reasoning.end;
}
auto_params.generation_prompt = params.generation_prompt;
return auto_params;
} catch (const std::exception & e) {
throw std::invalid_argument(std::string("Unable to generate parser for this template. Automatic parser generation failed: ") + e.what());
@ -1712,9 +1736,9 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
LOG_DBG("No parser definition detected, assuming pure content parser.");
}
const std::string effective_input = params.prefill.empty()
const std::string effective_input = params.generation_prompt.empty()
? input
: params.prefill + input;
: params.generation_prompt + input;
LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), effective_input.c_str());

View File

@ -24,7 +24,7 @@ using json = nlohmann::ordered_json;
struct common_chat_templates;
namespace autoparser {
struct templates_params;
struct generation_params;
} // namespace autoparser
struct common_chat_tool_call {
@ -211,7 +211,7 @@ struct common_chat_params {
std::string prompt;
std::string grammar;
bool grammar_lazy = false;
std::string prefill;
std::string generation_prompt;
bool supports_thinking = false;
std::string thinking_start_tag; // e.g., "<think>"
std::string thinking_end_tag; // e.g., "</think>"
@ -228,14 +228,14 @@ struct common_chat_parser_params {
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
bool reasoning_in_content = false;
std::string prefill;
std::string generation_prompt;
bool parse_tool_calls = true;
bool debug = false; // Enable debug output for PEG parser
common_peg_arena parser = {};
common_chat_parser_params() = default;
common_chat_parser_params(const common_chat_params & chat_params) {
format = chat_params.format;
prefill = chat_params.prefill;
format = chat_params.format;
generation_prompt = chat_params.generation_prompt;
}
};
@ -301,7 +301,7 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
const autoparser::templates_params & inputs,
const autoparser::generation_params & inputs,
const std::optional<json> & messages_override = std::nullopt,
const std::optional<json> & tools_override = std::nullopt,
const std::optional<json> & additional_context = std::nullopt);

View File

@ -7,7 +7,6 @@
{%- set available_tool_string = '' -%}
{%- set add_tool_id = true -%}
{%- set add_thoughts = true -%} {# whether to include <thinking> reasoning blocks #}
{%- set add_generation_prompt = true -%} {# whether to emit reasoning starter before assistant response #}
{# Optional token placeholders (safe defaults) #}
{%- set bos_token = bos_token or '' -%}
{%- set eos_token = eos_token or '' -%}

View File

@ -15,10 +15,10 @@
{%- set ns.is_tool = false -%}
{%- for tool in message['tool_calls']-%}
{%- if not ns.is_first -%}
{{'<Assistant><tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<tool▁call▁end>'}}
{{'<Assistant><tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] | tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- set ns.is_first = true -%}
{%- else -%}
{{'\n' + '<tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<tool▁call▁end>'}}
{{'\n' + '<tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] | tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- endif -%}
{%- endfor -%}
{{'<tool▁calls▁end><end▁of▁sentence>'}}

View File

@ -28,25 +28,25 @@
{%- set ns.is_last_user = true -%}{{'<User>' + message['content']}}
{%- endif -%}
{%- if message['role'] == 'assistant' and message['tool_calls'] -%}
{%- if ns.is_last_user -%}{{'<Assistant></think>'}}
{%- if ns.is_last_user -%}{{'<Assistant><think></think>'}}
{%- endif -%}
{%- set ns.is_last_user = false -%}
{%- set ns.is_first = false -%}
{%- set ns.is_tool = false -%}
{%- for tool in message['tool_calls'] -%}
{%- if not ns.is_first -%}
{%- if not message['content'] -%}{{'<tool▁calls▁begin><tool▁call▁begin>'+ tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments'] + '<tool▁call▁end>'}}
{%- else -%}{{message['content'] + '<tool▁calls▁begin><tool▁call▁begin>' + tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments'] + '<tool▁call▁end>'}}
{%- if not message['content'] -%}{{'<tool▁calls▁begin><tool▁call▁begin>'+ tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments'] | tojson + '<tool▁call▁end>'}}
{%- else -%}{{message['content'] + '<tool▁calls▁begin><tool▁call▁begin>' + tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments'] | tojson + '<tool▁call▁end>'}}
{%- endif -%}
{%- set ns.is_first = true -%}
{%- else -%}{{'<tool▁call▁begin>'+ tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments'] + '<tool▁call▁end>'}}
{%- else -%}{{'<tool▁call▁begin>'+ tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments'] | tojson + '<tool▁call▁end>'}}
{%- endif -%}
{%- endfor -%}{{'<tool▁calls▁end><end▁of▁sentence>'}}
{%- endif -%}
{%- if message['role'] == 'assistant' and not message['tool_calls'] -%}
{%- if ns.is_last_user -%}{{'<Assistant>'}}
{%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
{%- else -%}{{'</think>'}}
{%- else -%}{{'<think></think>'}}
{%- endif -%}
{%- endif -%}
{%- set ns.is_last_user = false -%}
@ -65,7 +65,7 @@
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<Assistant>'}}
{%- if not thinking -%}{{'</think>'}}
{%- else -%}{{'<think>'}}
{%- if not thinking -%}{{'<think></think>'}}
{%- else -%}{{'<think>'}}
{%- endif -%}
{%- endif %}

View File

@ -49,7 +49,7 @@ Example function tool call syntax:
{%- endif -%}
{%- set tool_name = tc['function']['name'] -%}
{%- set tool_args = tc['function']['arguments'] -%}
{{- '<tool▁call▁begin>' + tc['type'] + '<tool▁sep>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<tool▁call▁end>' -}}
{{- '<tool▁call▁begin>' + tc['type'] + '<tool▁sep>' + tool_name + '\n' + '```json' + '\n' + tool_args | tojson + '\n' + '```' + '<tool▁call▁end>' -}}
{%- endfor -%}
{{- '<tool▁calls▁end><end▁of▁sentence>' -}}
{%- endif -%}

View File

@ -42,9 +42,9 @@
{%- if 'tool_calls' in message and message['tool_calls'] -%}
{%- for tool_call in message['tool_calls'] -%}
{%- if tool_call["function"]["name"] == "python" -%}
{{ '<|python_tag|>' + tool_call['function']['arguments'] }}
{{ '<|python_tag|>' + tool_call['function']['arguments'] | tojson }}
{%- else -%}
{{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] + '</function>' }}
{{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] | tojson + '</function>' }}
{%- endif -%}
{%- endfor -%}
{{ '<|eom_id|>' }}

View File

@ -1292,7 +1292,7 @@ static void test_nemotron_reasoning_detection(testing & t) {
// Check reasoning markers
t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);
t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
// Check reasoning mode detection
// Nemotron uses tag-based reasoning; prefill handles the template's forced markers

View File

@ -145,7 +145,7 @@ static void test_example_native(testing & t) {
common_reasoning_format reasoning_format;
json json_schema;
bool parallel_tool_calls;
std::string prefill;
std::string generation_prompt;
std::string input;
// Expect
@ -157,7 +157,7 @@ static void test_example_native(testing & t) {
auto build_parser = [](const test_case & tc) {
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
// Always use optional TAG_BASED pattern; prefill is prepended to input
// Always use optional TAG_BASED pattern; generation_prompt is prepended to input
auto reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
// tool calling parser
@ -184,26 +184,26 @@ static void test_example_native(testing & t) {
std::vector<test_case> test_cases = std::vector<test_case>{
{
/* .name = */ "content with reasoning (no prefill)",
/* .name = */ "content with reasoning (no generation_prompt)",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "",
/* .generation_prompt = */ "",
/* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
/* .expect_reasoning = */ "The user said hello, I must say hello back",
/* .expect_content = */ "Hello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content without reasoning (no prefill)",
/* .name = */ "content without reasoning (no generation_prompt)",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "",
/* .generation_prompt = */ "",
/* .input = */ ("Hello"),
/* .expect_reasoning = */ "",
/* .expect_content = */ "Hello",
@ -216,59 +216,59 @@ static void test_example_native(testing & t) {
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "",
/* .generation_prompt = */ "",
/* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
/* .expect_reasoning = */ "",
/* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with reasoning prefill",
/* .name = */ "content with reasoning generation_prompt",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "<think>",
/* .generation_prompt = */ "<think>",
/* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
/* .expect_reasoning = */ "The user said hello, I must say hello back",
/* .expect_content = */ "Hello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with reasoning prefill and reasoning_format = none",
/* .name = */ "content with reasoning generation_prompt and reasoning_format = none",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "",
/* .generation_prompt = */ "",
/* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
/* .expect_reasoning = */ "",
/* .expect_content = */ "The user said hello, I must say hello back</think>\nHello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "content with closed reasoning prefill (empty reasoning discarded)",
/* .name = */ "content with closed reasoning generation_prompt (empty reasoning discarded)",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "<think></think>",
/* .generation_prompt = */ "<think></think>",
/* .input = */ ("Hello"),
/* .expect_reasoning = */ "",
/* .expect_content = */ "Hello",
/* .expect_tool_calls = */ {},
},
{
/* .name = */ "tools with reasoning prefill",
/* .name = */ "tools with reasoning generation_prompt",
/* .tools = */ create_tools(),
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ false,
/* .prefill = */ "<think>",
/* .generation_prompt = */ "<think>",
/* .input = */
("I must get the weather in New York</think>\n"
"<tool_call>["
@ -284,13 +284,13 @@ static void test_example_native(testing & t) {
} },
},
{
/* .name = */ "parallel tools with reasoning prefill",
/* .name = */ "parallel tools with reasoning generation_prompt",
/* .tools = */ create_tools(),
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
/* .json_schema = */ {},
/* .parallel_tool_calls = */ true,
/* .prefill = */ "<think>",
/* .generation_prompt = */ "<think>",
/* .input = */
("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
"search that for you."
@ -328,7 +328,7 @@ static void test_example_native(testing & t) {
} },
},
{
/* .name = */ "response_format with reasoning prefill",
/* .name = */ "response_format with reasoning generation_prompt",
/* .tools = */ {},
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
@ -340,7 +340,7 @@ static void test_example_native(testing & t) {
{ "due_date", { { "type", "string" } } } } },
{ "required", { "invoice_number", "amount", "due_date" } } },
/* .parallel_tool_calls = */ false,
/* .prefill = */ "<think>",
/* .generation_prompt = */ "<think>",
/* .input = */
("I must produce the invoice in the requested format</think>\n"
R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
@ -368,7 +368,7 @@ static void test_example_native(testing & t) {
t.log(line);
}
std::string effective_input = tc.prefill + tc.input;
std::string effective_input = tc.generation_prompt + tc.input;
common_peg_parse_context ctx(effective_input);
auto result = parser.parse(ctx);

View File

@ -1001,8 +1001,8 @@ static void test_peg_parser(common_chat_templates * tmpls,
// already placed the opening tag in the prompt.
// For lazy grammars, the grammar only activates from the trigger position, so the
// reasoning prefill is irrelevant — reasoning is handled by the PEG parser.
if (!parser.params_.prefill.empty() && earliest_trigger_pos == std::string::npos) {
constrained = parser.params_.prefill + constrained;
if (!parser.params_.generation_prompt.empty() && earliest_trigger_pos == std::string::npos) {
constrained = parser.params_.generation_prompt + constrained;
}
// Test the constrained portion against the grammar
@ -1326,12 +1326,15 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// NVIDIA Nemotron-3 Nano
auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run();
tst.test("Hello, world!\nWhat's up?").
enable_thinking(false).
reasoning_format(COMMON_REASONING_FORMAT_AUTO).
expect(message_assist).run();
tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
.enable_thinking(false)
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_NONE)
.expect_content("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
.expect_content("<think>I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
.run();
tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
@ -1491,7 +1494,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
.expect(simple_assist_msg("The answer is 42.", "Let me think about this..."))
.run();
tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
tst.test("</think>Hello, world!").reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(simple_assist_msg("Hello, world!")).run();
}
{
// NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models)
@ -1807,6 +1810,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
"<tool▁calls▁begin><tool▁call▁begin>get_time<tool▁sep>{\"city\": "
"\"XYZCITY\"}<tool▁call▁end><tool▁calls▁end>")
.tools({ get_time_tool })
.enable_thinking(false)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
.run();
}
@ -1852,7 +1857,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
{
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run();
tst.test("CONTENT").enable_thinking(false).reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
expect(simple_assist_msg("CONTENT", "")).run();
}
// GLM-4.6 tests - format: <tool_call>function_name\n<arg_key>...</arg_key>\n<arg_value>...</arg_value>\n</tool_call>
@ -1915,6 +1921,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
"<arg_key>arg1</arg_key><arg_value>1</arg_value>"
"<arg_key>arg2</arg_key><arg_value>2</arg_value>"
"</tool_call>")
.enable_thinking(false)
.parallel_tool_calls(true)
.tools({
special_function_tool, special_function_tool_with_optional_param
@ -2231,10 +2238,11 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
{
auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
tst.test(
"<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
"</think><minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
"name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>")
.tools({ special_function_tool })
.expect(message_assist_call)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.run();
}
@ -2297,8 +2305,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// Functionary v3.2 - recipient-based format: >>>recipient\n{content}
{
auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug);
tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run();
tst.test(">>>special_function\n{\"arg1\": 1}")
tst.test("all\nHello, world!\nWhat's up?").expect(message_assist).run();
tst.test("special_function\n{\"arg1\": 1}")
.tools({ special_function_tool })
.expect(message_assist_call)
.run();
@ -2318,8 +2326,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
{
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?")
.enable_thinking(true) // Forced open
tst.test("</think>Hello, world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.expect(message_assist)
.run();
tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
@ -2331,14 +2339,15 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// llama-cpp DeepSeek R1 template (always forced-open thinking)
{
auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
tst.test("</think>Hello, world!\nWhat's up?").expect(message_assist).reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).run();
tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.expect(message_assist_thoughts)
.run();
tst.test(
"<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>special_function\n"
"</think><tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>special_function\n"
"```json\n{\"arg1\": 1}```<tool▁call▁end><tool▁calls▁end>")
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.tools({ special_function_tool })
.parallel_tool_calls(true)
.expect(message_assist_call)
@ -2348,7 +2357,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
{
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run();
tst.test("</think>Hello, world!\nWhat's up?").enable_thinking(true).
reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
expect(message_assist).run();
tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.expect(message_assist_thoughts)
@ -2357,6 +2368,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
"<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>special_function\n"
"```json\n{\"arg1\": 1}```<tool▁call▁end><tool▁calls▁end>")
.tools({ special_function_tool })
.enable_thinking(false)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.expect(message_assist_call)
.run();
}
@ -2386,12 +2399,12 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// Apriel 1.6 Thinker (reasoning-only support)
{
auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
// Implicit reasoning start (forced open)
tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.expect(message_assist_thoughts)
.enable_thinking(true)
.expect(simple_assist_msg("Hello, world!\nWhat's up?", "Here are my reasoning steps:\nI'm\nthinking"))
.run();
// Reasoning + Tool calls
@ -2399,8 +2412,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
"I'm\nthinking\n[BEGIN FINAL RESPONSE]\n<tool_calls>[{\"name\": \"special_function\", \"arguments\": "
"{\"arg1\": 1}}]</tool_calls>")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.enable_thinking(true)
.tools({ special_function_tool })
.expect(message_assist_call_thoughts)
.expect(simple_assist_msg("", "Here are my reasoning steps:\nI'm\nthinking", "special_function", "{\"arg1\":1}"))
.run();
}

View File

@ -105,7 +105,7 @@ struct cli_context {
llama_get_model(ctx_server.get_llama_context()));
task.params.sampling.reasoning_budget_tokens = reasoning_budget;
task.params.sampling.grammar_prefill = chat_params.prefill;
task.params.sampling.grammar_prefill = chat_params.generation_prompt;
if (!chat_params.thinking_start_tag.empty()) {
task.params.sampling.reasoning_budget_start =
@ -215,7 +215,7 @@ struct cli_context {
inputs.parallel_tool_calls = false;
inputs.add_generation_prompt = true;
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
inputs.enable_thinking = common_chat_templates_support_enable_thinking(chat_params.tmpls.get());
inputs.enable_thinking = chat_params.enable_thinking ? common_chat_templates_support_enable_thinking(chat_params.tmpls.get()) : false;
// Apply chat template to the list of messages
return common_chat_templates_apply(chat_params.tmpls.get(), inputs);

View File

@ -282,7 +282,7 @@ static void render_scenario(const common_chat_template & tmpl,
LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
try {
autoparser::templates_params inputs;
autoparser::generation_params inputs;
inputs.messages = final_messages;
inputs.add_generation_prompt = add_generation_prompt;
inputs.extra_context["enable_thinking"] = enable_thinking;
@ -395,7 +395,7 @@ int main(int argc, char ** argv) {
analysis.analyze_template(chat_template);
// Generate Parser
autoparser::templates_params params;
autoparser::generation_params params;
params.messages = json::array({ build_user_message() });
params.reasoning_format =
opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;

View File

@ -400,12 +400,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
autoparser::templates_params params_no_tools;
autoparser::generation_params params_no_tools;
params_no_tools.messages = json::array({ user_msg });
params_no_tools.add_generation_prompt = false;
params_no_tools.tools = json::array();
autoparser::templates_params params_with_tools = params_no_tools;
autoparser::generation_params params_with_tools = params_no_tools;
params_with_tools.tools = tools;
std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
@ -419,12 +419,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
autoparser::templates_params params_no_prompt;
autoparser::generation_params params_no_prompt;
params_no_prompt.messages = json::array({ user_msg });
params_no_prompt.add_generation_prompt = false;
params_no_prompt.tools = json::array();
autoparser::templates_params params_with_prompt = params_no_prompt;
autoparser::generation_params params_with_prompt = params_no_prompt;
params_with_prompt.add_generation_prompt = true;
std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
@ -438,12 +438,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
autoparser::templates_params params_no_reasoning;
autoparser::generation_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.enable_thinking = true;
autoparser::templates_params params_with_reasoning = params_no_reasoning;
autoparser::generation_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
@ -458,12 +458,12 @@ static void analyze_template(const std::string & template_path) {
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2();
autoparser::templates_params params_no_reasoning;
autoparser::generation_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.enable_thinking = true;
autoparser::templates_params params_with_reasoning = params_no_reasoning;
autoparser::generation_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
@ -477,12 +477,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
autoparser::templates_params params_no_tool;
autoparser::generation_params params_no_tool;
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
params_no_tool.add_generation_prompt = false;
params_no_tool.tools = tools;
autoparser::templates_params params_with_tool = params_no_tool;
autoparser::generation_params params_with_tool = params_no_tool;
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
@ -497,12 +497,12 @@ static void analyze_template(const std::string & template_path) {
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2_continue();
autoparser::templates_params params_no_tool;
autoparser::generation_params params_no_tool;
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
params_no_tool.add_generation_prompt = false;
params_no_tool.tools = tools;
autoparser::templates_params params_with_tool = params_no_tool;
autoparser::generation_params params_with_tool = params_no_tool;
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
@ -516,12 +516,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
autoparser::templates_params params_one_tool;
autoparser::generation_params params_one_tool;
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
params_one_tool.add_generation_prompt = false;
params_one_tool.tools = tools;
autoparser::templates_params params_two_tools = params_one_tool;
autoparser::generation_params params_two_tools = params_one_tool;
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
@ -536,12 +536,12 @@ static void analyze_template(const std::string & template_path) {
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2_continue();
autoparser::templates_params params_one_tool;
autoparser::generation_params params_one_tool;
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
params_one_tool.add_generation_prompt = false;
params_one_tool.tools = tools;
autoparser::templates_params params_two_tools = params_one_tool;
autoparser::generation_params params_two_tools = params_one_tool;
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
@ -555,13 +555,13 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
autoparser::templates_params params_no_reasoning;
autoparser::generation_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.tools = tools;
params_no_reasoning.enable_thinking = true;
autoparser::templates_params params_with_reasoning = params_no_reasoning;
autoparser::generation_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);

View File

@ -1080,20 +1080,20 @@ json oaicompat_chat_params_parse(
}
}
llama_params["chat_format"] = static_cast<int>(chat_params.format);
llama_params["prompt"] = chat_params.prompt;
llama_params["chat_format"] = static_cast<int>(chat_params.format);
llama_params["prompt"] = chat_params.prompt;
if (!chat_params.grammar.empty()) {
llama_params["grammar"] = chat_params.grammar;
}
llama_params["grammar_lazy"] = chat_params.grammar_lazy;
auto grammar_triggers = json::array();
llama_params["grammar_lazy"] = chat_params.grammar_lazy;
auto grammar_triggers = json::array();
for (const auto & trigger : chat_params.grammar_triggers) {
server_grammar_trigger ct(trigger);
grammar_triggers.push_back(ct.to_json());
}
llama_params["grammar_triggers"] = grammar_triggers;
llama_params["preserved_tokens"] = chat_params.preserved_tokens;
llama_params["prefill"] = chat_params.prefill;
llama_params["grammar_triggers"] = grammar_triggers;
llama_params["preserved_tokens"] = chat_params.preserved_tokens;
llama_params["generation_prompt"] = chat_params.generation_prompt;
for (const auto & stop : chat_params.additional_stops) {
llama_params["stop"].push_back(stop);
}

View File

@ -38,53 +38,53 @@ json task_params::to_json(bool only_metrics) const {
}
if (only_metrics) {
return json {
{"seed", sampling.seed},
{"temperature", sampling.temp},
{"dynatemp_range", sampling.dynatemp_range},
{"dynatemp_exponent", sampling.dynatemp_exponent},
{"top_k", sampling.top_k},
{"top_p", sampling.top_p},
{"min_p", sampling.min_p},
{"top_n_sigma", sampling.top_n_sigma},
{"xtc_probability", sampling.xtc_probability},
{"xtc_threshold", sampling.xtc_threshold},
{"typical_p", sampling.typ_p},
{"repeat_last_n", sampling.penalty_last_n},
{"repeat_penalty", sampling.penalty_repeat},
{"presence_penalty", sampling.penalty_present},
{"frequency_penalty", sampling.penalty_freq},
{"dry_multiplier", sampling.dry_multiplier},
{"dry_base", sampling.dry_base},
{"dry_allowed_length", sampling.dry_allowed_length},
{"dry_penalty_last_n", sampling.dry_penalty_last_n},
{"mirostat", sampling.mirostat},
{"mirostat_tau", sampling.mirostat_tau},
{"mirostat_eta", sampling.mirostat_eta},
{"max_tokens", n_predict},
{"n_predict", n_predict}, // TODO: deduplicate?
{"n_keep", n_keep},
{"n_discard", n_discard},
{"ignore_eos", sampling.ignore_eos},
{"stream", stream},
{"n_probs", sampling.n_probs},
{"min_keep", sampling.min_keep},
{"chat_format", common_chat_format_name(chat_parser_params.format)},
{"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)},
{"reasoning_in_content", chat_parser_params.reasoning_in_content},
{"prefill", chat_parser_params.prefill},
{"samplers", samplers},
{"speculative.n_max", speculative.n_max},
{"speculative.n_min", speculative.n_min},
{"speculative.p_min", speculative.p_min},
{"speculative.type", common_speculative_type_to_str(speculative.type)},
{"speculative.ngram_size_n", speculative.ngram_size_n},
{"speculative.ngram_size_m", speculative.ngram_size_m},
{"speculative.ngram_m_hits", speculative.ngram_min_hits},
{"timings_per_token", timings_per_token},
{"post_sampling_probs", post_sampling_probs},
{"backend_sampling", sampling.backend_sampling},
{"lora", lora},
return json{
{ "seed", sampling.seed },
{ "temperature", sampling.temp },
{ "dynatemp_range", sampling.dynatemp_range },
{ "dynatemp_exponent", sampling.dynatemp_exponent },
{ "top_k", sampling.top_k },
{ "top_p", sampling.top_p },
{ "min_p", sampling.min_p },
{ "top_n_sigma", sampling.top_n_sigma },
{ "xtc_probability", sampling.xtc_probability },
{ "xtc_threshold", sampling.xtc_threshold },
{ "typical_p", sampling.typ_p },
{ "repeat_last_n", sampling.penalty_last_n },
{ "repeat_penalty", sampling.penalty_repeat },
{ "presence_penalty", sampling.penalty_present },
{ "frequency_penalty", sampling.penalty_freq },
{ "dry_multiplier", sampling.dry_multiplier },
{ "dry_base", sampling.dry_base },
{ "dry_allowed_length", sampling.dry_allowed_length },
{ "dry_penalty_last_n", sampling.dry_penalty_last_n },
{ "mirostat", sampling.mirostat },
{ "mirostat_tau", sampling.mirostat_tau },
{ "mirostat_eta", sampling.mirostat_eta },
{ "max_tokens", n_predict },
{ "n_predict", n_predict }, // TODO: deduplicate?
{ "n_keep", n_keep },
{ "n_discard", n_discard },
{ "ignore_eos", sampling.ignore_eos },
{ "stream", stream },
{ "n_probs", sampling.n_probs },
{ "min_keep", sampling.min_keep },
{ "chat_format", common_chat_format_name(chat_parser_params.format) },
{ "reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format) },
{ "reasoning_in_content", chat_parser_params.reasoning_in_content },
{ "generation_prompt", chat_parser_params.generation_prompt },
{ "samplers", samplers },
{ "speculative.n_max", speculative.n_max },
{ "speculative.n_min", speculative.n_min },
{ "speculative.p_min", speculative.p_min },
{ "speculative.type", common_speculative_type_to_str(speculative.type) },
{ "speculative.ngram_size_n", speculative.ngram_size_n },
{ "speculative.ngram_size_m", speculative.ngram_size_m },
{ "speculative.ngram_m_hits", speculative.ngram_min_hits },
{ "timings_per_token", timings_per_token },
{ "post_sampling_probs", post_sampling_probs },
{ "backend_sampling", sampling.backend_sampling },
{ "lora", lora },
};
}
@ -135,7 +135,7 @@ json task_params::to_json(bool only_metrics) const {
{"chat_format", common_chat_format_name(chat_parser_params.format)},
{"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)},
{"reasoning_in_content", chat_parser_params.reasoning_in_content},
{"prefill", chat_parser_params.prefill},
{"generation_prompt", chat_parser_params.generation_prompt},
{"samplers", samplers},
{"speculative.n_max", speculative.n_max},
{"speculative.n_min", speculative.n_min},
@ -402,8 +402,8 @@ task_params server_task::params_from_json_cmpl(
}
params.chat_parser_params.reasoning_format = reasoning_format;
params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
params.chat_parser_params.prefill = json_value(data, "prefill", std::string());
params.sampling.grammar_prefill = params.chat_parser_params.prefill;
params.chat_parser_params.generation_prompt = json_value(data, "generation_prompt", std::string());
params.sampling.grammar_prefill = params.chat_parser_params.generation_prompt;
params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false);
if (data.contains("chat_parser")) {
params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());