diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index 3586469212..aeb7c3542b 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -1,5 +1,5 @@
-#include "chat-auto-parser.h"
 #include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
 #include "chat-peg-parser.h"
 #include "chat.h"
 #include "common.h"
@@ -24,13 +24,13 @@ static void foreach_function(const json & tools, const std::functiondiff.right;
-        }
-    }
-
-    // Fallback for templates that ignore add_generation_prompt: search the rendered prompt.
-    // Excluded for TOOLS_ONLY: the start tag there is model-generated and may appear in prior turns.
-    const std::string & prompt_to_search =
-        (gen_prompt_suffix.empty() && autoparser.reasoning.mode != reasoning_mode::TOOLS_ONLY)
-            ? data.prompt
-            : gen_prompt_suffix;
-
-    bool clear_reasoning_start = false;
-    if (inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
-        autoparser.reasoning.mode != reasoning_mode::NONE &&
-        !autoparser.reasoning.end.empty()) {
-        const auto & r_start = autoparser.reasoning.start;
-        const auto & r_end = autoparser.reasoning.end;
-        auto r_end_t = trim_trailing_whitespace(r_end);
-        auto r_start_t = trim_trailing_whitespace(r_start);
-
-        if (!r_start_t.empty()) {
-            auto start_pos = prompt_to_search.rfind(r_start_t);
-            if (start_pos != std::string::npos) {
-                std::string from_start = prompt_to_search.substr(start_pos);
-                auto fs_trimmed = trim_trailing_whitespace(from_start);
-
-                if (string_ends_with(fs_trimmed, r_end_t)) {
-                    data.prefill = r_start + r_end;
-                } else if (string_ends_with(fs_trimmed, r_start_t)) {
-                    data.prefill = from_start;
-                } else {
-                    clear_reasoning_start = true;
-                }
-            }
-        }
-    }
-
-    common_peg_arena parser;
-    if (clear_reasoning_start) {
-        struct autoparser modified = autoparser;
-        modified.reasoning.start.clear();
-        parser = modified.build_parser(inputs);
-    } else {
-        parser = autoparser.build_parser(inputs);
-    }
+    auto parser = autoparser.build_parser(inputs);
     data.parser = parser.save();
 
     // Build grammar if tools are present
@@ -137,18 +82,11 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
     return data;
 }
 
-common_peg_arena autoparser::build_parser(const templates_params & inputs) const {
+common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
     if (!analysis_complete) {
         throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
     }
 
     return build_chat_peg_parser([&](common_chat_peg_builder & p) {
-        // If the template uses Python dict format (single-quoted strings in JSON structures),
-        // pre-register a json-string rule that accepts both quote styles. This must happen
-        // before any call to p.json() so that all JSON parsing inherits the flexible rule.
-        if (tools.format.uses_python_dicts) {
-            p.rule("json-string", p.quoted_string());
-        }
-
         parser_build_context ctx(p, inputs);
 
         bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
@@ -158,22 +96,24 @@ common_peg_arena autoparser::build_parser(const templates_params & inputs) const
         // Build reasoning parser
         ctx.reasoning_parser = reasoning.build_parser(ctx);
 
+        auto parser = p.eps();
+
         bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
         bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
 
         if (has_response_format) {
             auto response_format = p.rule("response-format",
                 p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
-            return ctx.reasoning_parser + p.space() + p.choice({
+            parser = ctx.reasoning_parser + p.space() + p.choice({
                 p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
                 response_format
             }) + p.end();
+        } else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
+            parser = tools.build_parser(ctx);
+        } else {
+            parser = content.build_parser(ctx);
         }
-
-        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
-            return tools.build_parser(ctx);
-        }
-
-        return content.build_parser(ctx);
+        parser = wrap_for_generation_prompt(p, parser, inputs, reasoning);
+        return parser;
     });
 }
 
@@ -188,10 +128,10 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co
     if (!end.empty()) {
         if (!start.empty()) {
             // Standard tag-based: optional(reasoning)
-            return p.optional(start + p.reasoning(p.until(end)) + end);
+            return p.optional(start + p.reasoning(p.until(end)) + end + p.space());
         }
         // Delimiter-style (empty start)
-        return p.optional(p.reasoning(p.until(end)) + end);
+        return p.optional(p.reasoning(p.until(end)) + end + p.space());
     }
 
@@ -380,7 +320,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
                     "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
                 p.tool_arg_json_value(p.schema(
-                    p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
+                    p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
                 p.space()) +
             p.tool_arg_close(p.literal(arguments.value_suffix)));
 
diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp
index 787d7bab98..d2f70b404e 100644
--- a/common/chat-auto-parser-helpers.cpp
+++ b/common/chat-auto-parser-helpers.cpp
@@ -1,9 +1,11 @@
 #include "chat-auto-parser-helpers.h"
 #include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
 #include "chat.h"
 #include "log.h"
 #include "nlohmann/json.hpp"
+#include "peg-parser.h"
 
 #include 
 #include 
@@ -291,10 +293,26 @@ std::vector prune_whitespace_segments(const std::vector & segm
     return result;
 }
 
+common_peg_parser wrap_for_generation_prompt(common_chat_peg_builder & p,
+                                             const common_peg_parser & prs,
+                                             const autoparser::generation_params & inputs,
+                                             const autoparser::analyze_reasoning & reasoning) {
+    auto parser = prs;
+    if (!inputs.generation_prompt.empty()) {
+        size_t end_pos = inputs.generation_prompt.size();
+        if (!reasoning.start.empty() && inputs.generation_prompt.find(reasoning.start) != std::string::npos) {
+            end_pos = inputs.generation_prompt.find(reasoning.start);
+        }
+        std::string cut_genprompt = inputs.generation_prompt.substr(0, end_pos);
+        parser = p.literal(cut_genprompt) + parser;
+    }
+    return parser;
+}
+
 namespace autoparser {
 
 std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
-    templates_params tmpl_params;
+    generation_params tmpl_params;
     tmpl_params.messages = params.messages;
     tmpl_params.tools = params.tools;
     tmpl_params.add_generation_prompt = params.add_generation_prompt;
diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h
index 6e3df79db8..cfc2f4e8e7 100644
--- a/common/chat-auto-parser-helpers.h
+++ b/common/chat-auto-parser-helpers.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "chat-auto-parser.h"
+#include "peg-parser.h"
 
 #include 
 #include 
 #include 
@@ -57,6 +58,11 @@ std::vector segmentize_markers(const std::string & text);
 //   (MARKER, ""), (MARKER, "") ]
 std::vector prune_whitespace_segments(const std::vector & segments);
 
+// Wrap parser with generation prompt parser
+common_peg_parser wrap_for_generation_prompt(common_chat_peg_builder & p,
+                                             const common_peg_parser & prs,
+                                             const autoparser::generation_params & inputs,
+                                             const autoparser::analyze_reasoning & reasoning);
 
 namespace autoparser {
 // Apply a template with the given parameters, returning the rendered string (empty on failure)
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h
index 55713f4ef4..73888276f4 100644
--- a/common/chat-auto-parser.h
+++ b/common/chat-auto-parser.h
@@ -50,7 +50,7 @@ namespace autoparser {
 // High-level params for parser generation
 // ============================================================================
 
-struct templates_params {
+struct generation_params {
     json messages;
     json tools;
     common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -62,6 +62,7 @@ struct templates_params {
     bool add_generation_prompt = false;
     bool enable_thinking = true;
     std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+    std::string generation_prompt;
     json extra_context;
     bool add_bos = false;
     bool add_eos = false;
@@ -174,7 +175,6 @@ struct tool_format_analysis {
     bool fun_name_is_key = false;      // In JSON format function name is JSON key, i.e. { "": { ... arguments ... } }
     bool tools_array_wrapped = false;  // Tool calls wrapped in JSON array [...]
-    bool uses_python_dicts = false;    // Tool call args use Python dict format (single-quoted strings)
 
     std::string function_field = "function";
     std::string name_field = "name";
@@ -215,12 +215,12 @@ struct analyze_content;
 
 struct parser_build_context {
     common_chat_peg_builder & p;
-    const templates_params & inputs;
+    const generation_params & inputs;
     common_peg_parser reasoning_parser;
     bool extracting_reasoning = false;
     const analyze_content * content = nullptr;
 
-    parser_build_context(common_chat_peg_builder & p, const templates_params & inputs);
+    parser_build_context(common_chat_peg_builder & p, const generation_params & inputs);
 };
 
 // ============================================================================
@@ -250,6 +250,7 @@ struct analyze_reasoning : analyze_base {
 
     analyze_reasoning() = default;
     analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
+    analyze_reasoning(std::string start_, std::string end_) : start(std::move(start_)), end(std::move(end_)) {}
 
     common_peg_parser build_parser(parser_build_context & ctx) const override;
 
@@ -371,7 +372,7 @@ struct autoparser {
     void analyze_template(const common_chat_template & tmpl);
 
     // Build the PEG parser for this template
-    common_peg_arena build_parser(const templates_params & inputs) const;
+    common_peg_arena build_parser(const generation_params & inputs) const;
 
 private:
     // Collect tokens from entire analysis to preserve
@@ -385,10 +386,10 @@ struct autoparser {
 class peg_generator {
 public:
     static common_chat_params generate_parser(const common_chat_template & tmpl,
-                                              const struct templates_params & inputs);
+                                              const struct generation_params & inputs);
 
     static common_chat_params generate_parser(const common_chat_template & tmpl,
-                                              const struct templates_params & inputs,
+                                              const struct generation_params & inputs,
                                               const autoparser & autoparser);
 };
 
diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp
index 57bc234fca..8b1b59b692 100644
--- a/common/chat-diff-analyzer.cpp
+++ b/common/chat-diff-analyzer.cpp
@@ -2,6 +2,7 @@
 #include "chat-auto-parser-helpers.h"
 #include "chat-peg-parser.h"
 #include "chat.h"
+#include "common.h"
 #include "log.h"
 #include "nlohmann/json.hpp"
 #include "peg-parser.h"
@@ -31,6 +32,7 @@ static std::vector void {
     if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
         tmpl.src.find("reasoning_content") == std::string::npos &&
+        tmpl.src.find("<think>") == std::string::npos &&
         analysis.reasoning.mode == reasoning_mode::NONE) {
         analysis.reasoning.mode = reasoning_mode::TAG_BASED;
         analysis.reasoning.start = "<think>";
@@ -185,7 +187,6 @@ void autoparser::analyze_template(const common_chat_template & tmpl) {
     LOG_DBG("func_name_prefix: '%s'\n", tools.function.name_prefix.c_str());
     LOG_DBG("func_name_suffix: '%s'\n", tools.function.name_suffix.c_str());
     LOG_DBG("func_close: '%s'\n", tools.function.close.c_str());
-    LOG_DBG("python_dict_format: %s\n", tools.format.uses_python_dicts ? "true" : "false");
     LOG_DBG("arg_name_prefix: '%s'\n", tools.arguments.name_prefix.c_str());
     LOG_DBG("arg_name_suffix: '%s'\n", tools.arguments.name_suffix.c_str());
     LOG_DBG("arg_value_prefix: '%s'\n", tools.arguments.value_prefix.c_str());
@@ -297,10 +298,10 @@ void analyze_reasoning::compare_reasoning_presence() {
         if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
             mode = reasoning_mode::TAG_BASED;
             start = trim_whitespace(result.tags["pre"]);
-            end = result.tags["post"];
+            end = trim_trailing_whitespace(result.tags["post"]);
         } else if (!result.tags["post"].empty()) {
             mode = reasoning_mode::TAG_BASED;
-            end = result.tags["post"];
+            end = trim_trailing_whitespace(result.tags["post"]);
         }
     }
 }
@@ -327,54 +328,31 @@ void analyze_reasoning::compare_thinking_enabled() {
     const auto & diff = comparison->diff;
 
     std::string left_trimmed = trim_whitespace(diff.left);
+    std::string right_trimmed = trim_whitespace(diff.right);
 
     if (left_trimmed.empty() && !diff.right.empty()) {
-        std::string right_trimmed = trim_whitespace(diff.right);
-
         if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
             if (start.empty()) {
                 start = right_trimmed;
                 mode = reasoning_mode::TAG_BASED;
             }
         }
+    } else if (right_trimmed.empty() && !diff.left.empty()) {
+        if (!left_trimmed.empty() && string_ends_with(comparison->output_A, left_trimmed)) {
+            if (end.empty()) {
+                auto seg = prune_whitespace_segments(segmentize_markers(comparison->output_A));
+                if (seg.size() >= 2 && seg[seg.size() - 1].value == left_trimmed && seg[seg.size() - 2].type == segment_type::MARKER) {
+                    start = seg[seg.size() - 2].value;
+                }
+                end = left_trimmed;
+                mode = reasoning_mode::TAG_BASED;
+            }
+        }
     }
 
     if (mode == reasoning_mode::NONE && start.empty() && !end.empty()) {
         mode = reasoning_mode::TAG_BASED;
     }
-
-    // Check for start+end pattern: when enable_thinking=false produces both start and end markers,
-    // but enable_thinking=true produces only the start marker. Both cases are TAG_BASED.
-    if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
-        auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
-            return p.literal(start) + p.space() + p.literal(end) + p.rest();
-        });
-        auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
-            return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest();
-        });
-        if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
-            parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
-            mode = reasoning_mode::TAG_BASED;
-        } else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
-            auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
-            if (result.result.success()) {
-                start = result.tags["pre"];
-                mode = reasoning_mode::TAG_BASED;
-            }
-        }
-    }
-
-    if (start.empty() && end.empty()) { // we might still have the case of "just open" and "just close"
-        if (!diff.left.empty() && !diff.right.empty()) {
-            auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
-            auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
-            if (seg_A.size() == 1 && seg_B.size() == 1) {
-                mode = reasoning_mode::TAG_BASED;
-                start = seg_B[0].value;
-                end = seg_A[0].value;
-            }
-        }
-    }
 }
 
 void analyze_reasoning::compare_reasoning_scope() {
@@ -422,14 +400,14 @@ void analyze_reasoning::compare_reasoning_scope() {
         auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
         if (result.result.success()) {
             start = result.tags["pre"];
-            end = result.tags["post"];
+            end = trim_trailing_whitespace(result.tags["post"]);
         } else {
             auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
                 return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())));
             });
             result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
             if (result.result.success()) {
-                end = result.tags["post"];
+                end = trim_trailing_whitespace(result.tags["post"]);
             } else {
                 LOG_DBG(ANSI_ORANGE "%s: Unable to extract reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
                 mode = reasoning_mode::NONE;
@@ -596,33 +574,23 @@ void analyze_tools::analyze_tool_call_format(const std::string & haystack,
         return;
     }
 
-    enum class json_quote_style { NONE, DOUBLE_QUOTES, SINGLE_QUOTES };
-
-    auto in_json_haystack = [&haystack](const std::string & needle) -> json_quote_style {
+    auto in_json_haystack = [&haystack](const std::string & needle) -> bool {
         auto parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
             return p.choice({ p.literal("{"), p.literal(":") }) << p.choice({
-                p.tag("sq", p.literal("'") + p.literal(needle) + p.literal("'")),
                 p.tag("dq", p.literal("\"") + p.literal(needle) + p.literal("\""))
             });
         });
         auto result = parser.parse_anywhere_and_extract(haystack);
-        if (!result.result.success()) {
-            return json_quote_style::NONE;
-        }
-        return result.tags.count("sq") && !result.tags["sq"].empty()
-            ? json_quote_style::SINGLE_QUOTES
-            : json_quote_style::DOUBLE_QUOTES;
+        return result.result.success();
     };
 
     auto fun_quote = in_json_haystack(fun_name_needle);
     auto arg_quote = in_json_haystack(arg_name_needle);
 
-    if (fun_quote != json_quote_style::NONE) {
+    if (fun_quote) {
         // no need to check further, we're in JSON land
         format.mode = tool_format::JSON_NATIVE;
-        format.uses_python_dicts = (fun_quote == json_quote_style::SINGLE_QUOTES);
-    } else if (arg_quote != json_quote_style::NONE) {
+    } else if (arg_quote) {
         format.mode = tool_format::TAG_WITH_JSON;
-        format.uses_python_dicts = (arg_quote == json_quote_style::SINGLE_QUOTES);
     } else {
         format.mode = tool_format::TAG_WITH_TAGGED;
     }
diff --git a/common/chat.cpp b/common/chat.cpp
index f446835ad9..947f8bf41c 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1,5 +1,6 @@
 #include "chat.h"
 
+#include "chat-auto-parser-helpers.h"
 #include "chat-auto-parser.h"
 #include "chat-peg-parser.h"
 #include "common.h"
@@ -22,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 using json = nlohmann::ordered_json;
@@ -760,7 +762,7 @@ static void foreach_parameter(const json &
 std::string common_chat_template_direct_apply(
     const common_chat_template & tmpl,
-    const autoparser::templates_params & inputs,
+    const autoparser::generation_params & inputs,
     const std::optional & messages_override,
     const std::optional & tools_override,
     const std::optional & additional_context) {
@@ -811,7 +813,7 @@ std::string common_chat_template_direct_apply(
 }
 
 static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl,
-                                                              const autoparser::templates_params & inputs) {
+                                                              const autoparser::generation_params & inputs) {
     common_chat_params data;
 
     // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@@ -928,7 +930,7 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
 }
 
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl,
-                                                           const autoparser::templates_params & inputs) {
+                                                           const autoparser::generation_params & inputs) {
     common_chat_params data;
 
     // Copy reasoning to the "thinking" field as expected by the gpt-oss template
@@ -1074,7 +1076,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
 
 // Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
 static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl,
-                                                                   const autoparser::templates_params & inputs) {
+                                                                   const autoparser::generation_params & inputs) {
     common_chat_params data;
 
     data.prompt = common_chat_template_direct_apply(tmpl, inputs);
@@ -1095,13 +1097,13 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
         // Build content parser for >>>all\n{content}
         // When tools are present, content stops before the next ">>>" (tool call)
         // When no tools, content goes until end
-        auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>"));
-        auto content_until_end = p.literal(">>>all\n") + p.content(p.rest());
+        auto content_until_tool = p.literal("all\n") + p.content(p.until(">>>"));
+        auto content_until_end = p.literal("all\n") + p.content(p.rest());
 
         // If no tools or tool_choice is NONE, just parse content
         if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
             // When no tools, just match the prefix and capture everything after
-            return content_until_end + p.end();
+            return wrap_for_generation_prompt(p, content_until_end + p.end(), inputs, autoparser::analyze_reasoning());
         }
 
         // Build tool call parsers for each available function
@@ -1113,7 +1115,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
 
             // Tool format: >>>function_name\n{json_args}
             auto tool_parser = p.tool(
-                p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) +
+                p.tool_open(p.tool_name(p.literal(name)) + p.literal("\n")) +
                 p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
             );
 
@@ -1124,17 +1126,20 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
         auto tools_only = p.trigger_rule("tools", p.one_or_more(tool_choice));
         auto content_and_tools = content_until_tool + tools_only;
 
+        auto ret = p.eps();
         if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
             if (inputs.parallel_tool_calls) {
-                return p.choice({ content_and_tools, tools_only }) + p.end();
+                ret = p.choice({ content_and_tools, tools_only }) + p.end();
+            } else {
+                ret = p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
             }
-            return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
+        } else if (inputs.parallel_tool_calls) {
+            ret = p.choice({ content_and_tools, content_only, tools_only }) + p.end();
+        } else {
+            auto content_and_tool = content_until_tool + tool_choice;
+            ret = p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
         }
-        if (inputs.parallel_tool_calls) {
-            return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
-        }
-        auto content_and_tool = content_until_tool + tool_choice;
-        return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
+        return wrap_for_generation_prompt(p, ret, inputs, autoparser::analyze_reasoning());
     });
 
     data.parser = parser.save();
@@ -1164,14 +1169,12 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
 
 // Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<counter>
 // The ID contains both the function name and an incrementing counter
 static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl,
-                                                           const autoparser::templates_params & inputs) {
+                                                           const autoparser::generation_params & inputs) {
     common_chat_params data;
 
     data.prompt = common_chat_template_direct_apply(tmpl, inputs);
     data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.supports_thinking = true;
-    data.thinking_start_tag = "<think>";
-    data.thinking_end_tag = "</think>";
     data.preserved_tokens = {
         "<|tool_calls_section_begin|>",
         "<|tool_calls_section_end|>",
@@ -1186,6 +1189,18 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
     auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
     auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
+    const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
+    const std::string SECTION_END = "<|tool_calls_section_end|>";
+    const std::string CALL_BEGIN = "<|tool_call_begin|>";
+    const std::string ARGS_BEGIN = "<|tool_call_argument_begin|>";
+    const std::string CALL_END = "<|tool_call_end|>";
+
+    const std::string THINK_START = "<think>";
+    const std::string THINK_END = "</think>";
+
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag = THINK_END;
+
     auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
         // Kimi K2 Thinking format:
         // - Reasoning: <think>{reasoning}</think>
@@ -1197,16 +1212,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
         //   <|tool_calls_section_end|>
         // The ID format is: functions.<name>:<counter> where counter is 0, 1, 2, ...
 
-        // Tool call markers
-        const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
-        const std::string SECTION_END = "<|tool_calls_section_end|>";
-        const std::string CALL_BEGIN = "<|tool_call_begin|>";
-        const std::string ARGS_BEGIN = "<|tool_call_argument_begin|>";
-        const std::string CALL_END = "<|tool_call_end|>";
-
-        const std::string THINK_START = "<think>";
-        const std::string THINK_END = "</think>";
-
+        // Tool call markers
         auto end = p.end();
 
         // Note: this model is CRAZY. It can diverge from its supposed tool calling pattern in so many ways it's not funny.
@@ -1218,7 +1224,8 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
 
         // Content only parser (no tools)
         if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-            return reasoning + p.content(p.rest()) + end;
+            return wrap_for_generation_prompt(p, reasoning + p.content(p.rest()) + end,
+                                              inputs, autoparser::analyze_reasoning(THINK_START, THINK_END));
         }
 
         // Build tool call parsers for each available function
@@ -1254,7 +1261,8 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
 
         auto content_before_tools = p.content(p.until_one_of({ SECTION_BEGIN, CALL_BEGIN }));
 
-        return reasoning + content_before_tools + tool_calls + end;
+        return wrap_for_generation_prompt(p, reasoning + content_before_tools + tool_calls + end,
+                                          inputs, autoparser::analyze_reasoning(THINK_START, THINK_END));
     });
 
     data.parser = parser.save();
@@ -1284,7 +1292,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
 //   - Tool calls: <|tool_call_start|>[function_name(arg1="value1", arg2="value2")]<|tool_call_end|>
 //   Tool calls can appear multiple times (parallel tool calls)
 static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl,
-                                                        const autoparser::templates_params & inputs) {
+                                                        const autoparser::generation_params & inputs) {
     common_chat_params data;
 
     data.prompt = common_chat_template_direct_apply(tmpl, inputs);
@@ -1303,13 +1311,15 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
     auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
     auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
-
     const std::string TOOL_CALL_START = "<|tool_call_start|>";
     const std::string TOOL_CALL_END = "<|tool_call_end|>";
 
     const std::string THINK_START = "<think>";
     const std::string THINK_END = "</think>";
 
-    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag = THINK_END;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
         auto end = p.end();
 
         auto reasoning = p.eps();
@@ -1318,7 +1328,8 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
         }
 
         if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-            return reasoning + p.content(p.rest()) + end;
+            return wrap_for_generation_prompt(p, reasoning + p.content(p.rest()) + end, inputs,
+                                              autoparser::analyze_reasoning(THINK_START, THINK_END));
        }
 
         auto tool_calls = p.rule("tool-calls",
@@ -1330,7 +1341,8 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
 
         auto content = p.content(p.until(TOOL_CALL_START));
 
-        return reasoning + content + tool_calls + end;
+        return wrap_for_generation_prompt(p, reasoning + content + tool_calls + end, inputs,
+                                          autoparser::analyze_reasoning(THINK_START, THINK_END));
     });
 
     data.parser = parser.save();
@@ -1356,7 +1368,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
 
 static common_chat_params common_chat_params_init_gigachat_v3(
     const common_chat_template & tmpl,
-    const autoparser::templates_params & inputs) {
+    const autoparser::generation_params & inputs) {
 
     common_chat_params data;
 
@@ -1373,6 +1385,7 @@ static common_chat_params common_chat_params_init_gigachat_v3(
     auto tool_call_start_prefix = "<|message_sep|>\n\nfunction call<|role_sep|>\n";
 
     auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto ret = p.eps();
         if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
             // Build a choice of all available tools
             auto tool_choice = p.choice();
@@ -1395,13 +1408,14 @@ static common_chat_params common_chat_params_init_gigachat_v3(
             auto tool_call = p.rule("tool-call", p.literal(tool_call_start_prefix) + tool_choice);
             auto tool_calls = p.trigger_rule("tool-call-root",
                 p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
 
-            return p.content(p.until("<|message_sep|>\n\n")) << tool_calls;
+            ret = p.content(p.until("<|message_sep|>\n\n")) << tool_calls;
+        } else {
+            // Content only parser
+            include_grammar = false;
+            ret = p.content(p.rest());
         }
-
-        // Content only parser
-        include_grammar = false;
-        return p.content(p.rest());
-
+        return wrap_for_generation_prompt(p, ret, inputs, autoparser::analyze_reasoning());
     });
 
     data.parser = parser.save();
@@ -1498,22 +1512,20 @@ static json common_chat_extra_context() {
 
 static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
                                                             const struct common_chat_templates_inputs & inputs) {
-    autoparser::templates_params params;
+    autoparser::generation_params params;
     params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
-    const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
-        ? *tmpls->template_tool_use
-        : *tmpls->template_default;
-    const auto & src = tmpl.source();
-    const auto & caps = tmpl.original_caps();
-    params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
-    params.add_generation_prompt = inputs.add_generation_prompt;
-    params.tool_choice = inputs.tool_choice;
+    const auto & tmpl =
+        params.tools.is_array() && tmpls->template_tool_use ? *tmpls->template_tool_use : *tmpls->template_default;
+    const auto & src = tmpl.source();
+    const auto & caps = tmpl.original_caps();
+    params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
+    params.tool_choice = inputs.tool_choice;
     params.reasoning_format = inputs.reasoning_format;
-    params.enable_thinking = inputs.enable_thinking;
-    params.grammar = inputs.grammar;
-    params.now = inputs.now;
-    params.add_bos = tmpls->add_bos;
-    params.add_eos = tmpls->add_eos;
+    params.enable_thinking = inputs.enable_thinking;
+    params.grammar = inputs.grammar;
+    params.now = inputs.now;
+    params.add_bos = tmpls->add_bos;
+    params.add_eos = tmpls->add_eos;
 
     if (src.find("<|channel|>") == std::string::npos) {
         // map developer to system for all models except for GPT-OSS
@@ -1532,6 +1544,15 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
         workaround::requires_non_null_content(params.messages);
     }
 
+    params.add_generation_prompt = false;
+    std::string no_gen_prompt = common_chat_template_direct_apply(tmpl, params);
+    params.add_generation_prompt = true;
+    std::string gen_prompt = common_chat_template_direct_apply(tmpl, params);
+    auto diff = calculate_diff_split(no_gen_prompt, gen_prompt);
+    params.generation_prompt = diff.right;
+
+    params.add_generation_prompt = inputs.add_generation_prompt;
+
     params.extra_context = common_chat_extra_context();
     for (auto el : inputs.chat_template_kwargs) {
         params.extra_context[el.first] = json::parse(el.second);
@@ -1541,12 +1562,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
         params.json_schema = json::parse(inputs.json_schema);
     }
 
-    // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
-    //     LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
-    //     params.parallel_tool_calls = false;
-    // } else {
     params.parallel_tool_calls = inputs.parallel_tool_calls;
-    //}
 
     if (params.tools.is_array()) {
         if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
@@ -1559,25 +1575,27 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
         }
     }
 
+    common_chat_params early_return;
+
     // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
     // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
     if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
         src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
         LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
-        return common_chat_params_init_ministral_3(tmpl, params);
+        early_return = common_chat_params_init_ministral_3(tmpl, params);
     }
 
     // GPT-OSS - has unique channel-based structure that needs dedicated handler
     if (src.find("<|channel|>") != std::string::npos) {
         LOG_DBG("Using specialized template: GPT-OSS\n");
-        return common_chat_params_init_gpt_oss(tmpl, params);
+        early_return = common_chat_params_init_gpt_oss(tmpl, params);
     }
 
     // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
     // Detection: template has ">>>all" for content and ">>>" prefix for tool calls
     if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
         LOG_DBG("Using specialized template: Functionary v3.2\n");
-        return common_chat_params_init_functionary_v3_2(tmpl, params);
+        early_return = common_chat_params_init_functionary_v3_2(tmpl, params);
     }
 
     // Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<counter>
@@ -1585,7 +1603,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
     if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
         src.find("<|tool_call_begin|>") != std::string::npos) {
         LOG_DBG("Using specialized template: Kimi K2 Thinking\n");
-        return common_chat_params_init_kimi_k2(tmpl, params);
+        early_return = common_chat_params_init_kimi_k2(tmpl, params);
     }
 
     // LFM2 - uses <|tool_list_start|>/<|tool_list_end|> markers and <|tool_call_start|>[name(args)]<|tool_call_end|> format
@@ -1593,7 +1611,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
     if (src.find("<|tool_list_start|>") != std::string::npos &&
         src.find("<|tool_list_end|>") != std::string::npos) {
         LOG_DBG("Using specialized template: LFM2\n");
-        return common_chat_params_init_lfm2(tmpl, params);
+        early_return = common_chat_params_init_lfm2(tmpl, params);
     }
 
     // GigaChatV3 format detection
@@ -1602,7 +1620,12 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
         src.find("<|function_call|>") == std::string::npos
     ) {
         LOG_DBG("Using specialized template: GigaChatV3\n");
-        return common_chat_params_init_gigachat_v3(tmpl, params);
+        early_return = common_chat_params_init_gigachat_v3(tmpl, params);
+    }
+
+    if (!early_return.parser.empty()) {
+        early_return.generation_prompt = params.generation_prompt;
+        return early_return;
     }
 
     try {
@@ -1615,6 +1638,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
             auto_params.thinking_start_tag = autoparser.reasoning.start;
             auto_params.thinking_end_tag = autoparser.reasoning.end;
         }
+        auto_params.generation_prompt = params.generation_prompt;
         return auto_params;
     } catch (const std::exception & e) {
         throw std::invalid_argument(std::string("Unable to generate parser for this template. Automatic parser generation failed: ") + e.what());
@@ -1712,9 +1736,9 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
         LOG_DBG("No parser definition detected, assuming pure content parser.");
     }
 
-    const std::string effective_input = params.prefill.empty()
+    const std::string effective_input = params.generation_prompt.empty()
         ? input
-        : params.prefill + input;
+        : params.generation_prompt + input;
 
     LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), effective_input.c_str());
 
diff --git a/common/chat.h b/common/chat.h
index ff4e84bfed..ca954bdc92 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -24,7 +24,7 @@ using json = nlohmann::ordered_json;
 struct common_chat_templates;
 
 namespace autoparser {
-struct templates_params;
+struct generation_params;
 }  // namespace autoparser
 
 struct common_chat_tool_call {
@@ -211,7 +211,7 @@ struct common_chat_params {
     std::string prompt;
     std::string grammar;
     bool grammar_lazy = false;
-    std::string prefill;
+    std::string generation_prompt;
     bool supports_thinking = false;
     std::string thinking_start_tag;  // e.g., "<think>"
     std::string thinking_end_tag;    // e.g., "</think>"
@@ -228,14 +228,14 @@ struct common_chat_parser_params {
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;  // TODO: refactor this to "bool parse_reasoning"
     // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
     bool reasoning_in_content = false;
-    std::string prefill;
+    std::string generation_prompt;
     bool parse_tool_calls = true;
     bool debug = false;  // Enable debug output for PEG parser
     common_peg_arena parser = {};
 
     common_chat_parser_params() = default;
     common_chat_parser_params(const common_chat_params & chat_params) {
-        format = chat_params.format;
-        prefill = chat_params.prefill;
+        format = chat_params.format;
+        generation_prompt = chat_params.generation_prompt;
     }
 };
 
@@ -301,7 +301,7 @@ std::map common_chat_templates_get_caps(const common_chat_tem
 
 std::string common_chat_template_direct_apply(
     const common_chat_template & tmpl,
-    const autoparser::templates_params & inputs,
+    const autoparser::generation_params & inputs,
     const std::optional & messages_override = std::nullopt,
     const std::optional & tools_override = std::nullopt,
     const std::optional & additional_context = std::nullopt);
diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
index a60a95f44d..8a282b8231 100755
--- a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
+++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
@@ -7,7 +7,6 @@
 {%- set available_tool_string = '' -%}
 {%- set add_tool_id = true -%}
 {%- set add_thoughts = true -%} {# whether to include reasoning blocks #}
-{%- set add_generation_prompt = true -%} {# whether to emit reasoning starter before assistant response #}
 {# Optional token placeholders (safe defaults) #}
 {%- set bos_token = bos_token or '' -%}
 {%- set eos_token = eos_token or '' -%}
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
index 9e6ec845d3..7349ce9eca 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
@@ -15,10 +15,10 @@
 {%- set ns.is_tool = false -%}
 {%- for tool in message['tool_calls']-%}
 {%- if not ns.is_first -%}
-{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] | tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
 {%- set ns.is_first = true -%}
 {%- else -%}
-{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] | tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
 {%- endif -%}
 {%- endfor -%}
 {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
index 2fd1c415b8..e987c2a16e 100644
--- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
@@ -28,25 +28,25 @@
 {%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}
 {%- endif -%}
 {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
-{%- if ns.is_last_user -%}{{'<|Assistant|>'}}
+{%- if ns.is_last_user -%}{{'<|Assistant|>'}}
 {%- endif -%}
 {%- set ns.is_last_user = false -%}
 {%- set ns.is_first = false -%}
 {%- set ns.is_tool = false -%}
 {%- for tool in message['tool_calls'] -%}
 {%- if not ns.is_first -%}
-{%- if not message['content'] -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
-{%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
+{%- if not message['content'] -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] | tojson + '<|tool▁call▁end|>'}}
+{%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] | tojson + '<|tool▁call▁end|>'}}
 {%- endif -%}
 {%- set ns.is_first = true -%}
-{%- else -%}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
+{%- else -%}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] | tojson + '<|tool▁call▁end|>'}}
 {%- endif -%}
 {%- endfor -%}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
 {%- endif -%}
 {%- if message['role'] == 'assistant' and not message['tool_calls'] -%}
 {%- if ns.is_last_user -%}{{'<|Assistant|>'}}
 {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{''}}
-{%- else -%}{{''}}
+{%- else -%}{{''}}
 {%- endif -%}
 {%- endif -%}
 {%- set ns.is_last_user = false -%}
@@ -65,7 +65,7 @@
 {%- endif -%}
 {%- endfor -%}
 {%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<|Assistant|>'}}
-{%- if not thinking -%}{{''}}
-{%- else -%}{{''}}
+{%- if not thinking -%}{{''}}
+{%- else -%}{{''}}
 {%- endif -%}
 {%- endif %}
\ No newline at end of file
diff --git a/models/templates/llama-cpp-deepseek-r1.jinja b/models/templates/llama-cpp-deepseek-r1.jinja
index 0d18870870..151b2edd02 100644
--- a/models/templates/llama-cpp-deepseek-r1.jinja
+++ b/models/templates/llama-cpp-deepseek-r1.jinja
@@ -49,7 +49,7 @@ Example function tool call syntax:
 {%- endif -%}
 {%- set tool_name = tc['function']['name'] -%}
 {%- set tool_args = tc['function']['arguments'] -%}
-{{- '<|tool▁call▁begin|>' + tc['type'] + '<|tool▁sep|>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<|tool▁call▁end|>' -}}
+{{- '<|tool▁call▁begin|>' + tc['type'] + '<|tool▁sep|>' + tool_name + '\n' + '```json' + '\n' + tool_args | tojson + '\n' + '```' + '<|tool▁call▁end|>' -}}
 {%- endfor -%}
 {{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}}
 {%- endif -%}
diff --git a/models/templates/meetkai-functionary-medium-v3.1.jinja b/models/templates/meetkai-functionary-medium-v3.1.jinja
index 29d64a215a..5f74b72f33 100644
--- a/models/templates/meetkai-functionary-medium-v3.1.jinja
+++ b/models/templates/meetkai-functionary-medium-v3.1.jinja
@@ -42,9 +42,9 @@
 {%- if 'tool_calls' in message and message['tool_calls'] -%}
 {%- for tool_call in message['tool_calls'] -%}
 {%- if tool_call["function"]["name"] == "python" -%}
-{{ '<|python_tag|>' + tool_call['function']['arguments'] }}
+{{ '<|python_tag|>' + tool_call['function']['arguments'] | tojson }}
 {%- else -%}
-{{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] + '</function>' }}
+{{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] | tojson + '</function>' }}
 {%- endif -%}
 {%- endfor -%}
 {{ '<|eom_id|>' }}
diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp
index e140eb8ebf..6abf71d6cf 100644
--- a/tests/test-chat-auto-parser.cpp
+++ b/tests/test-chat-auto-parser.cpp
@@ -1292,7 +1292,7 @@ static void test_nemotron_reasoning_detection(testing & t) {
 
     // Check reasoning markers
     t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
-    t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);
+    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
 
     // Check reasoning mode detection
     // Nemotron uses tag-based reasoning; prefill handles the template's forced markers
diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp
index e1eeedd526..908b13fd0c 100644
--- a/tests/test-chat-peg-parser.cpp
+++ b/tests/test-chat-peg-parser.cpp
@@ -145,7 +145,7 @@ static void test_example_native(testing & t) {
         common_reasoning_format reasoning_format;
         json json_schema;
         bool parallel_tool_calls;
-        std::string prefill;
+        std::string generation_prompt;
         std::string input;
 
         // Expect
@@ -157,7 +157,7 @@ static void test_example_native(testing & t) {
     auto build_parser = [](const test_case & tc) {
         return build_chat_peg_parser([&](common_chat_peg_builder & p) {
             auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
-            // Always use optional TAG_BASED pattern; prefill is prepended to input
+            // Always use optional TAG_BASED pattern; generation_prompt is prepended to input
            auto reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
 
             // tool calling parser
@@ -184,26 +184,26 @@ static void test_example_native(testing & t) {
 
    std::vector<test_case> test_cases = std::vector<test_case>{
         {
-            /* .name = */ "content with reasoning (no prefill)",
+            /* .name = */ "content with reasoning (no generation_prompt)",
             /* .tools = */ {},
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "",
+            /* .generation_prompt = */ "",
             /* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
             /* .expect_reasoning = */ "The user said hello, I must say hello back",
             /* .expect_content = */ "Hello",
             /* .expect_tool_calls = */ {},
         },
         {
-            /* .name = */ "content without reasoning (no prefill)",
+            /* .name = */ "content without reasoning (no generation_prompt)",
             /* .tools = */ {},
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "",
+            /* .generation_prompt = */ "",
             /* .input = */ ("Hello"),
             /* .expect_reasoning = */ "",
             /* .expect_content = */ "Hello",
             /* .expect_tool_calls = */ {},
         },
@@ -216,59 +216,59 @@ static void test_example_native(testing & t) {
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "",
+            /* .generation_prompt = */ "",
             /* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
             /* .expect_reasoning = */ "",
             /* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
             /* .expect_tool_calls = */ {},
         },
         {
-            /* .name = */ "content with reasoning prefill",
+            /* .name = */ "content with reasoning generation_prompt",
             /* .tools = */ {},
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "<think>",
+            /* .generation_prompt = */ "<think>",
             /* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
             /* .expect_reasoning = */ "The user said hello, I must say hello back",
             /* .expect_content = */ "Hello",
             /* .expect_tool_calls = */ {},
         },
         {
-            /* .name = */ "content with reasoning prefill and reasoning_format = none",
+            /* .name = */ "content with reasoning generation_prompt and reasoning_format = none",
             /* .tools = */ {},
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "<think>",
+            /* .generation_prompt = */ "<think>",
             /* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
             /* .expect_reasoning = */ "",
             /* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
             /* .expect_tool_calls = */ {},
         },
         {
-            /* .name = */ "content with closed reasoning prefill (empty reasoning discarded)",
+            /* .name = */ "content with closed reasoning generation_prompt (empty reasoning discarded)",
             /* .tools = */ {},
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "<think></think>",
+            /* .generation_prompt = */ "<think></think>",
             /* .input = */ ("Hello"),
             /* .expect_reasoning = */ "",
             /* .expect_content = */ "Hello",
             /* .expect_tool_calls = */ {},
         },
         {
-            /* .name = */ "tools with reasoning prefill",
+            /* .name = */ "tools with reasoning generation_prompt",
             /* .tools = */ create_tools(),
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "<think>",
+            /* .generation_prompt = */ "<think>",
             /* .input = */ ("I must get the weather in New York</think>\n" "[" "{
@@ -284,13 +284,13 @@ static void test_example_native(testing & t) {
             } },
         },
         {
-            /* .name = */ "parallel tools with reasoning prefill",
+            /* .name = */ "parallel tools with reasoning generation_prompt",
             /* .tools = */ create_tools(),
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
             /* .json_schema = */ {},
             /* .parallel_tool_calls = */ true,
-            /* .prefill = */ "<think>",
+            /* .generation_prompt = */ "<think>",
             /* .input = */
                 ("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
                  "search that for you."
@@ -328,7 +328,7 @@ static void test_example_native(testing & t) {
             } },
         },
         {
-            /* .name = */ "response_format with reasoning prefill",
+            /* .name = */ "response_format with reasoning generation_prompt",
             /* .tools = */ {},
             /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
             /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
@@ -340,7 +340,7 @@ static void test_example_native(testing & t) {
                     { "due_date", { { "type", "string" } } } } },
                 { "required", { "invoice_number", "amount", "due_date" } } },
             /* .parallel_tool_calls = */ false,
-            /* .prefill = */ "<think>",
+            /* .generation_prompt = */ "<think>",
             /* .input = */
                 ("I must produce the invoice in the requested format</think>\n"
                  R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
@@ -368,7 +368,7 @@ static void test_example_native(testing & t) {
             t.log(line);
         }
 
-        std::string effective_input = tc.prefill + tc.input;
+        std::string effective_input = tc.generation_prompt + tc.input;
         common_peg_parse_context ctx(effective_input);
         auto result = parser.parse(ctx);
 
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 844d619a51..0651750955 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1001,8 +1001,8 @@ static void test_peg_parser(common_chat_templates * tmpls,
     // already placed the opening tag in the prompt.
     // For lazy grammars, the grammar only activates from the trigger position, so the
     // reasoning prefill is irrelevant — reasoning is handled by the PEG parser.
-    if (!parser.params_.prefill.empty() && earliest_trigger_pos == std::string::npos) {
-        constrained = parser.params_.prefill + constrained;
+    if (!parser.params_.generation_prompt.empty() && earliest_trigger_pos == std::string::npos) {
+        constrained = parser.params_.generation_prompt + constrained;
     }
 
     // Test the constrained portion against the grammar
@@ -1326,12 +1326,15 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
         // NVIDIA Nemotron-3 Nano
         auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug);
 
-        tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run();
+        tst.test("Hello, world!\nWhat's up?").
+            enable_thinking(false).
+            reasoning_format(COMMON_REASONING_FORMAT_AUTO).
+ expect(message_assist).run(); tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") - .enable_thinking(false) + .enable_thinking(true) .reasoning_format(COMMON_REASONING_FORMAT_NONE) - .expect_content("I'm\nthinking\n\nHello, world!\nWhat's up?") + .expect_content("I'm\nthinking\n\nHello, world!\nWhat's up?") .run(); tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") @@ -1491,7 +1494,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .expect(simple_assist_msg("The answer is 42.", "Let me think about this...")) .run(); - tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run(); + tst.test("Hello, world!").reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(simple_assist_msg("Hello, world!")).run(); } { // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models) @@ -1807,6 +1810,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) { "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " "\"XYZCITY\"}<|tool▁call▁end|><|tool▁calls▁end|>") .tools({ get_time_tool }) + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) .run(); } @@ -1852,7 +1857,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) { { auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); - tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run(); + tst.test("CONTENT").enable_thinking(false).reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK). + expect(simple_assist_msg("CONTENT", "")).run(); } // GLM-4.6 tests - format: function_name\n...\n...\n @@ -1915,6 +1921,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { "arg11" "arg22" "") + .enable_thinking(false) .parallel_tool_calls(true) .tools({ special_function_tool, special_function_tool_with_optional_param @@ -2231,10 +2238,11 @@ static void test_template_output_peg_parsers(bool detailed_debug) { { auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug); tst.test( - "\n\n\n\n1\n\n") .tools({ special_function_tool }) .expect(message_assist_call) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .run(); } @@ -2297,8 +2305,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) { // Functionary v3.2 - recipient-based format: >>>recipient\n{content} { auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug); - tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run(); - tst.test(">>>special_function\n{\"arg1\": 1}") + tst.test("all\nHello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("special_function\n{\"arg1\": 1}") .tools({ special_function_tool }) .expect(message_assist_call) .run(); @@ -2318,8 +2326,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) { // Note: Template uses forced-open mode (prompt ends with ), so input shouldn't include opening tag { auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug); - tst.test("Hello, world!\nWhat's up?") - .enable_thinking(true) // Forced open + tst.test("Hello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .expect(message_assist) .run(); tst.test("I'm\nthinkingHello, world!\nWhat's up?") @@ -2331,14 +2339,15 @@ static void test_template_output_peg_parsers(bool detailed_debug) { // llama-cpp DeepSeek R1 template (always forced-open thinking) { auto tst = 
peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug); - tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).run(); tst.test("I'm\nthinkingHello, world!\nWhat's up?") .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .expect(message_assist_thoughts) .run(); tst.test( - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" + "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .tools({ special_function_tool }) .parallel_tool_calls(true) .expect(message_assist_call) @@ -2348,7 +2357,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) { // Note: Template uses forced-open mode (prompt ends with ), so input shouldn't include opening tag { auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug); - tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run(); + tst.test("Hello, world!\nWhat's up?").enable_thinking(true). + reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK). + expect(message_assist).run(); tst.test("I'm\nthinkingHello, world!\nWhat's up?") .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .expect(message_assist_thoughts) @@ -2357,6 +2368,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) { "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>") .tools({ special_function_tool }) + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .expect(message_assist_call) .run(); } @@ -2386,12 +2399,12 @@ static void test_template_output_peg_parsers(bool detailed_debug) { // Apriel 1.6 Thinker (reasoning-only support) { auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug); - tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); // Implicit reasoning start (forced open) tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?") .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .expect(message_assist_thoughts) + .enable_thinking(true) + .expect(simple_assist_msg("Hello, world!\nWhat's up?", "Here are my reasoning steps:\nI'm\nthinking")) .run(); // Reasoning + Tool calls @@ -2399,8 +2412,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) { "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n[{\"name\": \"special_function\", \"arguments\": " "{\"arg1\": 1}}]") .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) .tools({ special_function_tool }) - .expect(message_assist_call_thoughts) + .expect(simple_assist_msg("", "Here are my reasoning steps:\nI'm\nthinking", "special_function", "{\"arg1\":1}")) .run(); } diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp index 76995bede1..ee1ff6ebe7 100644 --- a/tools/cli/cli.cpp +++ b/tools/cli/cli.cpp @@ -105,7 +105,7 @@ struct cli_context { llama_get_model(ctx_server.get_llama_context())); task.params.sampling.reasoning_budget_tokens = reasoning_budget; - task.params.sampling.grammar_prefill = chat_params.prefill; + task.params.sampling.grammar_prefill = chat_params.generation_prompt; if (!chat_params.thinking_start_tag.empty()) { task.params.sampling.reasoning_budget_start = @@ -215,7 +215,7 @@ struct 
+        task.params.sampling.grammar_prefill = chat_params.generation_prompt;

         if (!chat_params.thinking_start_tag.empty()) {
             task.params.sampling.reasoning_budget_start =
@@ -215,7 +215,7 @@ struct cli_context {
         inputs.parallel_tool_calls = false;
         inputs.add_generation_prompt = true;
         inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        inputs.enable_thinking = common_chat_templates_support_enable_thinking(chat_params.tmpls.get());
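+        // Respect the CLI-side thinking toggle instead of unconditionally following
+        // whatever the template supports.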
+        inputs.enable_thinking = chat_params.enable_thinking ? common_chat_templates_support_enable_thinking(chat_params.tmpls.get()) : false;

         // Apply chat template to the list of messages
         return common_chat_templates_apply(chat_params.tmpls.get(), inputs);
diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp
index ffa3a5af7a..a837971571 100644
--- a/tools/parser/debug-template-parser.cpp
+++ b/tools/parser/debug-template-parser.cpp
@@ -282,7 +282,7 @@ static void render_scenario(const common_chat_template & tmpl,
     LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());

     try {
-        autoparser::templates_params inputs;
+        autoparser::generation_params inputs;
         inputs.messages = final_messages;
         inputs.add_generation_prompt = add_generation_prompt;
         inputs.extra_context["enable_thinking"] = enable_thinking;
@@ -395,7 +395,7 @@ int main(int argc, char ** argv) {
     analysis.analyze_template(chat_template);

     // Generate Parser
-    autoparser::templates_params params;
+    autoparser::generation_params params;
     params.messages = json::array({ build_user_message() });
     params.reasoning_format = opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp
index a92e104ac0..bf898a2290 100644
--- a/tools/parser/template-analysis.cpp
+++ b/tools/parser/template-analysis.cpp
@@ -400,12 +400,12 @@ static void analyze_template(const std::string & template_path) {
     {
         json user_msg = make_user_msg();

-        autoparser::templates_params params_no_tools;
+        autoparser::generation_params params_no_tools;
         params_no_tools.messages = json::array({ user_msg });
         params_no_tools.add_generation_prompt = false;
         params_no_tools.tools = json::array();

-        autoparser::templates_params params_with_tools = params_no_tools;
+        autoparser::generation_params params_with_tools = params_no_tools;
         params_with_tools.tools = tools;

         std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
@@ -419,12 +419,12 @@ static void analyze_template(const std::string & template_path) {
     {
         json user_msg = make_user_msg();

-        autoparser::templates_params params_no_prompt;
+        autoparser::generation_params params_no_prompt;
         params_no_prompt.messages = json::array({ user_msg });
         params_no_prompt.add_generation_prompt = false;
         params_no_prompt.tools = json::array();

-        autoparser::templates_params params_with_prompt = params_no_prompt;
+        autoparser::generation_params params_with_prompt = params_no_prompt;
         params_with_prompt.add_generation_prompt = true;

         std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
@@ -438,12 +438,12 @@ static void analyze_template(const std::string & template_path) {
     {
         json user_msg = make_user_msg();

-        autoparser::templates_params params_no_reasoning;
+        autoparser::generation_params params_no_reasoning;
         params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
         params_no_reasoning.add_generation_prompt = false;
         params_no_reasoning.enable_thinking = true;

-        autoparser::templates_params params_with_reasoning = params_no_reasoning;
+        autoparser::generation_params params_with_reasoning = params_no_reasoning;
         params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });

         std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
@@ -458,12 +458,12 @@ static void analyze_template(const std::string & template_path) {
         json user_msg = make_user_msg();
         json user_msg2 = make_user_msg2();

-        autoparser::templates_params params_no_reasoning;
+        autoparser::generation_params params_no_reasoning;
         params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
         params_no_reasoning.add_generation_prompt = false;
         params_no_reasoning.enable_thinking = true;

-        autoparser::templates_params params_with_reasoning = params_no_reasoning;
+        autoparser::generation_params params_with_reasoning = params_no_reasoning;
         params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });

         std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
@@ -477,12 +477,12 @@ static void analyze_template(const std::string & template_path) {
     {
         json user_msg = make_user_msg();

-        autoparser::templates_params params_no_tool;
+        autoparser::generation_params params_no_tool;
         params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
         params_no_tool.add_generation_prompt = false;
         params_no_tool.tools = tools;

-        autoparser::templates_params params_with_tool = params_no_tool;
+        autoparser::generation_params params_with_tool = params_no_tool;
         params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });

         std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
@@ -497,12 +497,12 @@ static void analyze_template(const std::string & template_path) {
         json user_msg = make_user_msg();
         json user_msg2 = make_user_msg2_continue();

-        autoparser::templates_params params_no_tool;
+        autoparser::generation_params params_no_tool;
         params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
         params_no_tool.add_generation_prompt = false;
         params_no_tool.tools = tools;

-        autoparser::templates_params params_with_tool = params_no_tool;
+        autoparser::generation_params params_with_tool = params_no_tool;
         params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });

         std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
@@ -516,12 +516,12 @@ static void analyze_template(const std::string & template_path) {
     {
         json user_msg = make_user_msg();

-        autoparser::templates_params params_one_tool;
+        autoparser::generation_params params_one_tool;
         params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
         params_one_tool.add_generation_prompt = false;
         params_one_tool.tools = tools;

-        autoparser::templates_params params_two_tools = params_one_tool;
+        autoparser::generation_params params_two_tools = params_one_tool;
         params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });

         std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
@@ -536,12 +536,12 @@ static void analyze_template(const std::string & template_path) {
         json user_msg = make_user_msg();
         json user_msg2 = make_user_msg2_continue();

-        autoparser::templates_params params_one_tool;
+        autoparser::generation_params params_one_tool;
         params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
         params_one_tool.add_generation_prompt = false;
         params_one_tool.tools = tools;

-        autoparser::templates_params params_two_tools = params_one_tool;
+        autoparser::generation_params params_two_tools = params_one_tool;
         params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });

         std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
@@ -555,13 +555,13 @@ static void analyze_template(const std::string & template_path) {
     {
         json user_msg = make_user_msg();

-        autoparser::templates_params params_no_reasoning;
+        autoparser::generation_params params_no_reasoning;
         params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
         params_no_reasoning.add_generation_prompt = false;
         params_no_reasoning.tools = tools;
         params_no_reasoning.enable_thinking = true;

-        autoparser::templates_params params_with_reasoning = params_no_reasoning;
+        autoparser::generation_params params_with_reasoning = params_no_reasoning;
         params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });

         std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index 924d6367a5..e0a6e86e7f 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1080,20 +1080,20 @@ json oaicompat_chat_params_parse(
         }
     }

-    llama_params["chat_format"] = static_cast<int>(chat_params.format);
-    llama_params["prompt"] = chat_params.prompt;
+    llama_params["chat_format"]       = static_cast<int>(chat_params.format);
+    llama_params["prompt"]            = chat_params.prompt;
     if (!chat_params.grammar.empty()) {
         llama_params["grammar"] = chat_params.grammar;
     }
-    llama_params["grammar_lazy"] = chat_params.grammar_lazy;
-    auto grammar_triggers = json::array();
+    llama_params["grammar_lazy"]      = chat_params.grammar_lazy;
+    auto grammar_triggers             = json::array();
     for (const auto & trigger : chat_params.grammar_triggers) {
         server_grammar_trigger ct(trigger);
         grammar_triggers.push_back(ct.to_json());
     }
-    llama_params["grammar_triggers"] = grammar_triggers;
-    llama_params["preserved_tokens"] = chat_params.preserved_tokens;
-    llama_params["prefill"] = chat_params.prefill;
+    llama_params["grammar_triggers"]  = grammar_triggers;
+    llama_params["preserved_tokens"]  = chat_params.preserved_tokens;
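+    // Exposed to clients under the new name ("prefill" is gone);
+    // server-task.cpp reads the same key back when building task params.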
+    llama_params["generation_prompt"] = chat_params.generation_prompt;
     for (const auto & stop : chat_params.additional_stops) {
         llama_params["stop"].push_back(stop);
     }
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 534c2f5f40..a60ade594a 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -38,53 +38,53 @@ json task_params::to_json(bool only_metrics) const {
     }

     if (only_metrics) {
-        return json {
-            {"seed", sampling.seed},
-            {"temperature", sampling.temp},
-            {"dynatemp_range", sampling.dynatemp_range},
-            {"dynatemp_exponent", sampling.dynatemp_exponent},
-            {"top_k", sampling.top_k},
-            {"top_p", sampling.top_p},
-            {"min_p", sampling.min_p},
-            {"top_n_sigma", sampling.top_n_sigma},
-            {"xtc_probability", sampling.xtc_probability},
-            {"xtc_threshold", sampling.xtc_threshold},
-            {"typical_p", sampling.typ_p},
-            {"repeat_last_n", sampling.penalty_last_n},
-            {"repeat_penalty", sampling.penalty_repeat},
-            {"presence_penalty", sampling.penalty_present},
-            {"frequency_penalty", sampling.penalty_freq},
-            {"dry_multiplier", sampling.dry_multiplier},
-            {"dry_base", sampling.dry_base},
-            {"dry_allowed_length", sampling.dry_allowed_length},
-            {"dry_penalty_last_n", sampling.dry_penalty_last_n},
-            {"mirostat", sampling.mirostat},
-            {"mirostat_tau", sampling.mirostat_tau},
-            {"mirostat_eta", sampling.mirostat_eta},
-            {"max_tokens", n_predict},
-            {"n_predict", n_predict}, // TODO: deduplicate?
-            {"n_keep", n_keep},
-            {"n_discard", n_discard},
-            {"ignore_eos", sampling.ignore_eos},
-            {"stream", stream},
-            {"n_probs", sampling.n_probs},
-            {"min_keep", sampling.min_keep},
-            {"chat_format", common_chat_format_name(chat_parser_params.format)},
-            {"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)},
-            {"reasoning_in_content", chat_parser_params.reasoning_in_content},
-            {"prefill", chat_parser_params.prefill},
-            {"samplers", samplers},
-            {"speculative.n_max", speculative.n_max},
-            {"speculative.n_min", speculative.n_min},
-            {"speculative.p_min", speculative.p_min},
-            {"speculative.type", common_speculative_type_to_str(speculative.type)},
-            {"speculative.ngram_size_n", speculative.ngram_size_n},
-            {"speculative.ngram_size_m", speculative.ngram_size_m},
-            {"speculative.ngram_m_hits", speculative.ngram_min_hits},
-            {"timings_per_token", timings_per_token},
-            {"post_sampling_probs", post_sampling_probs},
-            {"backend_sampling", sampling.backend_sampling},
-            {"lora", lora},
+        return json{
+            { "seed", sampling.seed },
+            { "temperature", sampling.temp },
+            { "dynatemp_range", sampling.dynatemp_range },
+            { "dynatemp_exponent", sampling.dynatemp_exponent },
+            { "top_k", sampling.top_k },
+            { "top_p", sampling.top_p },
+            { "min_p", sampling.min_p },
+            { "top_n_sigma", sampling.top_n_sigma },
+            { "xtc_probability", sampling.xtc_probability },
+            { "xtc_threshold", sampling.xtc_threshold },
+            { "typical_p", sampling.typ_p },
+            { "repeat_last_n", sampling.penalty_last_n },
+            { "repeat_penalty", sampling.penalty_repeat },
+            { "presence_penalty", sampling.penalty_present },
+            { "frequency_penalty", sampling.penalty_freq },
+            { "dry_multiplier", sampling.dry_multiplier },
+            { "dry_base", sampling.dry_base },
+            { "dry_allowed_length", sampling.dry_allowed_length },
+            { "dry_penalty_last_n", sampling.dry_penalty_last_n },
+            { "mirostat", sampling.mirostat },
+            { "mirostat_tau", sampling.mirostat_tau },
+            { "mirostat_eta", sampling.mirostat_eta },
+            { "max_tokens", n_predict },
+            { "n_predict", n_predict }, // TODO: deduplicate?
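+            // "prefill" is reported as "generation_prompt" from here on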
+ { "n_keep", n_keep }, + { "n_discard", n_discard }, + { "ignore_eos", sampling.ignore_eos }, + { "stream", stream }, + { "n_probs", sampling.n_probs }, + { "min_keep", sampling.min_keep }, + { "chat_format", common_chat_format_name(chat_parser_params.format) }, + { "reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format) }, + { "reasoning_in_content", chat_parser_params.reasoning_in_content }, + { "generation_prompt", chat_parser_params.generation_prompt }, + { "samplers", samplers }, + { "speculative.n_max", speculative.n_max }, + { "speculative.n_min", speculative.n_min }, + { "speculative.p_min", speculative.p_min }, + { "speculative.type", common_speculative_type_to_str(speculative.type) }, + { "speculative.ngram_size_n", speculative.ngram_size_n }, + { "speculative.ngram_size_m", speculative.ngram_size_m }, + { "speculative.ngram_m_hits", speculative.ngram_min_hits }, + { "timings_per_token", timings_per_token }, + { "post_sampling_probs", post_sampling_probs }, + { "backend_sampling", sampling.backend_sampling }, + { "lora", lora }, }; } @@ -135,7 +135,7 @@ json task_params::to_json(bool only_metrics) const { {"chat_format", common_chat_format_name(chat_parser_params.format)}, {"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)}, {"reasoning_in_content", chat_parser_params.reasoning_in_content}, - {"prefill", chat_parser_params.prefill}, + {"generation_prompt", chat_parser_params.generation_prompt}, {"samplers", samplers}, {"speculative.n_max", speculative.n_max}, {"speculative.n_min", speculative.n_min}, @@ -402,8 +402,8 @@ task_params server_task::params_from_json_cmpl( } params.chat_parser_params.reasoning_format = reasoning_format; params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY); - params.chat_parser_params.prefill = json_value(data, "prefill", std::string()); - params.sampling.grammar_prefill = params.chat_parser_params.prefill; + params.chat_parser_params.generation_prompt = json_value(data, "generation_prompt", std::string()); + params.sampling.grammar_prefill = params.chat_parser_params.generation_prompt; params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false); if (data.contains("chat_parser")) { params.chat_parser_params.parser.load(data.at("chat_parser").get());