diff --git a/common/arg.cpp b/common/arg.cpp
index 10aa1b5e4f..666339a094 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -3115,6 +3115,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.chat_template = read_file(value);
         }
     ).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
+    add_opt(common_arg(
+        {"--skip-chat-parsing"},
+        {"--no-skip-chat-parsing"},
+        string_format(
+            "force a pure content parser, even if a Jinja template is specified; model will output everything "
+            "in the content section, including any reasoning and/or tool calls (default: disabled)"
+        ),
+        [](common_params & params, bool value) {
+            params.force_pure_content_parser = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SKIP_CHAT_PARSING"));
     add_opt(common_arg(
         {"--prefill-assistant"},
         {"--no-prefill-assistant"},
diff --git a/common/chat.cpp b/common/chat.cpp
index 056feb9681..fb4751e531 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1562,6 +1562,21 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
         }
     }
 
+    if (inputs.force_pure_content) {
+        LOG_WRN("Forcing pure content template, will not render reasoning or tools separately.\n");
+        // Return early, before the template-specific handlers below: render the prompt with reasoning extraction off and parse all output as content
+        common_chat_params data;
+        auto params_copy = params;
+        params_copy.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+        data.prompt = common_chat_template_direct_apply(tmpl, params_copy);
+        data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+        auto parser = build_chat_peg_parser([](common_chat_peg_builder &p) {
+            return p.content(p.rest());
+        });
+        data.parser = parser.save();
+        return data;
+    }
+
     // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
     // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
     if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
diff --git a/common/chat.h b/common/chat.h
index 930987cf77..23e80baf69 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -204,6 +204,7 @@ struct common_chat_templates_inputs {
     std::map chat_template_kwargs;
     bool add_bos = false;
     bool add_eos = false;
+    bool force_pure_content = false;
 };
 
 struct common_chat_params {
diff --git a/common/common.h b/common/common.h
index ee7a2d805e..073ef566d2 100644
--- a/common/common.h
+++ b/common/common.h
@@ -544,6 +544,7 @@ struct common_params {
     std::string chat_template = ""; // NOLINT
     bool use_jinja = true; // NOLINT
     bool enable_chat_template = true;
+    bool force_pure_content_parser = false;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
     int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
     int reasoning_budget = -1;
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 7c4342d6bf..94182b2ad8 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -215,6 +215,7 @@ struct cli_context {
         inputs.parallel_tool_calls = false;
         inputs.add_generation_prompt = true;
         inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+        inputs.force_pure_content = chat_params.force_pure_content;
         inputs.enable_thinking = chat_params.enable_thinking ? common_chat_templates_support_enable_thinking(chat_params.tmpls.get()) : false;
 
         // Apply chat template to the list of messages
diff --git a/tools/completion/completion.cpp b/tools/completion/completion.cpp
index 2e0f087184..58d598fcc0 100644
--- a/tools/completion/completion.cpp
+++ b/tools/completion/completion.cpp
@@ -308,6 +308,7 @@ int main(int argc, char ** argv) {
                 inputs.use_jinja = g_params->use_jinja;
                 inputs.messages = chat_msgs;
                 inputs.add_generation_prompt = !params.prompt.empty();
+                inputs.force_pure_content = params.force_pure_content_parser;
 
                 prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
             }
diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index d55987c6d2..59ea11fc47 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1065,6 +1065,7 @@ json oaicompat_chat_params_parse(
         inputs.add_generation_prompt = true;
     }
 
+    inputs.force_pure_content = opt.force_pure_content;
     // Apply chat template to the list of messages
     auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);
 
diff --git a/tools/server/server-common.h b/tools/server/server-common.h
index 3e56b3d856..213ae52bb0 100644
--- a/tools/server/server-common.h
+++ b/tools/server/server-common.h
@@ -290,6 +290,7 @@ struct server_chat_params {
     int reasoning_budget = -1;
     std::string reasoning_budget_message;
     std::string media_path;
+    bool force_pure_content = false;
 };
 
 // used by /completions endpoint
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 05d6da1006..1e5ff101c8 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -911,6 +911,7 @@ private:
             /* reasoning_budget */ params_base.reasoning_budget,
             /* reasoning_budget_msg */ params_base.reasoning_budget_message,
             /* media_path */ params_base.media_path,
+            /* force_pure_content */ params_base.force_pure_content_parser
         };
     }
 