Reasoning prefill
This commit is contained in:
parent
559646472d
commit
060d4e4cfd
|
|
@ -49,6 +49,42 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
|
|||
data.preserved_tokens = autoparser.preserved_tokens;
|
||||
data.parser = parser.save();
|
||||
|
||||
// Extract reasoning prefill from the end of the rendered prompt.
|
||||
// If the template added reasoning markers (e.g. <think> or <think></think>) at the end,
|
||||
// store them so they can be prepended to model output before parsing.
|
||||
if (inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
|
||||
autoparser.reasoning.mode != reasoning_mode::NONE &&
|
||||
!autoparser.reasoning.end.empty()) {
|
||||
const auto & r_start = autoparser.reasoning.start;
|
||||
const auto & r_end = autoparser.reasoning.end;
|
||||
// Trim trailing whitespace from the prompt for suffix matching
|
||||
auto prompt_trimmed = data.prompt;
|
||||
while (!prompt_trimmed.empty() &&
|
||||
(prompt_trimmed.back() == ' ' || prompt_trimmed.back() == '\n' ||
|
||||
prompt_trimmed.back() == '\r' || prompt_trimmed.back() == '\t')) {
|
||||
prompt_trimmed.pop_back();
|
||||
}
|
||||
if (!r_start.empty()) {
|
||||
// Check for start+end at end of prompt (e.g. <think></think>)
|
||||
if (string_ends_with(prompt_trimmed, r_end)) {
|
||||
auto before_end = prompt_trimmed.substr(0, prompt_trimmed.size() - r_end.size());
|
||||
while (!before_end.empty() &&
|
||||
(before_end.back() == ' ' || before_end.back() == '\n' ||
|
||||
before_end.back() == '\r' || before_end.back() == '\t')) {
|
||||
before_end.pop_back();
|
||||
}
|
||||
if (string_ends_with(before_end, r_start)) {
|
||||
// Prompt ends with start + whitespace + end: extract from start to end of trimmed prompt
|
||||
data.reasoning_prefill = prompt_trimmed.substr(before_end.size() - r_start.size());
|
||||
}
|
||||
}
|
||||
// Check for just start at end of prompt (e.g. <think>)
|
||||
if (data.reasoning_prefill.empty() && string_ends_with(prompt_trimmed, r_start)) {
|
||||
data.reasoning_prefill = r_start;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build grammar if tools are present
|
||||
bool has_tools =
|
||||
autoparser.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty();
|
||||
|
|
@ -96,9 +132,8 @@ common_peg_arena autoparser::build_parser(const templates_params & inputs) const
|
|||
|
||||
parser_build_context ctx(p, inputs);
|
||||
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
bool enable_thinking = inputs.enable_thinking;
|
||||
|
||||
ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
|
||||
ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
|
||||
ctx.content = &content;
|
||||
|
||||
// Build reasoning parser
|
||||
|
|
@ -130,24 +165,15 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co
|
|||
return p.eps();
|
||||
}
|
||||
|
||||
bool thinking_forced_open = (mode == reasoning_mode::FORCED_OPEN);
|
||||
bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED);
|
||||
|
||||
if (thinking_forced_open || thinking_forced_closed) {
|
||||
// Thinking is forced open OR forced closed with enable_thinking=true
|
||||
// In both cases, expect only the closing tag (opening was in template)
|
||||
// However, since we might have incorrectly detected the open/close pattern,
|
||||
// we admit an optional starting marker
|
||||
return p.optional(p.literal(start)) + p.reasoning(p.until(end)) + end;
|
||||
}
|
||||
if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
|
||||
// Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
|
||||
// Both use the same tag-based pattern if markers are available
|
||||
if (!start.empty() && !end.empty()) {
|
||||
return p.optional(start + p.reasoning(p.until(end)) + end);
|
||||
if (!end.empty()) {
|
||||
if (!start.empty()) {
|
||||
// Standard tag-based: optional(<think>reasoning</think>)
|
||||
return p.optional(start + p.reasoning(p.until(end)) + end);
|
||||
}
|
||||
// Delimiter-style (empty start): optional(reasoning[DELIMITER])
|
||||
return p.optional(p.reasoning(p.until(end)) + end);
|
||||
}
|
||||
} else if (mode == reasoning_mode::DELIMITER) {
|
||||
return p.optional(p.reasoning(p.until(end)) + end);
|
||||
}
|
||||
|
||||
return p.eps();
|
||||
|
|
|
|||
|
|
@ -77,11 +77,7 @@ struct templates_params {
|
|||
// Reasoning handling mode (derived from R1-R3 comparisons)
|
||||
enum class reasoning_mode {
|
||||
NONE, // No reasoning markers detected
|
||||
TAG_BASED, // Standard tag-based: <think>...</think>
|
||||
DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter)
|
||||
FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end)
|
||||
FORCED_CLOSED, // Template ends with open reasoning tag on enabled thinking but
|
||||
// with both opened and closed tag for disabled thinking
|
||||
TAG_BASED, // Tag-based: <think>...</think> (start can be empty for delimiter-style)
|
||||
TOOLS_ONLY // Only reason on tool calls, not on normal content
|
||||
};
|
||||
|
||||
|
|
@ -91,12 +87,6 @@ inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode)
|
|||
return os << "NONE";
|
||||
case reasoning_mode::TAG_BASED:
|
||||
return os << "TAG_BASED";
|
||||
case reasoning_mode::DELIMITER:
|
||||
return os << "DELIMITER";
|
||||
case reasoning_mode::FORCED_OPEN:
|
||||
return os << "FORCED_OPEN";
|
||||
case reasoning_mode::FORCED_CLOSED:
|
||||
return os << "FORCED_CLOSED";
|
||||
case reasoning_mode::TOOLS_ONLY:
|
||||
return os << "TOOLS_ONLY";
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
|
|||
if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
|
||||
tmpl.src.find("reasoning_content") == std::string::npos &&
|
||||
analysis.reasoning.mode == reasoning_mode::NONE) {
|
||||
analysis.reasoning.mode = reasoning_mode::FORCED_OPEN;
|
||||
analysis.reasoning.mode = reasoning_mode::TAG_BASED;
|
||||
analysis.reasoning.start = "<think>";
|
||||
analysis.reasoning.end = "</think>";
|
||||
analysis.preserved_tokens.push_back("<think>");
|
||||
|
|
@ -295,15 +295,11 @@ void analyze_reasoning::compare_reasoning_presence() {
|
|||
}
|
||||
if (result.result.success()) {
|
||||
if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
|
||||
if (parser_wrapped.parse_anywhere_and_extract(diff.right).result.success()) { // both tags in the diff = no forced close
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
} else {
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
}
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
start = trim_whitespace(result.tags["pre"]);
|
||||
end = result.tags["post"];
|
||||
} else if (!result.tags["post"].empty()) {
|
||||
mode = reasoning_mode::DELIMITER;
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
end = result.tags["post"];
|
||||
}
|
||||
}
|
||||
|
|
@ -338,17 +334,17 @@ void analyze_reasoning::compare_thinking_enabled() {
|
|||
if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
|
||||
if (start.empty()) {
|
||||
start = right_trimmed;
|
||||
mode = reasoning_mode::FORCED_OPEN;
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (start.empty() && !end.empty()) {
|
||||
mode = reasoning_mode::DELIMITER;
|
||||
if (mode == reasoning_mode::NONE && start.empty() && !end.empty()) {
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
}
|
||||
|
||||
// Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers,
|
||||
// but enable_thinking=true produces only the start marker
|
||||
// Check for start+end pattern: when enable_thinking=false produces both start and end markers,
|
||||
// but enable_thinking=true produces only the start marker. Both cases are TAG_BASED.
|
||||
if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
|
||||
auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
|
||||
return p.literal(start) + p.space() + p.literal(end) + p.rest();
|
||||
|
|
@ -358,12 +354,12 @@ void analyze_reasoning::compare_thinking_enabled() {
|
|||
});
|
||||
if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
|
||||
parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
} else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
|
||||
auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
|
||||
if (result.result.success()) {
|
||||
start = result.tags["pre"];
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -373,7 +369,7 @@ void analyze_reasoning::compare_thinking_enabled() {
|
|||
auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
|
||||
auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
|
||||
if (seg_A.size() == 1 && seg_B.size() == 1) {
|
||||
mode = reasoning_mode::FORCED_CLOSED;
|
||||
mode = reasoning_mode::TAG_BASED;
|
||||
start = seg_B[0].value;
|
||||
end = seg_A[0].value;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -229,6 +229,20 @@ void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena,
|
|||
result.tool_calls.push_back(pending_tool_call.value());
|
||||
pending_tool_call.reset();
|
||||
}
|
||||
|
||||
// Discard whitespace-only reasoning content (e.g. from <think></think> prefill)
|
||||
if (!result.reasoning_content.empty()) {
|
||||
bool all_whitespace = true;
|
||||
for (char c : result.reasoning_content) {
|
||||
if (c != ' ' && c != '\n' && c != '\r' && c != '\t') {
|
||||
all_whitespace = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (all_whitespace) {
|
||||
result.reasoning_content.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
||||
|
|
|
|||
|
|
@ -1718,14 +1718,20 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
|
|||
LOG_DBG("No parser definition detected, assuming pure content parser.");
|
||||
}
|
||||
|
||||
LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str());
|
||||
// Prepend reasoning prefill (e.g. <think> or <think></think> from template prompt)
|
||||
// so the parser can detect reasoning markers that were part of the template output.
|
||||
const std::string effective_input = params.reasoning_prefill.empty()
|
||||
? input
|
||||
: params.reasoning_prefill + input;
|
||||
|
||||
LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), effective_input.c_str());
|
||||
|
||||
common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_LENIENT;
|
||||
if (params.debug) {
|
||||
flags |= COMMON_PEG_PARSE_FLAG_DEBUG;
|
||||
}
|
||||
|
||||
common_peg_parse_context ctx(input, flags);
|
||||
common_peg_parse_context ctx(effective_input, flags);
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
if (result.fail()) {
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ struct common_chat_params {
|
|||
std::string prompt;
|
||||
std::string grammar;
|
||||
bool grammar_lazy = false;
|
||||
bool thinking_forced_open = false;
|
||||
std::string reasoning_prefill;
|
||||
bool supports_thinking = false;
|
||||
std::string thinking_start_tag; // e.g., "<think>"
|
||||
std::string thinking_end_tag; // e.g., "</think>"
|
||||
|
|
@ -228,14 +228,14 @@ struct common_chat_parser_params {
|
|||
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
|
||||
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
|
||||
bool reasoning_in_content = false;
|
||||
bool thinking_forced_open = false;
|
||||
std::string reasoning_prefill;
|
||||
bool parse_tool_calls = true;
|
||||
bool debug = false; // Enable debug output for PEG parser
|
||||
common_peg_arena parser = {};
|
||||
common_chat_parser_params() = default;
|
||||
common_chat_parser_params(const common_chat_params & chat_params) {
|
||||
format = chat_params.format;
|
||||
thinking_forced_open = chat_params.thinking_forced_open;
|
||||
reasoning_prefill = chat_params.reasoning_prefill;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -47,12 +47,11 @@ All structs are defined in [common/chat-auto-parser.h](common/chat-auto-parser.h
|
|||
| Value | Description |
|
||||
|-----------------|-----------------------------------------------------------------------------------|
|
||||
| `NONE` | No reasoning markers detected |
|
||||
| `TAG_BASED` | Standard tag-based: `<think>...</think>` |
|
||||
| `DELIMITER` | Delimiter-based: reasoning ends at a delimiter (e.g., `[BEGIN FINAL RESPONSE]`) |
|
||||
| `FORCED_OPEN` | Template ends with open reasoning tag when `enable_thinking=true` |
|
||||
| `FORCED_CLOSED` | `enable_thinking=false` emits both tags; `enable_thinking=true` emits only start |
|
||||
| `TAG_BASED` | Tag-based: `<think>...</think>` (start can be empty for delimiter-style formats) |
|
||||
| `TOOLS_ONLY` | Reasoning only appears in tool call responses, not plain content |
|
||||
|
||||
**Reasoning Prefill**: When a template adds reasoning markers (e.g., `<think>` or `<think></think>`) at the end of the prompt, these are extracted as `reasoning_prefill` and prepended to the model output before parsing. This allows the parser to always use an optional TAG_BASED pattern while correctly handling templates that force thinking mode open or closed. Whitespace-only reasoning content (from `<think></think>` prefill) is automatically discarded.
|
||||
|
||||
**`content_mode`**: How the template wraps assistant content.
|
||||
|
||||
| Value | Description |
|
||||
|
|
@ -263,14 +262,15 @@ Text is segmentized into markers and non-marker fragments using `segmentize_mark
|
|||
- Uses PEG parsers to find surrounding markers:
|
||||
- If both pre and post markers are found → `TAG_BASED`
|
||||
- If both found but post marker only in the full output B → `FORCED_CLOSED`
|
||||
- If only post marker found → `DELIMITER`
|
||||
- If only post marker found → `TAG_BASED` (delimiter-style, empty start)
|
||||
- Sets `reasoning.start` and `reasoning.end`
|
||||
|
||||
**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true` with a generation prompt.
|
||||
|
||||
- Detects `FORCED_OPEN`: `enable_thinking=true` adds a non-empty marker at the end of the prompt (where model will start generating) — sets `reasoning.start`, mode = `FORCED_OPEN`
|
||||
- Detects `FORCED_CLOSED`: `enable_thinking=false` produces both start+end markers; `enable_thinking=true` produces only start marker
|
||||
- Detects template-added reasoning markers: `enable_thinking=true` adds a non-empty marker at the end of the prompt — sets `reasoning.start`, mode = `TAG_BASED`
|
||||
- Detects start+end pattern: `enable_thinking=false` produces both start+end markers; `enable_thinking=true` produces only start marker — both classified as `TAG_BASED`
|
||||
- Handles the reverse case: if both start and end are still empty, looks for a single-segment diff on each side to extract both markers
|
||||
- The reasoning prefill (markers added by the template) is later extracted in `generate_parser()` and prepended to model output before parsing
|
||||
|
||||
**R3 — `compare_reasoning_scope()`**: Compares assistant message with reasoning+text-content vs reasoning+tool-calls.
|
||||
|
||||
|
|
@ -358,9 +358,10 @@ Each analyzer struct (`analyze_reasoning`, `analyze_content`, `analyze_tools`) i
|
|||
| Mode | Parser |
|
||||
|-----------------------------------|---------------------------------------------------------------------|
|
||||
| Not extracting reasoning | `eps()` |
|
||||
| `FORCED_OPEN` or `FORCED_CLOSED` | `reasoning(until(end)) + end` — opening tag was in the prompt |
|
||||
| `TAG_BASED` or `TOOLS_ONLY` | `optional(start + reasoning(until(end)) + end)` |
|
||||
| `DELIMITER` | `optional(reasoning(until(end)) + end)` — no start marker |
|
||||
| `TAG_BASED` or `TOOLS_ONLY` (non-empty start) | `optional(start + reasoning(until(end)) + end)` |
|
||||
| `TAG_BASED` or `TOOLS_ONLY` (empty start) | `optional(reasoning(until(end)) + end)` — delimiter-style |
|
||||
|
||||
Note: Templates that add reasoning markers to the prompt (e.g., `<think>`) have these extracted as `reasoning_prefill` and prepended to model output before parsing. The parser always uses the optional TAG_BASED pattern.
|
||||
|
||||
#### Content Parser (`analyze_content::build_parser`)
|
||||
|
||||
|
|
@ -516,7 +517,7 @@ To support a new template format:
|
|||
|
||||
## Edge Cases and Quirks
|
||||
|
||||
1. **Forced Thinking**: When `enable_thinking=true` and the model prompt ends with an open reasoning tag (e.g., `<think>`), the parser enters forced thinking mode and immediately expects reasoning content without waiting for a start marker.
|
||||
1. **Reasoning Prefill**: When the rendered prompt ends with reasoning markers — e.g., `<think>` when `enable_thinking=true`, or `<think></think>` when the template closes the block for `enable_thinking=false` — these are extracted as `reasoning_prefill` and prepended to model output before parsing. The parser always uses optional TAG_BASED reasoning, so it handles both thinking and non-thinking outputs dynamically. Whitespace-only reasoning content (from closed prefill like `<think></think>`) is discarded.
|
||||
2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start/end`); others wrap the entire section (`section_start/end`). T2 (`check_per_call_markers()`) disambiguates by checking if the second call in a two-call output starts with the section marker.
|
||||
3. **Python Dict Format**: The Seed template family uses single-quoted JSON (`'key': 'value'`). The `uses_python_dicts` flag causes the PEG builder to register a flexible `json-string` rule accepting both quote styles before any JSON rules are built.
|
||||
4. **Tag Boundary Fixing**: `calculate_diff_split()` iteratively adjusts prefix/suffix boundaries to avoid splitting `<tag>` or `[marker]` tokens, ensuring clean extraction.
|
||||
|
|
|
|||
|
|
@ -1295,8 +1295,8 @@ static void test_nemotron_reasoning_detection(testing & t) {
|
|||
t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);
|
||||
|
||||
// Check reasoning mode detection
|
||||
// Nemotron uses forced closed reasoning with add_generation_prompt
|
||||
t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning.mode);
|
||||
// Nemotron uses tag-based reasoning (formerly FORCED_CLOSED; prefill handles the template's forced markers)
|
||||
t.assert_equal("reasoning should be TAG_BASED", reasoning_mode::TAG_BASED, analysis.reasoning.mode);
|
||||
|
||||
// Make sure reasoning markers don't spill over to content markers
|
||||
t.assert_equal("content start should be empty", "", analysis.content.start);
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ static void test_example_native(testing & t) {
|
|||
common_reasoning_format reasoning_format;
|
||||
json json_schema;
|
||||
bool parallel_tool_calls;
|
||||
bool thinking_forced_open;
|
||||
std::string reasoning_prefill;
|
||||
std::string input;
|
||||
|
||||
// Expect
|
||||
|
|
@ -157,14 +157,8 @@ static void test_example_native(testing & t) {
|
|||
auto build_parser = [](const test_case & tc) {
|
||||
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
||||
auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
|
||||
auto reasoning = p.eps();
|
||||
if (tc.thinking_forced_open) {
|
||||
// If thinking is forced open, expect a closing tag
|
||||
reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
|
||||
} else {
|
||||
// Otherwise, optionally accept thinking wrapped in tags
|
||||
reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
|
||||
}
|
||||
// Always use optional TAG_BASED pattern; reasoning_prefill is prepended to input
|
||||
auto reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
|
||||
|
||||
// tool calling parser
|
||||
if (tc.tools.is_array() && !tc.tools.empty()) {
|
||||
|
|
@ -190,78 +184,91 @@ static void test_example_native(testing & t) {
|
|||
|
||||
std::vector<test_case> test_cases = std::vector<test_case>{
|
||||
{
|
||||
/* .name = */ "content with thinking_forced_open = false",
|
||||
/* .name = */ "content with reasoning (no prefill)",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .reasoning_prefill = */ "",
|
||||
/* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
|
||||
/* .expect_reasoning = */ "The user said hello, I must say hello back",
|
||||
/* .expect_content = */ "Hello",
|
||||
/* .expect_tool_calls = */ {},
|
||||
},
|
||||
{
|
||||
/* .name = */ "content with thinking_forced_open = false and no reasoning",
|
||||
/* .name = */ "content without reasoning (no prefill)",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .reasoning_prefill = */ "",
|
||||
/* .input = */ ("Hello"),
|
||||
/* .expect_reasoning = */ "",
|
||||
/* .expect_content = */ "Hello",
|
||||
/* .expect_tool_calls = */ {},
|
||||
},
|
||||
{
|
||||
/* .name = */ "content with thinking_forced_open = false and reasoning_format = none",
|
||||
/* .name = */ "content with reasoning_format = none (tags appear in content)",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .reasoning_prefill = */ "",
|
||||
/* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
|
||||
/* .expect_reasoning = */ "",
|
||||
/* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
|
||||
/* .expect_tool_calls = */ {},
|
||||
},
|
||||
{
|
||||
/* .name = */ "content with thinking_forced_open = true",
|
||||
/* .name = */ "content with reasoning prefill",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .reasoning_prefill = */ "<think>",
|
||||
/* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
|
||||
/* .expect_reasoning = */ "The user said hello, I must say hello back",
|
||||
/* .expect_content = */ "Hello",
|
||||
/* .expect_tool_calls = */ {},
|
||||
},
|
||||
{
|
||||
/* .name = */ "content with thinking_forced_open = true and reasoning_format = none",
|
||||
/* .name = */ "content with reasoning prefill and reasoning_format = none",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .reasoning_prefill = */ "",
|
||||
/* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
|
||||
/* .expect_reasoning = */ "",
|
||||
/* .expect_content = */ "The user said hello, I must say hello back</think>\nHello",
|
||||
/* .expect_tool_calls = */ {},
|
||||
},
|
||||
{
|
||||
/* .name = */ "tools with tool_choice = auto and no parallel_tool_calls",
|
||||
/* .name = */ "content with closed reasoning prefill (empty reasoning discarded)",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .reasoning_prefill = */ "<think></think>",
|
||||
/* .input = */ ("Hello"),
|
||||
/* .expect_reasoning = */ "",
|
||||
/* .expect_content = */ "Hello",
|
||||
/* .expect_tool_calls = */ {},
|
||||
},
|
||||
{
|
||||
/* .name = */ "tools with reasoning prefill",
|
||||
/* .tools = */ create_tools(),
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .reasoning_prefill = */ "<think>",
|
||||
/* .input = */
|
||||
("I must get the weather in New York</think>\n"
|
||||
"<tool_call>["
|
||||
|
|
@ -277,13 +284,13 @@ static void test_example_native(testing & t) {
|
|||
} },
|
||||
},
|
||||
{
|
||||
/* .name = */ "tools with tool_choice = auto and parallel_tool_calls",
|
||||
/* .name = */ "parallel tools with reasoning prefill",
|
||||
/* .tools = */ create_tools(),
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .json_schema = */ {},
|
||||
/* .parallel_tool_calls = */ true,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .reasoning_prefill = */ "<think>",
|
||||
/* .input = */
|
||||
("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
|
||||
"search that for you."
|
||||
|
|
@ -321,7 +328,7 @@ static void test_example_native(testing & t) {
|
|||
} },
|
||||
},
|
||||
{
|
||||
/* .name = */ "response_format with thinking_forced_open = true",
|
||||
/* .name = */ "response_format with reasoning prefill",
|
||||
/* .tools = */ {},
|
||||
/* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
|
|
@ -333,7 +340,7 @@ static void test_example_native(testing & t) {
|
|||
{ "due_date", { { "type", "string" } } } } },
|
||||
{ "required", { "invoice_number", "amount", "due_date" } } },
|
||||
/* .parallel_tool_calls = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .reasoning_prefill = */ "<think>",
|
||||
/* .input = */
|
||||
("I must produce the invoice in the requested format</think>\n"
|
||||
R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
|
||||
|
|
@ -361,7 +368,8 @@ static void test_example_native(testing & t) {
|
|||
t.log(line);
|
||||
}
|
||||
|
||||
common_peg_parse_context ctx(tc.input);
|
||||
std::string effective_input = tc.reasoning_prefill + tc.input;
|
||||
common_peg_parse_context ctx(effective_input);
|
||||
auto result = parser.parse(ctx);
|
||||
|
||||
t.assert_true("success", result.success());
|
||||
|
|
|
|||
|
|
@ -822,8 +822,7 @@ struct make_peg_parser {
|
|||
}
|
||||
|
||||
common_chat_msg parse(const std::string & msg, bool is_partial) const {
|
||||
common_chat_parser_params parser_params;
|
||||
parser_params.format = params_.format;
|
||||
common_chat_parser_params parser_params(params_);
|
||||
parser_params.debug = detailed_debug_;
|
||||
return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -907,7 +907,7 @@ If query param `?fail_on_no_slot=1` is set, this endpoint will respond with stat
|
|||
"chat_format": "GPT-OSS",
|
||||
"reasoning_format": "none",
|
||||
"reasoning_in_content": false,
|
||||
"thinking_forced_open": false,
|
||||
"reasoning_prefill": "",
|
||||
"samplers": [
|
||||
"penalties",
|
||||
"dry",
|
||||
|
|
@ -972,7 +972,7 @@ If query param `?fail_on_no_slot=1` is set, this endpoint will respond with stat
|
|||
"chat_format": "GPT-OSS",
|
||||
"reasoning_format": "none",
|
||||
"reasoning_in_content": false,
|
||||
"thinking_forced_open": false,
|
||||
"reasoning_prefill": "",
|
||||
"samplers": [
|
||||
"penalties",
|
||||
"dry",
|
||||
|
|
@ -1193,7 +1193,7 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":
|
|||
|
||||
`reasoning_format`: The reasoning format to be parsed. If set to `none`, it will output the raw generated text.
|
||||
|
||||
`thinking_forced_open`: Force a reasoning model to always output the reasoning. Only works on certain models.
|
||||
`reasoning_prefill`: The reasoning markers that were prefilled in the prompt by the template. Prepended to model output before parsing to handle dynamic thinking/non-thinking modes.
|
||||
|
||||
`parse_tool_calls`: Whether to parse the generated tool call.
|
||||
|
||||
|
|
|
|||
|
|
@ -1093,7 +1093,7 @@ json oaicompat_chat_params_parse(
|
|||
}
|
||||
llama_params["grammar_triggers"] = grammar_triggers;
|
||||
llama_params["preserved_tokens"] = chat_params.preserved_tokens;
|
||||
llama_params["thinking_forced_open"] = chat_params.thinking_forced_open;
|
||||
llama_params["reasoning_prefill"] = chat_params.reasoning_prefill;
|
||||
for (const auto & stop : chat_params.additional_stops) {
|
||||
llama_params["stop"].push_back(stop);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ json task_params::to_json(bool only_metrics) const {
|
|||
{"chat_format", common_chat_format_name(chat_parser_params.format)},
|
||||
{"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)},
|
||||
{"reasoning_in_content", chat_parser_params.reasoning_in_content},
|
||||
{"thinking_forced_open", chat_parser_params.thinking_forced_open},
|
||||
{"reasoning_prefill", chat_parser_params.reasoning_prefill},
|
||||
{"samplers", samplers},
|
||||
{"speculative.n_max", speculative.n_max},
|
||||
{"speculative.n_min", speculative.n_min},
|
||||
|
|
@ -135,7 +135,7 @@ json task_params::to_json(bool only_metrics) const {
|
|||
{"chat_format", common_chat_format_name(chat_parser_params.format)},
|
||||
{"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)},
|
||||
{"reasoning_in_content", chat_parser_params.reasoning_in_content},
|
||||
{"thinking_forced_open", chat_parser_params.thinking_forced_open},
|
||||
{"reasoning_prefill", chat_parser_params.reasoning_prefill},
|
||||
{"samplers", samplers},
|
||||
{"speculative.n_max", speculative.n_max},
|
||||
{"speculative.n_min", speculative.n_min},
|
||||
|
|
@ -402,7 +402,7 @@ task_params server_task::params_from_json_cmpl(
|
|||
}
|
||||
params.chat_parser_params.reasoning_format = reasoning_format;
|
||||
params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
|
||||
params.chat_parser_params.thinking_forced_open = json_value(data, "thinking_forced_open", false);
|
||||
params.chat_parser_params.reasoning_prefill = json_value(data, "reasoning_prefill", std::string());
|
||||
params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false);
|
||||
if (data.contains("chat_parser")) {
|
||||
params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ describe('ParameterSyncService', () => {
|
|||
chat_format: '',
|
||||
reasoning_format: '',
|
||||
reasoning_in_content: false,
|
||||
thinking_forced_open: false,
|
||||
reasoning_prefill: '',
|
||||
'speculative.n_max': 0,
|
||||
'speculative.n_min': 0,
|
||||
'speculative.p_min': 0.0,
|
||||
|
|
@ -116,7 +116,7 @@ describe('ParameterSyncService', () => {
|
|||
chat_format: '',
|
||||
reasoning_format: '',
|
||||
reasoning_in_content: false,
|
||||
thinking_forced_open: false,
|
||||
reasoning_prefill: '',
|
||||
'speculative.n_max': 0,
|
||||
'speculative.n_min': 0,
|
||||
'speculative.p_min': 0.0,
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ export interface ApiLlamaCppServerProps {
|
|||
chat_format: string;
|
||||
reasoning_format: string;
|
||||
reasoning_in_content: boolean;
|
||||
thinking_forced_open: boolean;
|
||||
reasoning_prefill: string;
|
||||
samplers: string[];
|
||||
backend_sampling: boolean;
|
||||
'speculative.n_max': number;
|
||||
|
|
@ -332,7 +332,7 @@ export interface ApiSlotData {
|
|||
chat_format: string;
|
||||
reasoning_format: string;
|
||||
reasoning_in_content: boolean;
|
||||
thinking_forced_open: boolean;
|
||||
reasoning_prefill: string;
|
||||
samplers: string[];
|
||||
backend_sampling: boolean;
|
||||
'speculative.n_max': number;
|
||||
|
|
|
|||
Loading…
Reference in New Issue