Alec Koumjian 2026-02-01 12:33:05 +02:00 committed by GitHub
commit 460fd00d67
4 changed files with 173 additions and 9 deletions


@@ -667,18 +667,42 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
         return l;
     };
     constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
+        // Trim partial suffixes that look like an incomplete special marker (e.g. "<|tool_call_end|>").
+        //
+        // Some tool syntaxes include a normal JSON delimiter *before* a special token, e.g. "}<|tool_call_end|>".
+        // In that case we must avoid trimming the valid JSON '}' when only the beginning of the pattern matches.
+        auto best_erase_from = content.size();
+        for (auto pattern : list) {
+            if (pattern.empty()) {
+                continue;
+            }
+            // If the pattern contains a '<', treat everything before it as a "normal prefix" and only trim if the
+            // model actually started emitting the special token (i.e. matched beyond the prefix).
+            const auto special_pos = pattern.find('<');
             for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
+                const auto match_len = content.size() - match_idx;
+                if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
+                    continue;
+                }
+                if (special_pos != std::string_view::npos && special_pos > 0) {
+                    // Only matched the normal prefix (e.g. "}") - do not trim.
+                    if (match_len <= special_pos) {
+                        continue;
+                    }
+                    // Trim from the start of the special token, preserving the normal prefix.
+                    best_erase_from = std::min(best_erase_from, match_idx + special_pos);
+                } else {
+                    best_erase_from = std::min(best_erase_from, match_idx);
                 }
             }
         }
-        if (content.size() > best_match) {
-            content.erase(best_match);
+        if (content.size() > best_erase_from) {
+            content.erase(best_erase_from);
         }
     };
     const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
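To make the new trimming rule concrete, here is a minimal standalone sketch of the same behavior. It is illustrative only: the helper name trim_partial_suffix, the pattern list, and the sample strings are not part of the commit, but the logic mirrors the new lambda above.

#include <algorithm>
#include <cassert>
#include <initializer_list>
#include <string>
#include <string_view>

// Mirrors the prefix-aware trim_suffix lambda: a dangling partial special marker is removed,
// but a bare normal prefix (e.g. the '}' in "}<|tool_call_end|>") is left untouched.
static void trim_partial_suffix(std::string & content, std::initializer_list<std::string_view> list) {
    auto best_erase_from = content.size();
    for (auto pattern : list) {
        if (pattern.empty()) {
            continue;
        }
        const auto special_pos = pattern.find('<');
        for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
            const auto match_len = content.size() - match_idx;
            if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
                continue;
            }
            if (special_pos != std::string_view::npos && special_pos > 0) {
                if (match_len <= special_pos) {
                    continue; // only the normal prefix matched, e.g. a lone '}'
                }
                best_erase_from = std::min(best_erase_from, match_idx + special_pos);
            } else {
                best_erase_from = std::min(best_erase_from, match_idx);
            }
        }
    }
    if (content.size() > best_erase_from) {
        content.erase(best_erase_from);
    }
}

int main() {
    std::string a = R"({"a":1}<|tool_c)";
    trim_partial_suffix(a, {"<|tool_call_end|>", "}<|tool_call_end|>"});
    assert(a == R"({"a":1})"); // partial special token removed, closing '}' preserved

    std::string b = R"({"a":1})";
    trim_partial_suffix(b, {"}<|tool_call_end|>"});
    assert(b == R"({"a":1})"); // only the normal prefix matched: nothing trimmed
    return 0;
}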


@@ -1880,11 +1880,29 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
 static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
     common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+    const bool has_tools = params.tools.is_array() && !params.tools.empty();
+    const bool has_schema = params.json_schema.is_object();
+    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
     data.prompt = apply(tmpl, params);
     data.format = COMMON_CHAT_FORMAT_KIMI_K2;
+    if (has_tools && has_schema) {
+        throw std::runtime_error("Kimi K2: cannot combine \"tools\" with \"json_schema\"/response_format; remove tools or remove response_format");
+    }
+    if (!has_tools && has_schema) {
+        if (!params.grammar.empty()) {
+            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
+        }
+        // Mirror the generic "content-only" schema enforcement behavior
+        data.grammar = json_schema_to_grammar(params.json_schema);
+    } else {
+        data.grammar = params.grammar;
+    }
     data.preserved_tokens = {
         "<think>",
         "</think>",

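Condensed, the change above is a small decision rule for which grammar the Kimi K2 handler emits: tools combined with json_schema is rejected outright, json_schema alone is converted to a grammar, and otherwise any user-supplied grammar passes through. A paraphrased sketch, using hypothetical stand-in types (kimi_request and schema_to_grammar_stub are illustrative, not the real templates_params or json_schema_to_grammar):

#include <stdexcept>
#include <string>

struct kimi_request {            // stand-in for the relevant bits of templates_params
    bool        has_tools  = false;
    bool        has_schema = false;
    std::string grammar;         // user-supplied GBNF grammar, if any
    std::string json_schema;     // raw JSON schema text, if any
};

// stand-in for json_schema_to_grammar(); here it only tags its input
static std::string schema_to_grammar_stub(const std::string & schema) {
    return "grammar-from-schema: " + schema;
}

static std::string pick_grammar(const kimi_request & req) {
    if (req.has_tools && req.has_schema) {
        // ambiguous composition: fail loudly instead of silently dropping schema enforcement
        throw std::runtime_error("cannot combine tools with json_schema/response_format");
    }
    if (!req.has_tools && req.has_schema) {
        if (!req.grammar.empty()) {
            throw std::runtime_error("either json_schema or grammar can be specified, but not both");
        }
        return schema_to_grammar_stub(req.json_schema); // content-only schema enforcement
    }
    return req.grammar; // tools-only or unconstrained: previous behavior
}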

@@ -188,6 +188,7 @@ llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
 llama_build_and_test(test-chat-template.cpp)
 llama_build_and_test(test-jinja.cpp)
 llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
+llama_build_and_test(test-kimi-response-format.cpp)
 llama_build_and_test(test-json-partial.cpp)
 llama_build_and_test(test-log.cpp)
 llama_build_and_test(


@@ -0,0 +1,121 @@
#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

#include "chat.h"

// Regression test:
// - llama-server /chat/completions parses `response_format` into a JSON schema and passes it into
//   common_chat_templates_apply() as inputs.json_schema.
// - For templates detected as "Kimi K2", llama.cpp selected a Kimi-specific handler that did not
//   apply json_schema-to-grammar conversion, so schema enforcement was silently dropped.
//
// This test asserts that for the Kimi K2 chat template, providing a json_schema results in a
// non-empty grammar being returned by common_chat_templates_apply() (hard enforcement expected).

static const char * KIMI_K2_TEMPLATE = R"JINJA({%- if tools -%}
<|im_system|>tool_declare<|im_middle|>
# Tools
{{ tools | tojson }}<|im_end|>
{%- endif -%}
{%- for message in messages -%}
{%- if loop.first and messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}
{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{% endif %}
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- set formatted_id = tool_call['id'] -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- elif message['role'] == 'tool' -%}
## Return of {{ message.tool_call_id }}
{{ message['content'] }}
{%- elif message['content'] is string -%}
{{ message['content'] }}
{%- elif message['content'] is not none -%}
{% for content in message['content'] -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
<|im_end|>
{%- endfor -%}
{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%})JINJA";

int main() {
    auto tmpls = common_chat_templates_init(/* model= */ nullptr, KIMI_K2_TEMPLATE);

    common_chat_templates_inputs inputs;
    inputs.use_jinja = true;
    inputs.add_generation_prompt = true;

    // No tools
    inputs.tools = {};
    inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
    inputs.json_schema = R"JSON({
        "type": "object",
        "properties": { "ok": { "type": "boolean" } },
        "required": ["ok"],
        "additionalProperties": false
    })JSON";
    inputs.messages = {
        common_chat_msg{"system", "Return ONLY JSON with key ok.", {}, {}, "", "", ""},
        common_chat_msg{"user", "ok", {}, {}, "", "", ""},
    };

    const auto out = common_chat_templates_apply(tmpls.get(), inputs);

    // Confirm the Kimi K2 handler was actually selected (not a generic fallback).
    assert(out.format == COMMON_CHAT_FORMAT_KIMI_K2);
    assert(!out.grammar.empty());

    // tools + json_schema is explicitly unsupported for Kimi K2 (ambiguous composition).
    // Ensure we fail loudly rather than silently dropping schema enforcement.
    inputs.tools = {
        common_chat_tool{
            /* .name = */ "noop",
            /* .description = */ "No-op tool",
            /* .parameters = */ R"JSON({
                "type": "object",
                "properties": { "x": { "type": "string" } },
                "required": ["x"],
                "additionalProperties": false
            })JSON",
        },
    };
    inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;

    bool threw = false;
    try {
        (void) common_chat_templates_apply(tmpls.get(), inputs);
    } catch (const std::exception &) {
        threw = true;
    }
    // Avoid relying on assert() in Release builds (may be compiled out).
    if (!threw) {
        return 2;
    }

    return 0;
}