Merge 3829263010 into 2634ed207a
This commit is contained in:
commit
460fd00d67
|
|
@ -667,18 +667,42 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
|
|||
return l;
|
||||
};
|
||||
// Erase from `content` any trailing text that looks like the beginning of one of the
// marker strings in `list` (e.g. an incomplete "<|tool_call_end|>").
//
// Some tool syntaxes include a normal JSON delimiter *before* a special token,
// e.g. "}<|tool_call_end|>". For such patterns the part before the first '<' is treated
// as a "normal prefix": it is never erased, and nothing is trimmed unless the match
// extends past that prefix into the special token itself.
//
// content: string to trim in place.
// list:    candidate marker patterns; empty patterns are ignored.
//
// When several patterns/suffixes match, the earliest erase position wins.
constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
    // content.size() means "nothing to erase".
    auto best_erase_from = content.size();

    for (const std::string_view pattern : list) {
        if (pattern.empty()) {
            continue;
        }

        // Offset of the special token inside the pattern (npos if the pattern has no '<').
        const auto special_pos = pattern.find('<');
        const bool has_normal_prefix = special_pos != std::string_view::npos && special_pos > 0;

        // Try every suffix of `content` that is no longer than the pattern, longest first.
        const auto first_idx = content.size() - std::min(pattern.size(), content.size());
        for (auto match_idx = first_idx; match_idx < content.size(); ++match_idx) {
            const auto match_len = content.size() - match_idx;

            // The suffix must equal the first `match_len` characters of the pattern.
            if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
                continue;
            }

            if (!has_normal_prefix) {
                best_erase_from = std::min(best_erase_from, match_idx);
                continue;
            }

            // Only matched the normal prefix (e.g. "}") - do not trim.
            if (match_len <= special_pos) {
                continue;
            }

            // Trim from the start of the special token, preserving the normal prefix.
            best_erase_from = std::min(best_erase_from, match_idx + special_pos);
        }
    }

    if (best_erase_from < content.size()) {
        content.erase(best_erase_from);
    }
};
|
||||
const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
|
||||
|
|
|
|||
|
|
@ -1880,11 +1880,29 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
|
|||
|
||||
static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
||||
const bool has_tools = params.tools.is_array() && !params.tools.empty();
|
||||
const bool has_schema = params.json_schema.is_object();
|
||||
|
||||
data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
||||
|
||||
data.prompt = apply(tmpl, params);
|
||||
data.format = COMMON_CHAT_FORMAT_KIMI_K2;
|
||||
|
||||
|
||||
if (has_tools && has_schema) {
|
||||
throw std::runtime_error("Kimi K2: cannot combine \"tools\" with \"json_schema\"/response_format; remove tools or remove response_format");
|
||||
}
|
||||
|
||||
if (!has_tools && has_schema) {
|
||||
if (!params.grammar.empty()) {
|
||||
throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
|
||||
}
|
||||
// Mirror the generic "content-only" schema enforcement behavior
|
||||
data.grammar = json_schema_to_grammar(params.json_schema);
|
||||
} else {
|
||||
data.grammar = params.grammar;
|
||||
}
|
||||
|
||||
data.preserved_tokens = {
|
||||
"<think>",
|
||||
"</think>",
|
||||
|
|
|
|||
|
|
@ -188,6 +188,7 @@ llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
|
|||
llama_build_and_test(test-chat-template.cpp)
|
||||
llama_build_and_test(test-jinja.cpp)
|
||||
llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
|
||||
llama_build_and_test(test-kimi-response-format.cpp)
|
||||
llama_build_and_test(test-json-partial.cpp)
|
||||
llama_build_and_test(test-log.cpp)
|
||||
llama_build_and_test(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,121 @@
|
|||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "chat.h"
|
||||
|
||||
// Regression test:
|
||||
// - llama-server /chat/completions parses `response_format` into a JSON schema and passes it into
|
||||
// common_chat_templates_apply() as inputs.json_schema.
|
||||
// - For templates detected as "Kimi K2", llama.cpp selected a Kimi-specific handler that did not
|
||||
// apply json_schema-to-grammar conversion, so schema enforcement was silently dropped.
|
||||
//
|
||||
// This test asserts that for the Kimi K2 chat template, providing a json_schema results in a
|
||||
// non-empty grammar being returned by common_chat_templates_apply() (hard enforcement expected).
|
||||
|
||||
static const char * KIMI_K2_TEMPLATE = R"JINJA({%- if tools -%}
|
||||
<|im_system|>tool_declare<|im_middle|>
|
||||
# Tools
|
||||
{{ tools | tojson }}<|im_end|>
|
||||
{%- endif -%}
|
||||
{%- for message in messages -%}
|
||||
{%- if loop.first and messages[0]['role'] != 'system' -%}
|
||||
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
|
||||
{%- endif -%}
|
||||
|
||||
{%- set role_name = message.get('name') or message['role'] -%}
|
||||
{%- if message['role'] == 'user' -%}
|
||||
<|im_user|>{{role_name}}<|im_middle|>
|
||||
{%- elif message['role'] == 'assistant' -%}
|
||||
<|im_assistant|>{{role_name}}<|im_middle|>
|
||||
{%- else -%}
|
||||
<|im_system|>{{role_name}}<|im_middle|>
|
||||
{% endif %}
|
||||
|
||||
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
|
||||
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
|
||||
<|tool_calls_section_begin|>
|
||||
{%- for tool_call in message['tool_calls'] -%}
|
||||
{%- set formatted_id = tool_call['id'] -%}
|
||||
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
|
||||
{%- endfor -%}
|
||||
<|tool_calls_section_end|>
|
||||
{%- elif message['role'] == 'tool' -%}
|
||||
## Return of {{ message.tool_call_id }}
|
||||
{{ message['content'] }}
|
||||
{%- elif message['content'] is string -%}
|
||||
{{ message['content'] }}
|
||||
{%- elif message['content'] is not none -%}
|
||||
{% for content in message['content'] -%}
|
||||
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
|
||||
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
|
||||
{% else -%}
|
||||
{{ content['text'] }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
<|im_end|>
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
<|im_assistant|>assistant<|im_middle|>
|
||||
{%- endif -%})JINJA";
|
||||
|
||||
int main() {
|
||||
auto tmpls = common_chat_templates_init(/* model= */ nullptr, KIMI_K2_TEMPLATE);
|
||||
|
||||
common_chat_templates_inputs inputs;
|
||||
inputs.use_jinja = true;
|
||||
inputs.add_generation_prompt = true;
|
||||
|
||||
// No tools
|
||||
inputs.tools = {};
|
||||
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
|
||||
|
||||
inputs.json_schema = R"JSON({
|
||||
"type": "object",
|
||||
"properties": { "ok": { "type": "boolean" } },
|
||||
"required": ["ok"],
|
||||
"additionalProperties": false
|
||||
})JSON";
|
||||
|
||||
inputs.messages = {
|
||||
common_chat_msg{"system", "Return ONLY JSON with key ok.", {}, {}, "", "", ""},
|
||||
common_chat_msg{"user", "ok", {}, {}, "", "", ""},
|
||||
};
|
||||
|
||||
const auto out = common_chat_templates_apply(tmpls.get(), inputs);
|
||||
|
||||
// Confirm the Kimi K2 handler was actually selected (not a generic fallback).
|
||||
assert(out.format == COMMON_CHAT_FORMAT_KIMI_K2);
|
||||
assert(!out.grammar.empty());
|
||||
|
||||
// tools + json_schema is explicitly unsupported for Kimi K2 (ambiguous composition).
|
||||
// Ensure we fail loudly rather than silently dropping schema enforcement.
|
||||
inputs.tools = {
|
||||
common_chat_tool{
|
||||
/* .name = */ "noop",
|
||||
/* .description = */ "No-op tool",
|
||||
/* .parameters = */ R"JSON({
|
||||
"type": "object",
|
||||
"properties": { "x": { "type": "string" } },
|
||||
"required": ["x"],
|
||||
"additionalProperties": false
|
||||
})JSON",
|
||||
},
|
||||
};
|
||||
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
||||
|
||||
bool threw = false;
|
||||
try {
|
||||
(void) common_chat_templates_apply(tmpls.get(), inputs);
|
||||
} catch (const std::exception &) {
|
||||
threw = true;
|
||||
}
|
||||
// Avoid relying on assert() in Release builds (may be compiled out).
|
||||
if (!threw) {
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue