From 38292630106ca50a91d7c9bd7aaa13d5f78ba157 Mon Sep 17 00:00:00 2001 From: Alec Koumjian Date: Sun, 11 Jan 2026 19:17:35 +0000 Subject: [PATCH] enforce response_format and json_schema for Kimi K2 --- common/chat-parser-xml-toolcall.cpp | 40 +++++++-- common/chat.cpp | 20 ++++- tests/CMakeLists.txt | 1 + tests/test-kimi-response-format.cpp | 121 ++++++++++++++++++++++++++++ 4 files changed, 173 insertions(+), 9 deletions(-) create mode 100644 tests/test-kimi-response-format.cpp diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp index a80900ff8d..11bddc307e 100644 --- a/common/chat-parser-xml-toolcall.cpp +++ b/common/chat-parser-xml-toolcall.cpp @@ -667,18 +667,42 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons return l; }; constexpr auto trim_suffix = [](std::string &content, std::initializer_list list) { - auto best_match = content.size(); - for (auto pattern: list) { - if (pattern.size() == 0) continue; + // Trim partial suffixes that look like an incomplete special marker (e.g. "<|tool_call_end|>"). + // + // Some tool syntaxes include a normal JSON delimiter *before* a special token, e.g. "}<|tool_call_end|>". + // In that case we must avoid trimming the valid JSON '}' when only the beginning of the pattern matches. + auto best_erase_from = content.size(); + + for (auto pattern : list) { + if (pattern.empty()) { + continue; + } + + // If the pattern contains a '<', treat everything before it as a "normal prefix" and only trim if the + // model actually started emitting the special token (i.e. matched beyond the prefix). 
+ const auto special_pos = pattern.find('<'); + for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) { - auto match_len = content.size() - match_idx; - if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) { - best_match = match_idx; + const auto match_len = content.size() - match_idx; + if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) { + continue; + } + + if (special_pos != std::string_view::npos && special_pos > 0) { + // Only matched the normal prefix (e.g. "}") - do not trim. + if (match_len <= special_pos) { + continue; + } + // Trim from the start of the special token, preserving the normal prefix. + best_erase_from = std::min(best_erase_from, match_idx + special_pos); + } else { + best_erase_from = std::min(best_erase_from, match_idx); } } } - if (content.size() > best_match) { - content.erase(best_match); + + if (content.size() > best_erase_from) { + content.erase(best_erase_from); } }; const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) { diff --git a/common/chat.cpp b/common/chat.cpp index eeb38ad06a..bd2597f498 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1881,11 +1881,29 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) { common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + const bool has_tools = params.tools.is_array() && !params.tools.empty(); + const bool has_schema = params.json_schema.is_object(); + + data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; data.prompt = apply(tmpl, params); data.format = COMMON_CHAT_FORMAT_KIMI_K2; + + if 
(has_tools && has_schema) { + throw std::runtime_error("Kimi K2: cannot combine \"tools\" with \"json_schema\"/response_format; remove tools or remove response_format"); + } + + if (!has_tools && has_schema) { + if (!params.grammar.empty()) { + throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); + } + // Mirror the generic "content-only" schema enforcement behavior + data.grammar = json_schema_to_grammar(params.json_schema); + } else { + data.grammar = params.grammar; + } + data.preserved_tokens = { "", "", diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c9436c5995..196f689ac7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -188,6 +188,7 @@ llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp) llama_build_and_test(test-chat-template.cpp) llama_build_and_test(test-jinja.cpp) llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python) +llama_build_and_test(test-kimi-response-format.cpp) llama_build_and_test(test-json-partial.cpp) llama_build_and_test(test-log.cpp) llama_build_and_test( diff --git a/tests/test-kimi-response-format.cpp b/tests/test-kimi-response-format.cpp new file mode 100644 index 0000000000..813c9afb6e --- /dev/null +++ b/tests/test-kimi-response-format.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +#include "chat.h" + +// Regression test: +// - llama-server /chat/completions parses `response_format` into a JSON schema and passes it into +// common_chat_templates_apply() as inputs.json_schema. +// - For templates detected as "Kimi K2", llama.cpp selected a Kimi-specific handler that did not +// apply json_schema-to-grammar conversion, so schema enforcement was silently dropped. +// +// This test asserts that for the Kimi K2 chat template, providing a json_schema results in a +// non-empty grammar being returned by common_chat_templates_apply() (hard enforcement expected). 
+
+// Kimi K2 chat template, embedded here and handed to common_chat_templates_init() with a null model.
+static const char * KIMI_K2_TEMPLATE = R"JINJA({%- if tools -%}
+  <|im_system|>tool_declare<|im_middle|>
+  # Tools
+  {{ tools | tojson }}<|im_end|>
+{%- endif -%}
+{%- for message in messages -%}
+  {%- if loop.first and messages[0]['role'] != 'system' -%}
+    <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
+  {%- endif -%}
+
+  {%- set role_name = message.get('name') or message['role'] -%}
+  {%- if message['role'] == 'user' -%}
+    <|im_user|>{{role_name}}<|im_middle|>
+  {%- elif message['role'] == 'assistant' -%}
+    <|im_assistant|>{{role_name}}<|im_middle|>
+  {%- else -%}
+    <|im_system|>{{role_name}}<|im_middle|>
+  {% endif %}
+
+  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+    <|tool_calls_section_begin|>
+    {%- for tool_call in message['tool_calls'] -%}
+      {%- set formatted_id = tool_call['id'] -%}
+      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
+    {%- endfor -%}
+    <|tool_calls_section_end|>
+  {%- elif message['role'] == 'tool' -%}
+    ## Return of {{ message.tool_call_id }}
+    {{ message['content'] }}
+  {%- elif message['content'] is string -%}
+    {{ message['content'] }}
+  {%- elif message['content'] is not none -%}
+    {% for content in message['content'] -%}
+      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+        <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+      {% else -%}
+        {{ content['text'] }}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+  <|im_assistant|>assistant<|im_middle|>
+{%- endif -%})JINJA";
+
+// Exit status: 0 = pass, 1 = wrong handler or json_schema produced no grammar, 2 = tools + json_schema was not rejected.
+int main() {
+    auto tmpls = common_chat_templates_init(/* model= */ nullptr, KIMI_K2_TEMPLATE);
+
+    common_chat_templates_inputs inputs;
+    inputs.use_jinja = true;
+    inputs.add_generation_prompt = true;
+
+    // No tools
+    inputs.tools = {};
+    inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    inputs.json_schema = R"JSON({
+        "type": "object",
+        "properties": { "ok": { "type": "boolean" } },
+        "required": ["ok"],
+        "additionalProperties": false
+    })JSON";
+
+    inputs.messages = {
+        common_chat_msg{"system", "Return ONLY JSON with key ok.", {}, {}, "", "", ""},
+        common_chat_msg{"user", "ok", {}, {}, "", "", ""},
+    };
+
+    const auto out = common_chat_templates_apply(tmpls.get(), inputs);
+
+    // Explicit checks, not assert(): assert() expands to nothing under NDEBUG, which would disable this regression test.
+    if (out.format != COMMON_CHAT_FORMAT_KIMI_K2) { return 1; } // Kimi K2 handler was not selected (generic fallback)
+    if (out.grammar.empty())                      { return 1; } // json_schema was not turned into a grammar
+
+    // tools + json_schema is explicitly unsupported for Kimi K2 (ambiguous composition).
+    // Ensure we fail loudly rather than silently dropping schema enforcement.
+    inputs.tools = {
+        common_chat_tool{
+            /* .name = */ "noop",
+            /* .description = */ "No-op tool",
+            /* .parameters = */ R"JSON({
+                "type": "object",
+                "properties": { "x": { "type": "string" } },
+                "required": ["x"],
+                "additionalProperties": false
+            })JSON",
+        },
+    };
+    inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+
+    bool threw = false;
+    try {
+        (void) common_chat_templates_apply(tmpls.get(), inputs);
+    } catch (const std::exception &) {
+        threw = true;
+    }
+    // Same reasoning as above: report through the exit code rather than assert().
+    if (!threw) { return 2; }
+    return 0;
+}
+