From f9b571dc96a5b77bab9dacae2a522fb20d8b8542 Mon Sep 17 00:00:00 2001 From: Gunther Schulz Date: Sat, 14 Feb 2026 00:42:32 +0100 Subject: [PATCH] fix(glm4.5): use parse-only for tool_choice=AUTO, grammar only for REQUIRED - In common_chat_params_init_glm_4_5: set grammar_lazy=false; build grammar only when has_tools && tool_choice==REQUIRED (vLLM-style: no trigger/grammar for AUTO, detect tool calls by parsing decoded text). - Relax test-chat assert: allow empty grammar when test message has tool_calls (GLM 4.5 AUTO no longer sets grammar). Fixes server hang when model never outputs trigger (e.g. llama.cpp #19068). Co-authored-by: Cursor --- common/chat.cpp | 32 +++++++++++++++++++------------- tests/test-chat.cpp | 4 +--- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 47a34d5822..7fd7a55302 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2169,7 +2169,9 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; - data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + // vLLM-style for AUTO: no grammar/trigger during generation; tool calls are parsed from decoded text (common_chat_parse_glm_4_5). + // Only use grammar when tool_choice == REQUIRED (force tool call from first token). + data.grammar_lazy = false; std::string prompt = apply(tmpl, inputs); @@ -2228,18 +2230,22 @@ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_temp "<|observation|>" }); - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "\n", - /* form.tool_sep = */ "\n", - /* form.key_start = */ "", - /* form.key_val_sep = */ "\n", - /* form.val_end = */ "\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - }; - build_grammar_xml_tool_call(data, inputs.tools, form); + // Build grammar only for tool_choice == REQUIRED (force tool call from first token). + // For AUTO, generate freely and parse tool calls from decoded text (common_chat_parse_glm_4_5). + const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + if (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) { + static const xml_tool_call_format form { + /* form.scope_start = */ "", + /* form.tool_start = */ "\n", + /* form.tool_sep = */ "\n", + /* form.key_start = */ "", + /* form.key_val_sep = */ "\n", + /* form.val_end = */ "\n", + /* form.tool_end = */ "\n", + /* form.scope_end = */ "", + }; + build_grammar_xml_tool_call(data, inputs.tools, form); + } data.prompt = prompt; data.format = COMMON_CHAT_FORMAT_GLM_4_5; diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 4378a8db71..7ec07e4f8b 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -349,9 +349,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std assert_msg_equals(test_message, msg, ignore_whitespace_differences); } - if (!test_message.tool_calls.empty()) { - GGML_ASSERT(!data.params.grammar.empty()); - } + // GLM 4.5 with tool_choice=AUTO uses parse-only (no grammar); other formats set grammar when tools present if (!data.params.grammar.empty()) { auto grammar = build_grammar(data.params.grammar); if (!grammar) {