fix(glm4.5): use parse-only for tool_choice=AUTO, grammar only for REQUIRED

- In common_chat_params_init_glm_4_5: set grammar_lazy=false; build grammar
  only when has_tools && tool_choice==REQUIRED (vLLM-style: no trigger/grammar
  for AUTO, detect tool calls by parsing decoded text).
- Relax test-chat check: remove the assert that required a non-empty grammar
  whenever the test message has tool_calls (GLM 4.5 AUTO no longer sets grammar).

Fixes server hang when the model never outputs the grammar trigger (e.g. llama.cpp #19068).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Gunther Schulz 2026-02-14 00:42:32 +01:00
parent 73cd5e1b97
commit f9b571dc96
2 changed files with 20 additions and 16 deletions

View File

@ -2169,7 +2169,9 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
// vLLM-style for AUTO: no grammar/trigger during generation; tool calls are parsed from decoded text (common_chat_parse_glm_4_5).
// Only use grammar when tool_choice == REQUIRED (force tool call from first token).
data.grammar_lazy = false;
std::string prompt = apply(tmpl, inputs);
@ -2228,18 +2230,22 @@ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_temp
"<|observation|>"
});
// build grammar for tool call
static const xml_tool_call_format form {
/* form.scope_start = */ "",
/* form.tool_start = */ "\n<tool_call>",
/* form.tool_sep = */ "\n",
/* form.key_start = */ "<arg_key>",
/* form.key_val_sep = */ "</arg_key>\n<arg_value>",
/* form.val_end = */ "</arg_value>\n",
/* form.tool_end = */ "</tool_call>\n",
/* form.scope_end = */ "",
};
build_grammar_xml_tool_call(data, inputs.tools, form);
// Build grammar only for tool_choice == REQUIRED (force tool call from first token).
// For AUTO, generate freely and parse tool calls from decoded text (common_chat_parse_glm_4_5).
const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
if (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
static const xml_tool_call_format form {
/* form.scope_start = */ "",
/* form.tool_start = */ "\n<tool_call>",
/* form.tool_sep = */ "\n",
/* form.key_start = */ "<arg_key>",
/* form.key_val_sep = */ "</arg_key>\n<arg_value>",
/* form.val_end = */ "</arg_value>\n",
/* form.tool_end = */ "</tool_call>\n",
/* form.scope_end = */ "",
};
build_grammar_xml_tool_call(data, inputs.tools, form);
}
data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_GLM_4_5;

View File

@ -349,9 +349,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
assert_msg_equals(test_message, msg, ignore_whitespace_differences);
}
if (!test_message.tool_calls.empty()) {
GGML_ASSERT(!data.params.grammar.empty());
}
// GLM 4.5 with tool_choice=AUTO uses parse-only (no grammar); other formats set grammar when tools present
if (!data.params.grammar.empty()) {
auto grammar = build_grammar(data.params.grammar);
if (!grammar) {