diff --git a/common/chat.cpp b/common/chat.cpp
index 47a34d5822..7fd7a55302 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -2169,7 +2169,9 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
- data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+ // vLLM-style handling for tool_choice == AUTO: no grammar or trigger constrains generation; tool calls are parsed from the decoded text (common_chat_parse_glm_4_5).
+ // A grammar is built only when tool_choice == REQUIRED, where it must force a tool call from the first token, so the grammar is never lazy.
+ data.grammar_lazy = false;
std::string prompt = apply(tmpl, inputs);
@@ -2228,18 +2230,22 @@ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_temp
"<|observation|>"
});
- // build grammar for tool call
- static const xml_tool_call_format form {
- /* form.scope_start = */ "",
- /* form.tool_start = */ "\n",
- /* form.tool_sep = */ "\n",
- /* form.key_start = */ "",
- /* form.key_val_sep = */ "\n",
- /* form.val_end = */ "\n",
- /* form.tool_end = */ "\n",
- /* form.scope_end = */ "",
- };
- build_grammar_xml_tool_call(data, inputs.tools, form);
+ // Build grammar only for tool_choice == REQUIRED (force tool call from first token).
+ // For AUTO, generate freely and parse tool calls from decoded text (common_chat_parse_glm_4_5).
+ const bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+ if (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
+ static const xml_tool_call_format form {
+ /* form.scope_start = */ "",
+ /* form.tool_start = */ "\n",
+ /* form.tool_sep = */ "\n",
+ /* form.key_start = */ "",
+ /* form.key_val_sep = */ "\n",
+ /* form.val_end = */ "\n",
+ /* form.tool_end = */ "\n",
+ /* form.scope_end = */ "",
+ };
+ build_grammar_xml_tool_call(data, inputs.tools, form);
+ }
data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_GLM_4_5;
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4378a8db71..7ec07e4f8b 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -349,9 +349,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
assert_msg_equals(test_message, msg, ignore_whitespace_differences);
}
- if (!test_message.tool_calls.empty()) {
- GGML_ASSERT(!data.params.grammar.empty());
- }
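+ // No unconditional grammar assertion: GLM 4.5 with tool_choice=AUTO is parse-only (no grammar), while other formats set a grammar when tools are present; the grammar is validated below only when one was produced.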
if (!data.params.grammar.empty()) {
auto grammar = build_grammar(data.params.grammar);
if (!grammar) {