This commit is contained in:
Jesus Talavera 2026-03-23 00:43:04 +00:00 committed by GitHub
commit fccce44563
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 307 additions and 5 deletions

View File

@@ -0,0 +1,118 @@
{#- IBM Granite 4.0 chat template.
    Assembles a system turn (user-supplied text plus optional tool and document
    instructions), then renders each message between
    <|start_of_role|>role<|end_of_role|> and <|end_of_text|> markers.
    Assistant tool calls are emitted as JSON inside <tool_call> XML tags;
    tool results are wrapped in <tool_response> tags inside a user turn.
    All comments use {%raw%}{#- -#}{%endraw%} whitespace control so rendered output is unchanged. -#}
{%- set tools_system_message_prefix = 'You are a helpful assistant with access to the following tools. You may call one or more tools to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>' %}
{%- set tools_system_message_suffix = '\n</tools>\n\nFor each tool call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.' %}
{%- set documents_system_message_prefix = 'You are a helpful assistant with access to the following documents. You may use one or more documents to assist with the user query.\n\nYou are given a list of documents within <documents></documents> XML tags:\n<documents>' %}
{%- set documents_system_message_suffix = '\n</documents>\n\nWrite the response to the user\'s input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.' %}
{%- set g4_default_system_message = 'You are a helpful assistant. Please ensure responses are professional, accurate, and safe.' %}
{#- Some callers pass the tool list as 'available_tools'; normalize it to 'tools'. -#}
{%- if available_tools is defined and available_tools %}
{%- set tools = available_tools %}
{%- endif %}
{#- Assignments inside for/if blocks only persist through a namespace object. -#}
{%- set ns = namespace(tools_system_message=tools_system_message_prefix,
documents_system_message=documents_system_message_prefix,
default_system_message=g4_default_system_message,
system_message=''
) %}
{#- Serialize each tool signature as one JSON line inside the <tools> block;
    collapse to the empty string when no tools are supplied. -#}
{%- if tools %}
{%- for tool in tools %}
{%- set ns.tools_system_message = ns.tools_system_message + '\n' + (tool | tojson) %}
{%- endfor %}
{%- set ns.tools_system_message = ns.tools_system_message + tools_system_message_suffix %}
{%- else %}
{%- set ns.tools_system_message = '' %}
{%- endif %}
{#- Same treatment for documents inside the <documents> block. -#}
{%- if documents %}
{%- for document in documents %}
{%- set ns.documents_system_message = ns.documents_system_message + '\n' + (document | tojson) %}
{%- endfor %}
{%- set ns.documents_system_message = ns.documents_system_message + documents_system_message_suffix %}
{%- else %}
{%- set ns.documents_system_message = '' %}
{%- endif %}
{#- Extract the leading system message's text, if any; content may be a plain
    string or a list of entries whose 'text' parts are joined with newlines. -#}
{%- if messages[0].role == 'system' %}
{%- if messages[0].content is string %}
{%- set ns.system_message = messages[0].content %}
{%- elif messages[0].content is iterable %}
{%- for entry in messages[0].content %}
{%- if entry.type== 'text' %}
{%- if ns.system_message != '' %}
{%- set ns.system_message = ns.system_message + '\n' %}
{%- endif %}
{%- set ns.system_message = ns.system_message + entry.text %}
{%- endif %}
{%- endfor %}
{%- endif %}
{#- Append tool/document instructions after the user-supplied system text. -#}
{%- if tools and documents %}
{%- set ns.system_message = ns.system_message + '\n\n' + ns.tools_system_message + '\n\n' + ns.documents_system_message %}
{%- elif tools %}
{%- set ns.system_message = ns.system_message + '\n\n' + ns.tools_system_message %}
{%- elif documents %}
{%- set ns.system_message = ns.system_message + '\n\n' + ns.documents_system_message %}
{%- endif %}
{#- No leading system message: the system prompt is just the instructions. -#}
{%- else %}
{%- if tools and documents %}
{%- set ns.system_message = ns.tools_system_message + '\n\n' + ns.documents_system_message %}
{%- elif tools %}
{%- set ns.system_message = ns.tools_system_message %}
{%- elif documents %}
{%- set ns.system_message = ns.documents_system_message %}
{%- endif %}
{%- endif %}
{#- Emit the system turn, falling back to the default prompt when nothing was
    assembled above. -#}
{%- if ns.system_message %}
{{- '<|start_of_role|>system<|end_of_role|>' + ns.system_message + '<|end_of_text|>\n' }}
{%- else %}
{{- '<|start_of_role|>system<|end_of_role|>' + ns.default_system_message + '<|end_of_text|>\n' }}
{%- endif %}
{#- Render every turn. A leading system message was already emitted above and
    is skipped here (the 'system' branch below requires 'not loop.first'). -#}
{%- for message in messages %}
{#- Flatten message content (string or list of text entries) into content.val. -#}
{%- set content = namespace(val='') %}
{%- if message.content is string %}
{%- set content.val = message.content %}
{%- else %}
{%- if message.content is iterable %}
{%- for entry in message.content %}
{%- if entry.type== 'text' %}
{%- if content.val != '' %}
{%- set content.val = content.val + '\n' %}
{%- endif %}
{%- set content.val = content.val + entry.text %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- endif %}
{%- if (message.role == 'user') or (message.role == 'system' and not loop.first) %}
{{- '<|start_of_role|>' + message.role + '<|end_of_role|>' + content.val + '<|end_of_text|>\n' }}
{%- elif message.role == 'assistant' %}
{{- '<|start_of_role|>' + message.role + '<|end_of_role|>' + content.val }}
{#- Each tool call becomes a <tool_call> JSON object; a newline separates it
    from any preceding content or earlier tool call. -#}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content.val) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Support both OpenAI-style {function: {...}} and flat tool-call objects. -#}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- Arguments may already be serialized JSON; pass strings through as-is. -#}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|end_of_text|>\n' }}
{#- Consecutive 'tool' messages are merged into a single user turn: the role
    marker opens on the first of the run and <|end_of_text|> closes the last. -#}
{%- elif message.role == 'tool' %}
{%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}
{{- '<|start_of_role|>user<|end_of_role|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content.val }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != 'tool') %}
{{- '<|end_of_text|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Open an assistant turn for the model to complete. -#}
{%- if add_generation_prompt %}
{{- '<|start_of_role|>assistant<|end_of_role|>' }}
{%- endif %}

View File

@@ -59,7 +59,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 },
{ "exaone-moe", LLM_CHAT_TEMPLATE_EXAONE_MOE },
{ "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
{ "granite", LLM_CHAT_TEMPLATE_GRANITE },
{ "granite", LLM_CHAT_TEMPLATE_GRANITE_3_X },
{ "granite-4.0", LLM_CHAT_TEMPLATE_GRANITE_4_0 },
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
{ "yandex", LLM_CHAT_TEMPLATE_YANDEX },
@@ -190,7 +191,10 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
} else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
return LLM_CHAT_TEMPLATE_RWKV_WORLD;
} else if (tmpl_contains("<|start_of_role|>")) {
return LLM_CHAT_TEMPLATE_GRANITE;
if (tmpl_contains("<tool_call>") || tmpl_contains("<tools>")) {
return LLM_CHAT_TEMPLATE_GRANITE_4_0;
}
return LLM_CHAT_TEMPLATE_GRANITE_3_X;
} else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
return LLM_CHAT_TEMPLATE_GIGACHAT;
} else if (tmpl_contains("<|role_start|>")) {
@@ -611,8 +615,8 @@ int32_t llm_chat_apply_template(
ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
}
}
} else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
// IBM Granite template
} else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE_3_X) {
// IBM Granite 3.x template
for (const auto & message : chat) {
std::string role(message->role);
ss << "<|start_of_role|>" << role << "<|end_of_role|>";
@@ -624,6 +628,20 @@ int32_t llm_chat_apply_template(
if (add_ass) {
ss << "<|start_of_role|>assistant<|end_of_role|>";
}
} else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE_4_0) {
// IBM Granite 4.0 template
for (const auto & message : chat) {
std::string role(message->role);
if (role == "assistant_tool_call") {
ss << "<|start_of_role|>assistant<|end_of_role|><|tool_call|>";
} else {
ss << "<|start_of_role|>" << role << "<|end_of_role|>";
}
ss << message->content << "<|end_of_text|>\n";
}
if (add_ass) {
ss << "<|start_of_role|>assistant<|end_of_role|>";
}
} else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
// GigaChat template
bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

View File

@@ -38,7 +38,8 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_EXAONE_4,
LLM_CHAT_TEMPLATE_EXAONE_MOE,
LLM_CHAT_TEMPLATE_RWKV_WORLD,
LLM_CHAT_TEMPLATE_GRANITE,
LLM_CHAT_TEMPLATE_GRANITE_3_X,
LLM_CHAT_TEMPLATE_GRANITE_4_0,
LLM_CHAT_TEMPLATE_GIGACHAT,
LLM_CHAT_TEMPLATE_MEGREZ,
LLM_CHAT_TEMPLATE_YANDEX,

View File

@@ -679,6 +679,155 @@ int main_automated_tests(void) {
}
}
// Test Granite 3.x template (LLM_CHAT_TEMPLATE_GRANITE_3_X) — backwards compatibility
// The 3.x C++ handler passes assistant_tool_call through as a literal role
{
std::cout << "\n\n=== Granite 3.x assistant_tool_call (backwards compat) ===\n\n";
std::vector<llama_chat_message> tool_conversation {
{"system", "You are a helpful assistant"},
{"user", "What is the weather?"},
{"assistant_tool_call", "[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"NYC\"}}]"},
{"tool_response", "{\"temperature\": 72}"},
};
// Granite 3.x template — no <tool_call> XML, detected as LLM_CHAT_TEMPLATE_GRANITE_3_X
const char * granite_3x_tmpl = "{%- for message in messages %}\n"
" {%- if message['role'] == 'assistant_tool_call' %}\n"
" {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\\n' }}\n"
" {%- else %}\n"
" {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\\n' }}\n"
" {%- endif %}\n"
" {%- if loop.last and add_generation_prompt %}\n"
" {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n"
" {%- endif %}\n"
"{%- endfor %}";
// 3.x C++ path: role is passed through literally (existing behavior preserved)
std::string expected_3x_cpp =
"<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n"
"<|start_of_role|>user<|end_of_role|>What is the weather?<|end_of_text|>\n"
"<|start_of_role|>assistant_tool_call<|end_of_role|><|tool_call|>"
"[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"NYC\"}}]<|end_of_text|>\n"
"<|start_of_role|>tool_response<|end_of_role|>{\"temperature\": 72}<|end_of_text|>\n"
"<|start_of_role|>assistant<|end_of_role|>";
// C++ fast path: oversize the buffer, then shrink to the returned length.
formatted_chat.resize(2048);
res = llama_chat_apply_template(
granite_3x_tmpl,
tool_conversation.data(),
tool_conversation.size(),
true,
formatted_chat.data(),
formatted_chat.size()
);
formatted_chat.resize(res);
std::string output_3x(formatted_chat.data(), formatted_chat.size());
// Dump both strings before asserting so a mismatch is easy to diagnose.
if (output_3x != expected_3x_cpp) {
std::cout << "Expected:\n" << expected_3x_cpp << "\n";
std::cout << "-------------------------\n";
std::cout << "Actual:\n" << output_3x << "\n";
std::cout.flush();
assert(output_3x == expected_3x_cpp);
}
std::cout << " Granite 3.x C++ template: PASS\n";
// 3.x Jinja path: Jinja handles the role correctly (template has the if/else)
// so assistant_tool_call is rendered with the 'assistant' role marker here.
std::string expected_3x_jinja =
"<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n"
"<|start_of_role|>user<|end_of_role|>What is the weather?<|end_of_text|>\n"
"<|start_of_role|>assistant<|end_of_role|><|tool_call|>"
"[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"NYC\"}}]<|end_of_text|>\n"
"<|start_of_role|>tool_response<|end_of_role|>{\"temperature\": 72}<|end_of_text|>\n"
"<|start_of_role|>assistant<|end_of_role|>";
// Convert to common_chat_msg for the Jinja-based formatting path.
std::vector<common_chat_msg> tool_messages;
tool_messages.reserve(tool_conversation.size());
for (const auto & msg : tool_conversation) {
tool_messages.push_back(simple_msg(msg.role, msg.content));
}
auto jinja_output_3x = format_using_common(granite_3x_tmpl, "", "", tool_messages);
if (jinja_output_3x != expected_3x_jinja) {
std::cout << "Expected (jinja):\n" << expected_3x_jinja << "\n";
std::cout << "-------------------------\n";
std::cout << "Actual (jinja):\n" << jinja_output_3x << "\n";
std::cout.flush();
assert(jinja_output_3x == expected_3x_jinja);
}
std::cout << " Granite 3.x Jinja template: PASS\n";
}
// Test Granite 4.0 template (LLM_CHAT_TEMPLATE_GRANITE_4_0)
// Verifies: assistant_tool_call maps to <|start_of_role|>assistant<|end_of_role|><|tool_call|>
{
std::cout << "\n\n=== Granite 4.0 assistant_tool_call fix ===\n\n";
std::vector<llama_chat_message> tool_conversation {
{"system", "You are a helpful assistant"},
{"user", "What is the weather?"},
{"assistant_tool_call", "<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"NYC\"}}\n</tool_call>"},
{"tool_response", "{\"temperature\": 72}"},
};
// Granite 4.0 template — contains <tool_call> XML, detected as LLM_CHAT_TEMPLATE_GRANITE_4_0
// (the trailing Jinja comment carries the <tool_call>/<tools> detection markers)
const char * granite_40_tmpl = "{%- for message in messages %}\n"
" {%- if message['role'] == 'assistant_tool_call' %}\n"
" {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\\n' }}\n"
" {%- else %}\n"
" {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\\n' }}\n"
" {%- endif %}\n"
" {%- if loop.last and add_generation_prompt %}\n"
" {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n"
" {%- endif %}\n"
"{%- endfor %}\n"
"{# <tool_call> <tools> #}";
// Both the C++ and the Jinja path must produce this exact output; the
// assistant_tool_call content passes through verbatim, <tool_call> XML included.
std::string expected_40 =
"<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n"
"<|start_of_role|>user<|end_of_role|>What is the weather?<|end_of_text|>\n"
"<|start_of_role|>assistant<|end_of_role|><|tool_call|>"
"<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"NYC\"}}\n</tool_call><|end_of_text|>\n"
"<|start_of_role|>tool_response<|end_of_role|>{\"temperature\": 72}<|end_of_text|>\n"
"<|start_of_role|>assistant<|end_of_role|>";
// Test C++ template path
formatted_chat.resize(2048);
res = llama_chat_apply_template(
granite_40_tmpl,
tool_conversation.data(),
tool_conversation.size(),
true,
formatted_chat.data(),
formatted_chat.size()
);
formatted_chat.resize(res);
std::string output_40(formatted_chat.data(), formatted_chat.size());
if (output_40 != expected_40) {
std::cout << "Expected:\n" << expected_40 << "\n";
std::cout << "-------------------------\n";
std::cout << "Actual:\n" << output_40 << "\n";
std::cout.flush();
assert(output_40 == expected_40);
}
std::cout << " Granite 4.0 C++ template: PASS\n";
// Test Jinja template path
std::vector<common_chat_msg> tool_messages;
tool_messages.reserve(tool_conversation.size());
for (const auto & msg : tool_conversation) {
tool_messages.push_back(simple_msg(msg.role, msg.content));
}
auto jinja_output_40 = format_using_common(granite_40_tmpl, "", "", tool_messages);
if (jinja_output_40 != expected_40) {
std::cout << "Expected (jinja):\n" << expected_40 << "\n";
std::cout << "-------------------------\n";
std::cout << "Actual (jinja):\n" << jinja_output_40 << "\n";
std::cout.flush();
assert(jinja_output_40 == expected_40);
}
std::cout << " Granite 4.0 Jinja template: PASS\n";
}
std::cout << "\nOK: All tests passed successfully.\n";
return 0;

View File

@@ -1546,6 +1546,22 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
// .run();
}
{
// IBM Granite 4.0 (production template shared by h-tiny, h-small, micro)
// Uses <tool_call> XML tags for tool calls, tools in system message
auto tst = peg_tester("models/templates/ibm-granite-granite-4.0.jinja", detailed_debug);
// Plain text round-trip: no tools attached to this case.
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
// Tool-call round-trip: the <tool_call> JSON payload must parse back into
// the expected tool-call message.
tst.test(
"<tool_call>\n"
"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
"</tool_call>")
.tools({ special_function_tool })
.expect(message_assist_call)
.run();
}
{
// ByteDance-Seed-OSS (reasoning and tool calling model)
auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug);