// llama.cpp/tools/parser/debug-template-parser.cpp
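//
// debug-template-parser: standalone tool for debugging chat template handling.
//
// Given a Jinja chat template file (or a GGUF model that embeds one), it can
// render a set of canned message scenarios through the template and/or run the
// differential analyzer, printing the detected reasoning/content/tool-call
// markers along with the generated PEG parser and grammar.
//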
#include "../src/llama-grammar.h"
#include "chat-auto-parser.h"
#include "chat-diff-analyzer.h"
#include "chat.h"
#include "common.h"
#include "gguf.h"
#include "jinja/runtime.h"
#include "log.h"
#include <fstream>
#include <numeric>
#include <sstream>
#include <string>
#include "nlohmann/json.hpp"
#include "peg-parser.h"
using json = nlohmann::ordered_json;

enum class output_mode {
    ANALYSIS,  // Only output analysis results (default)
    TEMPLATE,  // Only output rendered template
    BOTH       // Output both
};

enum class input_message_type {
    NONE,                    // Don't render any message scenarios (only analysis)
    CONTENT_ONLY,            // Simple assistant message with content
    REASONING_CONTENT,       // Message with reasoning_content + content
    TOOL_CALL_ONLY,          // Message with tool_calls only
    CONTENT_TOOL_CALL,       // Message with content + tool_calls
    REASONING_TOOL_CALL,     // Message with reasoning_content + tool_calls
    CONTENT_FAKE_TOOL_CALL,  // Message with content but no actual tool_calls (for testing)
    ALL                      // Render all scenarios
};

struct debug_options {
    std::string        template_path;
    bool               with_tools        = true;
    bool               generation_prompt = true;
    bool               enable_reasoning  = true;
    output_mode        mode              = output_mode::BOTH;
    input_message_type input_message     = input_message_type::NONE;
};

static std::string read_file(const std::string & path) {
    std::ifstream fin(path, std::ios::binary);
    if (!fin.is_open()) {
        throw std::runtime_error("Could not open file: " + path);
    }
    std::ostringstream buf;
    buf << fin.rdbuf();
    return buf.str();
}
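
// GGUF models store their chat template as a string under the standard
// metadata key "tokenizer.chat_template"; the helper below extracts it
// without loading any tensor data.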
static std::string read_gguf_chat_template(const std::string & path) {
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,  // we only need metadata, not tensor data
        /*.ctx      =*/ nullptr,
    };

    struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
    if (ctx == nullptr) {
        throw std::runtime_error("Could not open GGUF file: " + path);
    }

    const char * key = "tokenizer.chat_template";
    int64_t key_id = gguf_find_key(ctx, key);
    if (key_id == -1) {
        gguf_free(ctx);
        throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
    }

    const char * template_str = gguf_get_val_str(ctx, key_id);
    if (template_str == nullptr) {
        gguf_free(ctx);
        throw std::runtime_error("GGUF file contains chat template key but value is null");
    }

    std::string result = template_str;
    gguf_free(ctx);
    return result;
}

static void print_usage(const char * program_name) {
    LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
    LOG_ERR("\nOptions:\n");
    LOG_ERR("  --no-tools                 Disable tool definitions\n");
    LOG_ERR("  --generation-prompt=0|1    Set add_generation_prompt (default: 1)\n");
    LOG_ERR("  --enable-reasoning=0|1     Enable reasoning parsing (default: 1)\n");
    LOG_ERR("  --output=MODE              Output mode: analysis, template, both (default: both)\n");
    LOG_ERR("  --input-message=TYPE       Message type to render:\n");
    LOG_ERR("                             content_only, reasoning_content, tool_call_only,\n");
    LOG_ERR("                             content_tool_call, reasoning_tool_call,\n");
    LOG_ERR("                             content_fake_tool_call, all\n");
    LOG_ERR("\nExamples:\n");
    LOG_ERR("  %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
    LOG_ERR("  %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
}

static bool parse_bool_option(const std::string & value) {
    return value == "1" || value == "true" || value == "yes";
}

static bool parse_options(int argc, char ** argv, debug_options & opts) {
    if (argc < 2) {
        print_usage(argv[0]);
        return false;
    }

    opts.template_path = argv[1];

    for (int i = 2; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--no-tools") {
            opts.with_tools = false;
        } else if (arg.rfind("--generation-prompt=", 0) == 0) {
            opts.generation_prompt = parse_bool_option(arg.substr(20));
        } else if (arg.rfind("--enable-reasoning=", 0) == 0) {
            opts.enable_reasoning = parse_bool_option(arg.substr(19));
        } else if (arg.rfind("--output=", 0) == 0) {
            std::string mode = arg.substr(9);
            if (mode == "analysis") {
                opts.mode = output_mode::ANALYSIS;
            } else if (mode == "template") {
                opts.mode = output_mode::TEMPLATE;
            } else if (mode == "both") {
                opts.mode = output_mode::BOTH;
            } else {
                LOG_ERR("Unknown output mode: %s\n", mode.c_str());
                return false;
            }
        } else if (arg.rfind("--input-message=", 0) == 0) {
            std::string type = arg.substr(16);
            if (type == "content_only") {
                opts.input_message = input_message_type::CONTENT_ONLY;
            } else if (type == "reasoning_content") {
                opts.input_message = input_message_type::REASONING_CONTENT;
            } else if (type == "tool_call_only") {
                opts.input_message = input_message_type::TOOL_CALL_ONLY;
            } else if (type == "content_tool_call") {
                opts.input_message = input_message_type::CONTENT_TOOL_CALL;
            } else if (type == "reasoning_tool_call") {
                opts.input_message = input_message_type::REASONING_TOOL_CALL;
            } else if (type == "content_fake_tool_call") {
                opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
            } else if (type == "all") {
                opts.input_message = input_message_type::ALL;
            } else {
                LOG_ERR("Unknown input message type: %s\n", type.c_str());
                return false;
            }
        } else {
            LOG_ERR("Unknown option: %s\n", arg.c_str());
            print_usage(argv[0]);
            return false;
        }
    }

    return true;
}

static json build_user_message() {
    return json{
        { "role", "user" },
        { "content", "Hello, please help me with a task." }
    };
}

static json build_content_only_message() {
    return json{
        { "role", "assistant" },
        { "content", "Hello! I'm here to help you with your task." }
    };
}

static json build_reasoning_content_message() {
    return json{
        { "role", "assistant" },
        { "content", "Hello! I'm here to help you with your task." },
        { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
    };
}

static json build_tool_call_only_message() {
    return json{
        { "role", "assistant" },
        { "content", nullptr },
        { "tool_calls", json::array({ json{
            { "type", "function" },
            { "function", json{
                { "name", "test_function_name" },
                { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
            { "id", "123456789" } } }) }
    };
}

static json build_content_tool_call_message() {
    return json{
        { "role", "assistant" },
        { "content", "I'll help you by calling a function." },
        { "tool_calls", json::array({ json{
            { "type", "function" },
            { "function", json{
                { "name", "test_function_name" },
                { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
    };
}

static json build_reasoning_tool_call_message() {
    return json{
        { "role", "assistant" },
        { "content", nullptr },
        { "reasoning_content", "I need to call a function to help with this task." },
        { "tool_calls", json::array({ json{
            { "type", "function" },
            { "function", json{
                { "name", "test_function_name" },
                { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
    };
}

static json build_content_fake_tool_call_message() {
    // This message has content but NO tool_calls field.
    // It is used to test whether a template renders tool definitions but not tool calls.
    return json{
        { "role", "assistant" },
        { "content", "I'll help you by calling a function." }
    };
}

static json build_tools_definition() {
    json parameters_schema = json::object();
    parameters_schema["type"] = "object";
    parameters_schema["properties"] = json::object();
    parameters_schema["properties"]["param1"] = json::object({
        { "type", "string" },
        { "description", "First parameter" }
    });
    parameters_schema["properties"]["param2"] = json::object({
        { "type", "string" },
        { "description", "Second parameter" }
    });
    parameters_schema["required"] = json::array({ "param1" });

    return json::array({
        json{ { "type", "function" },
              { "function", json{
                  { "name", "test_function_name" },
                  { "description", "A test function for debugging" },
                  { "parameters", parameters_schema } } } }
    });
}
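
// For reference, build_tools_definition() above produces:
//   [{
//     "type": "function",
//     "function": {
//       "name": "test_function_name",
//       "description": "A test function for debugging",
//       "parameters": {
//         "type": "object",
//         "properties": {
//           "param1": { "type": "string", "description": "First parameter" },
//           "param2": { "type": "string", "description": "Second parameter" }
//         },
//         "required": ["param1"]
//       }
//     }
//   }]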

static void render_scenario(const common_chat_template & tmpl,
                            const std::string & scenario_name,
                            const json & messages,
                            const json & tools,
                            bool add_generation_prompt,
                            bool enable_thinking) {
    LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
    LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
            enable_thinking ? "true" : "false");

    // When add_generation_prompt is true and the conversation ends with an assistant
    // message, append a trailing user message so the template emits the prompt.
    json final_messages = messages;
    if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
        final_messages.push_back(json{
            { "role", "user" },
            { "content", "Now please continue with another response." }
        });
    }
    LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());

    try {
        templates_params inputs;
        inputs.messages = final_messages;
        inputs.add_generation_prompt = add_generation_prompt;
        inputs.extra_context["enable_thinking"] = enable_thinking;
        if (tools.is_array() && !tools.empty()) {
            inputs.tools = tools;
        }

        std::string output = common_chat_template_direct_apply(tmpl, inputs);
        LOG_ERR("\n--- Rendered Output ---\n");
        LOG_ERR("%s\n", output.c_str());
        LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
    } catch (const std::exception & e) {
        // A failing template should not abort the remaining scenarios.
        LOG_ERR("Rendering failed: %s\n", e.what());
    }
}

static void render_all_scenarios(const common_chat_template & tmpl,
                                 const json & tools,
                                 bool add_generation_prompt,
                                 bool enable_thinking,
                                 input_message_type message_type) {
    json user_msg = build_user_message();

    auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
        if (message_type == input_message_type::ALL || message_type == type) {
            json messages = json::array({ user_msg, assistant_msg });
            render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
        }
    };

    render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
    render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
    render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
    render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
    render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
    render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
              build_content_fake_tool_call_message());

    // Also render with add_generation_prompt=true to show the prompt ending
    if (message_type == input_message_type::ALL) {
        LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
        json prompt_messages = json::array({ user_msg });
        render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
        // With enable_thinking toggled off
        render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
    }
}

// Render an analysis mode enum to a string via its stream operator<< overload.
template <typename T>
static std::string mode_to_str(T mode) {
    std::ostringstream os;
    os << mode;
    return os.str();
}

int main(int argc, char ** argv) {
    // Set log level to most verbose to capture all debug output
    common_log_set_verbosity_thold(99);

    if (std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
        jinja::enable_debug(true);
    }

    debug_options opts;
    if (!parse_options(argc, argv, opts)) {
        return 1;
    }

    std::string template_source;
    try {
        // A .gguf path means the template comes from model metadata;
        // anything else is read as a raw Jinja template file.
        if (opts.template_path.size() >= 5 &&
            opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
            template_source = read_gguf_chat_template(opts.template_path);
        } else {
            template_source = read_file(opts.template_path);
        }
    } catch (const std::exception & e) {
        LOG_ERR("Error reading template: %s\n", e.what());
        return 1;
    }

    LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
    LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
            opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");

    try {
        common_chat_template chat_template(template_source, "", "");

        // Build tools definition
        json tools = opts.with_tools ? build_tools_definition() : json();

        // Render template scenarios if requested
        if (opts.input_message != input_message_type::NONE &&
            (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
            LOG_ERR("\n");
            LOG_ERR("================================================================================\n");
            LOG_ERR("                          TEMPLATE RENDERING OUTPUT\n");
            LOG_ERR("================================================================================\n");
            render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
                                 opts.input_message);
        }

        // Output analysis if requested
        if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
            LOG_ERR("\n");
            LOG_ERR("================================================================================\n");
            LOG_ERR("                             TEMPLATE ANALYSIS\n");
            LOG_ERR("================================================================================\n");

            diff_analysis_result analysis = differential_analyzer::analyze(chat_template);

            // Generate the parser from the analysis results
            templates_params params;
            params.messages = json::array();
            params.reasoning_format =
                opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
            params.enable_thinking = opts.enable_reasoning;
            params.add_generation_prompt = opts.generation_prompt;
            if (opts.with_tools) {
                params.tools = tools;
                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
            } else {
                params.tools = json();
                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
            }
            params.parallel_tool_calls = false;

            auto parser_data = universal_peg_generator::generate_parser(chat_template, params, analysis);
LOG_ERR("\n=== Differential Analysis Results ===\n");
LOG_ERR("\n--- Reasoning & Content Structure ---\n");
LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning).c_str());
LOG_ERR("reasoning_start: '%s'\n", analysis.markers.reasoning_start.c_str());
LOG_ERR("reasoning_end: '%s'\n", analysis.markers.reasoning_end.c_str());
LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content).c_str());
LOG_ERR("content_start: '%s'\n", analysis.markers.content_start.c_str());
LOG_ERR("content_end: '%s'\n", analysis.markers.content_end.c_str());
LOG_ERR("\n--- Tool Call Structure ---\n");
LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools).c_str());
LOG_ERR("supports_tools: %s\n", analysis.supports_tools ? "true" : "false");
LOG_ERR("supports_parallel_calls: %s\n", analysis.supports_parallel_calls ? "true" : "false");
LOG_ERR("tool_section_start: '%s'\n", analysis.markers.tool_section_start.c_str());
LOG_ERR("tool_section_end: '%s'\n", analysis.markers.tool_section_end.c_str());
LOG_ERR("per_call_start: '%s'\n", analysis.markers.per_call_start.c_str());
LOG_ERR("per_call_end: '%s'\n", analysis.markers.per_call_end.c_str());
LOG_ERR("func_name_prefix: '%s'\n", analysis.markers.func_name_prefix.c_str());
LOG_ERR("func_name_suffix: '%s'\n", analysis.markers.func_name_suffix.c_str());
LOG_ERR("func_close: '%s'\n", analysis.markers.func_close.c_str());
LOG_ERR("arg_name_prefix: '%s'\n", analysis.markers.arg_name_prefix.c_str());
LOG_ERR("arg_name_suffix: '%s'\n", analysis.markers.arg_name_suffix.c_str());
LOG_ERR("arg_value_prefix: '%s'\n", analysis.markers.arg_value_prefix.c_str());
LOG_ERR("arg_value_suffix: '%s'\n", analysis.markers.arg_value_suffix.c_str());
LOG_ERR("name_field: '%s'\n", analysis.name_field.c_str());
LOG_ERR("args_field: '%s'\n", analysis.args_field.c_str());
LOG_ERR("id_field: '%s'\n", analysis.id_field.c_str());
LOG_ERR("gen_id_field: '%s'\n", analysis.gen_id_field.c_str());
LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(),
std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
).c_str());
LOG_ERR("\n=== Generated Parser ===\n");
common_peg_arena arena;
arena.load(parser_data.parser);
LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
LOG_ERR("\n=== Generated Grammar ===\n");
LOG_ERR("%s\n", parser_data.grammar.c_str());
LOG_ERR("\n=== Generated Lazy Grammar ===\n");
LOG_ERR("%d\n", parser_data.grammar_lazy);
LOG_ERR("\n=== Generated Grammar Triggers ===\n");
for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
}
LOG_ERR("\n=== Preserved Tokens ===\n");
for (const std::string & token : parser_data.preserved_tokens) {
LOG_ERR(" '%s'\n", token.c_str());
}
LOG_ERR("\n=== Verifying created grammar ===\n");
auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
if (grammar != nullptr) {
LOG_ERR("\n=== Grammar successfully created ===\n");
}
}
} catch (const std::exception & e) {
LOG_ERR("Analysis failed: %s\n", e.what());
return 1;
}
return 0;
}