From 1784a57e7bec130c51a4175ba94adbb2ce136eb6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sun, 28 Dec 2025 23:15:48 +0100 Subject: [PATCH] impl global_from_json --- common/jinja/jinja-value.cpp | 49 +++++++++++++++++++++++++++++++++++ common/jinja/jinja-value.h | 33 ++++++++++++++++++++++++ common/jinja/jinja-vm.cpp | 2 +- tests/test-chat-jinja.cpp | 50 +++++++++++++++++++++--------------- 4 files changed, 112 insertions(+), 22 deletions(-) diff --git a/common/jinja/jinja-value.cpp b/common/jinja/jinja-value.cpp index cdf39a8f66..9461901c6d 100644 --- a/common/jinja/jinja-value.cpp +++ b/common/jinja/jinja-value.cpp @@ -3,6 +3,9 @@ #include "jinja-parser.h" #include "jinja-value.h" +// for converting from JSON to jinja values +#include + #include #include #include @@ -520,4 +523,50 @@ const func_builtins & value_object_t::get_builtins() const { return builtins; } +static value from_json(const nlohmann::json & j) { + if (j.is_null()) { + return mk_val(); + } else if (j.is_boolean()) { + return mk_val(j.get()); + } else if (j.is_number_integer()) { + return mk_val(j.get()); + } else if (j.is_number_float()) { + return mk_val(j.get()); + } else if (j.is_string()) { + return mk_val(j.get()); + } else if (j.is_array()) { + auto arr = mk_val(); + for (const auto & item : j) { + arr->push_back(from_json(item)); + } + return arr; + } else if (j.is_object()) { + if (j.contains("__input__")) { + // handle input marking + auto str = mk_val(j.at("__input__").get()); + str->mark_input(); + return str; + } else { + // normal object + auto obj = mk_val(); + for (auto it = j.begin(); it != j.end(); ++it) { + obj->insert(it.key(), from_json(it.value())); + } + return obj; + } + } else { + throw std::runtime_error("Unsupported JSON value type"); + } +} + +template<> +void global_from_json(context & ctx, const nlohmann::json & json_obj) { + if (json_obj.is_null() || !json_obj.is_object()) { + throw std::runtime_error("global_from_json: input JSON value must be an object"); + } + for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { + ctx.var[it.key()] = from_json(it.value()); + } +} + } // namespace jinja diff --git a/common/jinja/jinja-value.h b/common/jinja/jinja-value.h index b5ce893162..04c6c6da28 100644 --- a/common/jinja/jinja-value.h +++ b/common/jinja/jinja-value.h @@ -57,8 +57,41 @@ void ensure_val(const value & ptr) { } // End Helper + struct context; // forward declaration + +// for converting from JSON to jinja values +// example input JSON: +// { +// "messages": [ +// {"role": "user", "content": "Hello!"}, +// {"role": "assistant", "content": "Hi there!"} +// ], +// "bos_token": "", +// "eos_token": "", +// } +// +// to mark strings as user input, wrap them in a special object: +// { +// "messages": [ +// { +// "role": "user", +// "content": {"__input__": "Hello!"} // this string is user input +// }, +// ... +// ], +// } +// +// marking input can be useful for tracking data provenance +// and preventing template injection attacks +// +// Note: T_JSON can be nlohmann::json or similar types +template +void global_from_json(context & ctx, const T_JSON & json_obj); + + + struct func_args { std::vector args; context & ctx; diff --git a/common/jinja/jinja-vm.cpp b/common/jinja/jinja-vm.cpp index edb9363123..4c38ebde7d 100644 --- a/common/jinja/jinja-vm.cpp +++ b/common/jinja/jinja-vm.cpp @@ -226,7 +226,7 @@ static value try_builtin_func(const std::string & name, const value & input, boo } value filter_expression::execute_impl(context & ctx) { - value input = operand->execute(ctx); + value input = operand ? operand->execute(ctx) : val; JJ_DEBUG("Applying filter to %s", input->type().c_str()); diff --git a/tests/test-chat-jinja.cpp b/tests/test-chat-jinja.cpp index 1f9dedb1e4..997d463061 100644 --- a/tests/test-chat-jinja.cpp +++ b/tests/test-chat-jinja.cpp @@ -6,6 +6,8 @@ #include #include +#include + #undef NDEBUG #include @@ -24,10 +26,14 @@ int main(void) { //std::ifstream infile("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja"); std::string contents((std::istreambuf_iterator(infile)), std::istreambuf_iterator()); + std::vector failed_tests; + // list all files in models/templates/ and run each + size_t test_count = 0; std::string dir_path = "models/templates/"; for (const auto & entry : std::filesystem::directory_iterator(dir_path)) { if (entry.is_regular_file()) { + test_count++; std::cout << "\n\n=== RUNNING TEMPLATE FILE: " << entry.path().string() << " ===\n"; std::ifstream infile(entry.path()); std::string contents((std::istreambuf_iterator(infile)), std::istreambuf_iterator()); @@ -35,11 +41,18 @@ int main(void) { run(contents); } catch (const std::exception & e) { std::cout << "Exception: " << e.what() << "\n"; - std::cout << "=== CURRENT TEMPLATE FILE: " << entry.path().string() << " ===\n"; - exit(1); + std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n"; + failed_tests.push_back(entry.path().string()); } } } + + std::cout << "\n\n=== TEST SUMMARY ===\n"; + std::cout << "Total tests run: " << test_count << "\n"; + std::cout << "Total failed tests: " << failed_tests.size() << "\n"; + for (const auto & test : failed_tests) { + std::cout << "FAILED TEST: " << test << "\n"; + } return 0; } @@ -66,25 +79,20 @@ void run(std::string contents) { jinja::context ctx; ctx.source = lexer_res.preprocessed_source; - auto make_non_special_string = [](const std::string & s) { - jinja::value_string str_val = jinja::mk_val(s); - str_val->mark_input(); - return str_val; - }; - - jinja::value_array messages = jinja::mk_val(); - jinja::value_object msg1 = jinja::mk_val(); - msg1->insert("role", make_non_special_string("user")); - msg1->insert("content", make_non_special_string("Hello, how are you?")); - messages->push_back(std::move(msg1)); - jinja::value_object msg2 = jinja::mk_val(); - msg2->insert("role", make_non_special_string("assistant")); - msg2->insert("content", make_non_special_string("I am fine, thank you!")); - messages->push_back(std::move(msg2)); - - ctx.var["messages"] = std::move(messages); - ctx.var["eos_token"] = jinja::mk_val(""); - // ctx.var["tools"] = jinja::mk_val(); + std::string json_inp = R"({ + "messages": [ + { + "role": "user", + "content": {"__input__": "Hello, how are you?"} + }, + { + "role": "assistant", + "content": {"__input__": "I am fine, thank you!"} + } + ], + "eos_token": "" + })"; + jinja::global_from_json(ctx, nlohmann::json::parse(json_inp)); jinja::vm vm(ctx); const jinja::value results = vm.execute(ast);