From 9e9a70f72f2361875cbe494c61b467b17ecc6df6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 29 Dec 2025 15:07:18 +0100 Subject: [PATCH] more fixes --- common/jinja/jinja-lexer.cpp | 3 ++- common/jinja/jinja-value.cpp | 11 +++++++++++ common/jinja/jinja-vm.cpp | 34 +++++++++++++++++++++------------ common/jinja/jinja-vm.h | 8 ++++---- common/jinja/jinja-workaround.h | 4 ++++ tests/test-chat-jinja.cpp | 19 ++++++++++++++---- 6 files changed, 58 insertions(+), 21 deletions(-) diff --git a/common/jinja/jinja-lexer.cpp b/common/jinja/jinja-lexer.cpp index 285ccc0151..189f8f5b10 100644 --- a/common/jinja/jinja-lexer.cpp +++ b/common/jinja/jinja-lexer.cpp @@ -105,7 +105,8 @@ std::string lexer::preprocess(const std::string & template_str, const preprocess // Handle custom transformers-specific `generation` tag // See https://github.com/huggingface/transformers/pull/30650 for more information. - // result = std::regex_replace(result, std::regex(R"((?s)\{%\s*generation\s*%\}.+?\{%\s*endgeneration\s*%\})"), ""); + result = std::regex_replace(result, std::regex(R"(\{%\s*generation\s*%\})"), ""); + result = std::regex_replace(result, std::regex(R"(\{%\s*endgeneration\s*%\})"), ""); return result; } diff --git a/common/jinja/jinja-value.cpp b/common/jinja/jinja-value.cpp index 218d893e26..688f6cdb0f 100644 --- a/common/jinja/jinja-value.cpp +++ b/common/jinja/jinja-value.cpp @@ -404,6 +404,17 @@ const func_builtins & value_string_t::get_builtins() const { res->val_str.mark_input_based_on(input->as_string()); return res; }}, + {"selectattr", [](const func_args & args) -> value { + if (args.ctx.wrk_around.string_has_selectattr) { + // no-op, return an array containing the original string + args.ensure_vals(); + auto result = mk_val(); + result->push_back(args.args[0]); + return result; + } else { + throw raised_exception("String selectattr builtin not supported"); + } + }}, {"indent", [](const func_args &) -> value { throw std::runtime_error("String indent builtin not 
implemented"); }}, diff --git a/common/jinja/jinja-vm.cpp b/common/jinja/jinja-vm.cpp index 94ee370029..8797b866f4 100644 --- a/common/jinja/jinja-vm.cpp +++ b/common/jinja/jinja-vm.cpp @@ -35,6 +35,10 @@ static value_string exec_statements(const statements & stmts, context & ctx) { value statement::execute(context & ctx) { try { return execute_impl(ctx); + } catch (const continue_statement::signal &) { + throw; /* loop-control signal, not an error: rethrow the in-flight object unchanged so the enclosing for loop can catch it */ + } catch (const break_statement::signal &) { + throw; /* same pass-through for break; both signal types derive from std::exception and would otherwise reach the generic handler below */ } catch (const std::exception & e) { if (ctx.source.empty()) { std::ostringstream oss; @@ -359,15 +363,17 @@ value if_statement::execute_impl(context & ctx) { value for_statement::execute_impl(context & ctx) { context scope(ctx); // new scope for loop variables - statement_ptr iter_expr = std::move(iterable); - statement_ptr test_expr = nullptr; + jinja::select_expression * select_expr = cast_stmt(iterable); + statement_ptr test_expr_nullptr; - if (is_stmt(iterable)) { - JJ_DEBUG("%s", "For loop has test expression"); - auto select = cast_stmt(iterable); - iter_expr = std::move(select->lhs); - test_expr = std::move(select->test); - } + statement_ptr & iter_expr = [&]() -> statement_ptr & { + auto tmp = cast_stmt(iterable); + return tmp ? tmp->lhs : iterable; + }(); + statement_ptr & test_expr = [&]() -> statement_ptr & { + auto tmp = cast_stmt(iterable); + return tmp ? 
tmp->test : test_expr_nullptr; + }(); JJ_DEBUG("Executing for statement, iterable type: %s", iter_expr->type().c_str()); @@ -436,21 +442,23 @@ value for_statement::execute_impl(context & ctx) { } else { throw std::runtime_error("Invalid loop variable(s): " + loopvar->type()); } - if (test_expr) { + if (select_expr && test_expr) { scope_update_fn(loop_scope); value test_val = test_expr->execute(loop_scope); if (!test_val->as_bool()) { continue; } } + JJ_DEBUG("For loop: adding item type %s at index %zu", current->type().c_str(), i); filtered_items.push_back(current); scope_update_fns.push_back(scope_update_fn); } + JJ_DEBUG("For loop: %zu items after filtering", filtered_items.size()); auto result = mk_val(); bool noIteration = true; - for (size_t i = 0; i < filtered_items.size(); ++i) { + for (size_t i = 0; i < filtered_items.size(); i++) { JJ_DEBUG("For loop iteration %zu/%zu", i + 1, filtered_items.size()); value_object loop_obj = mk_val(); loop_obj->insert("index", mk_val(i + 1)); @@ -469,13 +477,15 @@ value for_statement::execute_impl(context & ctx) { value val = stmt->execute(ctx); result->push_back(val); } - } catch (const continue_statement::exception &) { + } catch (const continue_statement::signal &) { continue; - } catch (const break_statement::exception &) { + } catch (const break_statement::signal &) { break; } noIteration = false; } + + JJ_DEBUG("For loop complete, total iterations: %zu", filtered_items.size()); if (noIteration) { for (auto & stmt : default_block) { value val = stmt->execute(ctx); diff --git a/common/jinja/jinja-vm.h b/common/jinja/jinja-vm.h index 02790945a9..1526a365a1 100644 --- a/common/jinja/jinja-vm.h +++ b/common/jinja/jinja-vm.h @@ -160,28 +160,28 @@ struct for_statement : public statement { struct break_statement : public statement { std::string type() const override { return "Break"; } - struct exception : public std::exception { + struct signal : public std::exception { const char* what() const noexcept override { return 
"Break statement executed"; } }; value execute_impl(context &) override { - throw break_statement::exception(); + throw break_statement::signal(); } }; struct continue_statement : public statement { std::string type() const override { return "Continue"; } - struct exception : public std::exception { + struct signal : public std::exception { const char* what() const noexcept override { return "Continue statement executed"; } }; value execute_impl(context &) override { - throw continue_statement::exception(); + throw continue_statement::signal(); } }; diff --git a/common/jinja/jinja-workaround.h b/common/jinja/jinja-workaround.h index 766132c0ca..ed7e92df45 100644 --- a/common/jinja/jinja-workaround.h +++ b/common/jinja/jinja-workaround.h @@ -8,6 +8,7 @@ namespace jinja { // containing workarounds for Jinja templates that rely on non-standard behavior +// NOTE: this is kept as a dedicated file for better documentation struct workarounds { // meetkai-functionary-medium-v3.1.jinja call filter on None type @@ -15,6 +16,9 @@ struct workarounds { // Olmo calls operation + between string and undefined bool string_plus_undefined_is_string = true; + + // sheldonrobinson-Llama-Guard calls selectattr on string + bool string_has_selectattr = true; }; } // namespace jinja diff --git a/tests/test-chat-jinja.cpp b/tests/test-chat-jinja.cpp index f16ebb9e07..0e2f5e4faa 100644 --- a/tests/test-chat-jinja.cpp +++ b/tests/test-chat-jinja.cpp @@ -34,6 +34,10 @@ int main(void) { std::vector ignored_files = { "Apriel-", "Olmo-3-7B-Instruct-Heretic-GGUF", + "sheldonrobinson-Llama-Guard", + "deepseek-community-Janus-Pro-1B", + "bitshrine-gemma-2-2B-function-calling", + "PaddlePaddle-PaddleOCR-VL", }; for (const auto & ignored : ignored_files) { if (filename.find(ignored) != std::string::npos) { @@ -119,11 +123,18 @@ void run(std::string contents) { ], "bos_token": "", "eos_token": "", - "tools": [], - "functions": "", - "datetime": "" + "tools": [] })"; - jinja::global_from_json(ctx, 
nlohmann::json::parse(json_inp)); + auto input_json = nlohmann::json::parse(json_inp); + + // workaround for functionary models + input_json["functions"] = ""; + input_json["datetime"] = ""; + + // workaround for Llama Guard models + input_json["excluded_category_keys"] = nlohmann::json::array(); + + jinja::global_from_json(ctx, input_json); jinja::vm vm(ctx); const jinja::value results = vm.execute(ast);