rm workarounds

This commit is contained in:
Xuan Son Nguyen 2025-12-30 16:07:23 +01:00
parent 9e9a70f72f
commit 9c0fa6f810
6 changed files with 30 additions and 80 deletions

View File

@ -404,16 +404,11 @@ const func_builtins & value_string_t::get_builtins() const {
res->val_str.mark_input_based_on(input->as_string()); res->val_str.mark_input_based_on(input->as_string());
return res; return res;
}}, }},
{"selectattr", [](const func_args & args) -> value { {"selectattr", [](const func_args &) -> value {
if (args.ctx.wrk_around.string_has_selectattr) { throw std::runtime_error("String selectattr builtin not supported");
// no-op, return an array containing the original string }},
args.ensure_vals<value_string>(); {"rejectattr", [](const func_args &) -> value {
auto result = mk_val<value_array>(); throw std::runtime_error("String rejectattr builtin not supported");
result->push_back(args.args[0]);
return result;
} else {
throw raised_exception("String selectattr builtin not supported");
}
}}, }},
{"indent", [](const func_args &) -> value { {"indent", [](const func_args &) -> value {
throw std::runtime_error("String indent builtin not implemented"); throw std::runtime_error("String indent builtin not implemented");
@ -662,22 +657,7 @@ const func_builtins & value_object_t::get_builtins() const {
const func_builtins & value_null_t::get_builtins() const { const func_builtins & value_null_t::get_builtins() const {
static const func_builtins builtins = { static const func_builtins builtins = {
{"list", [](const func_args & args) -> value { // TODO: may need to implement this, idk
// fix for meetkai-functionary-medium-v3.1.jinja
if (args.ctx.wrk_around.none_has_builtins) {
return mk_val<value_array>();
} else {
throw raised_exception("'list' builtin not supported for none type");
}
}},
{"selectattr", [](const func_args & args) -> value {
// fix for meetkai-functionary-medium-v3.1.jinja
if (args.ctx.wrk_around.none_has_builtins) {
return mk_val<value_array>();
} else {
throw raised_exception("'selectattr' builtin not supported for none type");
}
}},
}; };
return builtins; return builtins;
} }

View File

@ -132,12 +132,6 @@ struct value_t {
string val_str; string val_str;
bool val_bool; bool val_bool;
// array and object are stored as shared_ptr to allow reference access
// example:
// my_obj = {"a": 1, "b": 2}
// my_arr = [my_obj]
// my_obj["a"] = 3
// print(my_arr[0]["a"]) # should print 3
std::vector<value> val_arr; std::vector<value> val_arr;
std::map<std::string, value> val_obj; std::map<std::string, value> val_obj;

View File

@ -113,15 +113,15 @@ value binary_expression::execute_impl(context & ctx) {
// Special case: `anything in undefined` is `false` and `anything not in undefined` is `true` // Special case: `anything in undefined` is `false` and `anything not in undefined` is `true`
return mk_val<value_bool>(op.value == "not in"); return mk_val<value_bool>(op.value == "not in");
} }
if (ctx.wrk_around.string_plus_undefined_is_string && (op.value == "+" || op.value == "~")) { // if (ctx.wrk_around.string_plus_undefined_is_string && (op.value == "+" || op.value == "~")) {
JJ_DEBUG("%s", "Workaround: treating undefined as empty string for string concatenation"); // JJ_DEBUG("%s", "Workaround: treating undefined as empty string for string concatenation");
auto left_str = left_val->is_undefined() ? string() : left_val->as_string(); // auto left_str = left_val->is_undefined() ? string() : left_val->as_string();
auto right_str = right_val->is_undefined() ? string() : right_val->as_string(); // auto right_str = right_val->is_undefined() ? string() : right_val->as_string();
auto output = left_str.append(right_str); // auto output = left_str.append(right_str);
auto res = mk_val<value_string>(); // auto res = mk_val<value_string>();
res->val_str = std::move(output); // res->val_str = std::move(output);
return res; // return res;
} // }
throw std::runtime_error("Cannot perform operation " + op.value + " on undefined values"); throw std::runtime_error("Cannot perform operation " + op.value + " on undefined values");
} else if (is_val<value_null>(left_val) || is_val<value_null>(right_val)) { } else if (is_val<value_null>(left_val) || is_val<value_null>(right_val)) {
throw std::runtime_error("Cannot perform operation on null values"); throw std::runtime_error("Cannot perform operation on null values");

View File

@ -2,7 +2,6 @@
#include "jinja-lexer.h" #include "jinja-lexer.h"
#include "jinja-value.h" #include "jinja-value.h"
#include "jinja-workaround.h"
#include <string> #include <string>
#include <vector> #include <vector>
@ -53,8 +52,6 @@ struct context {
std::time_t current_time; // for functions that need current time std::time_t current_time; // for functions that need current time
workarounds wrk_around; // workarounds for non-standard jinja behavior
context() { context() {
var["true"] = mk_val<value_bool>(true); var["true"] = mk_val<value_bool>(true);
var["false"] = mk_val<value_bool>(false); var["false"] = mk_val<value_bool>(false);

View File

@ -1,24 +0,0 @@
#pragma once
#include "jinja-value.h"
#include <string>
#include <vector>
namespace jinja {
// containing workarounds for Jinja templates that rely on non-standard behavior
// NOTE: this is kept as a dedicated file for better documentation
struct workarounds {
// meetkai-functionary-medium-v3.1.jinja call filter on None type
bool none_has_builtins = true;
// Olmo calls operation + between string and undefined
bool string_plus_undefined_is_string = true;
// sheldonrobinson-Llama-Guard call selectattr on string
bool string_has_selectattr = true;
};
} // namespace jinja

View File

@ -14,7 +14,8 @@
#include "jinja/jinja-parser.h" #include "jinja/jinja-parser.h"
#include "jinja/jinja-lexer.h" #include "jinja/jinja-lexer.h"
void run(std::string contents); void run_multiple();
void run_single(std::string contents);
int main(void) { int main(void) {
//std::string contents = "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}"; //std::string contents = "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}";
@ -24,8 +25,16 @@ int main(void) {
//std::string contents = "<some_tokens> {{ messages[a]['content'] }} <another_token>"; //std::string contents = "<some_tokens> {{ messages[a]['content'] }} <another_token>";
//std::string contents = "{% if a is not defined %}hello{% endif %}"; //std::string contents = "{% if a is not defined %}hello{% endif %}";
//std::ifstream infile("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja"); std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>()); std::ifstream infile("models/templates/Qwen-Qwen3-0.6B.jinja"); std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
run_single(contents);
//run_multiple();
return 0;
}
void run_multiple(void) {
std::vector<std::string> failed_tests; std::vector<std::string> failed_tests;
bool stop_on_first_failure = false; bool stop_on_first_failure = false;
@ -65,7 +74,7 @@ int main(void) {
std::ifstream infile(entry.path()); std::ifstream infile(entry.path());
std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>()); std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
try { try {
run(contents); run_single(contents);
} catch (const std::exception & e) { } catch (const std::exception & e) {
std::cout << "Exception: " << e.what() << "\n"; std::cout << "Exception: " << e.what() << "\n";
std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n"; std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n";
@ -84,27 +93,21 @@ int main(void) {
for (const auto & test : failed_tests) { for (const auto & test : failed_tests) {
std::cout << "FAILED TEST: " << test << "\n"; std::cout << "FAILED TEST: " << test << "\n";
} }
return 0;
} }
void run(std::string contents) { void run_single(std::string contents) {
jinja::enable_debug(true); jinja::enable_debug(true);
// lexing
jinja::lexer lexer; jinja::lexer lexer;
jinja::preprocess_options options; jinja::preprocess_options options;
options.trim_blocks = false; options.trim_blocks = false;
options.lstrip_blocks = false; options.lstrip_blocks = false;
auto lexer_res = lexer.tokenize(contents, options); auto lexer_res = lexer.tokenize(contents, options);
for (const auto & tok : lexer_res.tokens) {
//std::cout << "token: type=" << static_cast<int>(tok.t) << " text='" << tok.value << "' pos=" << tok.pos << "\n";
}
std::cout << "\n=== AST ===\n"; // compile to AST
jinja::program ast = jinja::parse_from_tokens(lexer_res); jinja::program ast = jinja::parse_from_tokens(lexer_res);
for (const auto & stmt : ast.body) {
//std::cout << "stmt type: " << stmt->type() << "\n";
}
std::cout << "\n=== RUN ===\n"; std::cout << "\n=== RUN ===\n";
jinja::context ctx; jinja::context ctx;