rm workarounds

This commit is contained in:
Xuan Son Nguyen 2025-12-30 16:07:23 +01:00
parent 9e9a70f72f
commit 9c0fa6f810
6 changed files with 30 additions and 80 deletions

View File

@ -404,16 +404,11 @@ const func_builtins & value_string_t::get_builtins() const {
res->val_str.mark_input_based_on(input->as_string());
return res;
}},
{"selectattr", [](const func_args & args) -> value {
if (args.ctx.wrk_around.string_has_selectattr) {
// no-op, return an array containing the original string
args.ensure_vals<value_string>();
auto result = mk_val<value_array>();
result->push_back(args.args[0]);
return result;
} else {
throw raised_exception("String selectattr builtin not supported");
}
{"selectattr", [](const func_args &) -> value {
throw std::runtime_error("String selectattr builtin not supported");
}},
{"rejectattr", [](const func_args &) -> value {
throw std::runtime_error("String rejectattr builtin not supported");
}},
{"indent", [](const func_args &) -> value {
throw std::runtime_error("String indent builtin not implemented");
@ -662,22 +657,7 @@ const func_builtins & value_object_t::get_builtins() const {
const func_builtins & value_null_t::get_builtins() const {
static const func_builtins builtins = {
{"list", [](const func_args & args) -> value {
// fix for meetkai-functionary-medium-v3.1.jinja
if (args.ctx.wrk_around.none_has_builtins) {
return mk_val<value_array>();
} else {
throw raised_exception("'list' builtin not supported for none type");
}
}},
{"selectattr", [](const func_args & args) -> value {
// fix for meetkai-functionary-medium-v3.1.jinja
if (args.ctx.wrk_around.none_has_builtins) {
return mk_val<value_array>();
} else {
throw raised_exception("'selectattr' builtin not supported for none type");
}
}},
// TODO: builtins for the none type (e.g. 'list', 'selectattr') may need to be implemented here
};
return builtins;
}

View File

@ -132,12 +132,6 @@ struct value_t {
string val_str;
bool val_bool;
// array and object are stored as shared_ptr to allow reference access
// example:
// my_obj = {"a": 1, "b": 2}
// my_arr = [my_obj]
// my_obj["a"] = 3
// print(my_arr[0]["a"]) # should print 3
std::vector<value> val_arr;
std::map<std::string, value> val_obj;

View File

@ -113,15 +113,15 @@ value binary_expression::execute_impl(context & ctx) {
// Special case: `anything in undefined` is `false` and `anything not in undefined` is `true`
return mk_val<value_bool>(op.value == "not in");
}
if (ctx.wrk_around.string_plus_undefined_is_string && (op.value == "+" || op.value == "~")) {
JJ_DEBUG("%s", "Workaround: treating undefined as empty string for string concatenation");
auto left_str = left_val->is_undefined() ? string() : left_val->as_string();
auto right_str = right_val->is_undefined() ? string() : right_val->as_string();
auto output = left_str.append(right_str);
auto res = mk_val<value_string>();
res->val_str = std::move(output);
return res;
}
// if (ctx.wrk_around.string_plus_undefined_is_string && (op.value == "+" || op.value == "~")) {
// JJ_DEBUG("%s", "Workaround: treating undefined as empty string for string concatenation");
// auto left_str = left_val->is_undefined() ? string() : left_val->as_string();
// auto right_str = right_val->is_undefined() ? string() : right_val->as_string();
// auto output = left_str.append(right_str);
// auto res = mk_val<value_string>();
// res->val_str = std::move(output);
// return res;
// }
throw std::runtime_error("Cannot perform operation " + op.value + " on undefined values");
} else if (is_val<value_null>(left_val) || is_val<value_null>(right_val)) {
throw std::runtime_error("Cannot perform operation on null values");

View File

@ -2,7 +2,6 @@
#include "jinja-lexer.h"
#include "jinja-value.h"
#include "jinja-workaround.h"
#include <string>
#include <vector>
@ -53,8 +52,6 @@ struct context {
std::time_t current_time; // for functions that need current time
workarounds wrk_around; // workarounds for non-standard jinja behavior
context() {
var["true"] = mk_val<value_bool>(true);
var["false"] = mk_val<value_bool>(false);

View File

@ -1,24 +0,0 @@
#pragma once
#include "jinja-value.h"
#include <string>
#include <vector>
namespace jinja {
// Toggles for workarounds needed by Jinja templates that rely on non-standard behavior.
// Every toggle is initialized to true, i.e. the workaround is enabled unless a caller clears it.
// NOTE: this is kept as a dedicated file for better documentation
struct workarounds {
// meetkai-functionary-medium-v3.1.jinja calls filters on the None type.
// When set, the 'list' and 'selectattr' builtins of the none type return an
// empty array instead of raising an exception.
bool none_has_builtins = true;
// Olmo applies the + operator between a string and an undefined value.
// When set, the '+' and '~' operators treat an undefined operand as an empty
// string and yield the concatenated string instead of raising an error.
bool string_plus_undefined_is_string = true;
// sheldonrobinson-Llama-Guard calls selectattr on a string.
// When set, the string 'selectattr' builtin is a no-op that returns an array
// containing the original string instead of raising an exception.
bool string_has_selectattr = true;
};
} // namespace jinja

View File

@ -14,7 +14,8 @@
#include "jinja/jinja-parser.h"
#include "jinja/jinja-lexer.h"
void run(std::string contents);
void run_multiple();
void run_single(std::string contents);
int main(void) {
//std::string contents = "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}";
@ -24,8 +25,16 @@ int main(void) {
//std::string contents = "<some_tokens> {{ messages[a]['content'] }} <another_token>";
//std::string contents = "{% if a is not defined %}hello{% endif %}";
//std::ifstream infile("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja"); std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
std::ifstream infile("models/templates/Qwen-Qwen3-0.6B.jinja"); std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
run_single(contents);
//run_multiple();
return 0;
}
void run_multiple(void) {
std::vector<std::string> failed_tests;
bool stop_on_first_failure = false;
@ -65,7 +74,7 @@ int main(void) {
std::ifstream infile(entry.path());
std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
try {
run(contents);
run_single(contents);
} catch (const std::exception & e) {
std::cout << "Exception: " << e.what() << "\n";
std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n";
@ -84,27 +93,21 @@ int main(void) {
for (const auto & test : failed_tests) {
std::cout << "FAILED TEST: " << test << "\n";
}
return 0;
}
void run(std::string contents) {
void run_single(std::string contents) {
jinja::enable_debug(true);
// lexing
jinja::lexer lexer;
jinja::preprocess_options options;
options.trim_blocks = false;
options.lstrip_blocks = false;
auto lexer_res = lexer.tokenize(contents, options);
for (const auto & tok : lexer_res.tokens) {
//std::cout << "token: type=" << static_cast<int>(tok.t) << " text='" << tok.value << "' pos=" << tok.pos << "\n";
}
std::cout << "\n=== AST ===\n";
// compile to AST
jinja::program ast = jinja::parse_from_tokens(lexer_res);
for (const auto & stmt : ast.body) {
//std::cout << "stmt type: " << stmt->type() << "\n";
}
std::cout << "\n=== RUN ===\n";
jinja::context ctx;