more fixes

This commit is contained in:
Xuan Son Nguyen 2025-12-29 15:07:18 +01:00
parent 026730e8e3
commit 9e9a70f72f
6 changed files with 58 additions and 21 deletions

View File

@ -105,7 +105,8 @@ std::string lexer::preprocess(const std::string & template_str, const preprocess
// Handle custom transformers-specific `generation` tag
// See https://github.com/huggingface/transformers/pull/30650 for more information.
// result = std::regex_replace(result, std::regex(R"((?s)\{%\s*generation\s*%\}.+?\{%\s*endgeneration\s*%\})"), "");
result = std::regex_replace(result, std::regex(R"(\{%\s*generation\s*%\})"), "");
result = std::regex_replace(result, std::regex(R"(\{%\s*endgeneration\s*%\})"), "");
return result;
}

View File

@ -404,6 +404,17 @@ const func_builtins & value_string_t::get_builtins() const {
res->val_str.mark_input_based_on(input->as_string());
return res;
}},
{"selectattr", [](const func_args & args) -> value {
    // Without the workaround enabled, selectattr on a string is rejected.
    if (!args.ctx.wrk_around.string_has_selectattr) {
        throw raised_exception("String selectattr builtin not supported");
    }
    // Workaround enabled: treat the filter as a no-op and hand back an
    // array containing the original string.
    args.ensure_vals<value_string>();
    auto wrapped = mk_val<value_array>();
    wrapped->push_back(args.args[0]);
    return wrapped;
}},
// String `indent` builtin: not implemented; invoking it always throws std::runtime_error.
{"indent", [](const func_args &) -> value {
throw std::runtime_error("String indent builtin not implemented");
}},

View File

@ -35,6 +35,10 @@ static value_string exec_statements(const statements & stmts, context & ctx) {
value statement::execute(context & ctx) {
try {
return execute_impl(ctx);
} catch (const continue_statement::signal & ex) {
throw ex;
} catch (const break_statement::signal & ex) {
throw ex;
} catch (const std::exception & e) {
if (ctx.source.empty()) {
std::ostringstream oss;
@ -359,15 +363,17 @@ value if_statement::execute_impl(context & ctx) {
value for_statement::execute_impl(context & ctx) {
context scope(ctx); // new scope for loop variables
statement_ptr iter_expr = std::move(iterable);
statement_ptr test_expr = nullptr;
jinja::select_expression * select_expr = cast_stmt<select_expression>(iterable);
statement_ptr test_expr_nullptr;
if (is_stmt<select_expression>(iterable)) {
JJ_DEBUG("%s", "For loop has test expression");
auto select = cast_stmt<select_expression>(iterable);
iter_expr = std::move(select->lhs);
test_expr = std::move(select->test);
}
statement_ptr & iter_expr = [&]() -> statement_ptr & {
auto tmp = cast_stmt<select_expression>(iterable);
return tmp ? tmp->lhs : iterable;
}();
statement_ptr & test_expr = [&]() -> statement_ptr & {
auto tmp = cast_stmt<select_expression>(iterable);
return tmp ? tmp->test : test_expr_nullptr;
}();
JJ_DEBUG("Executing for statement, iterable type: %s", iter_expr->type().c_str());
@ -436,21 +442,23 @@ value for_statement::execute_impl(context & ctx) {
} else {
throw std::runtime_error("Invalid loop variable(s): " + loopvar->type());
}
if (test_expr) {
if (select_expr && test_expr) {
scope_update_fn(loop_scope);
value test_val = test_expr->execute(loop_scope);
if (!test_val->as_bool()) {
continue;
}
}
JJ_DEBUG("For loop: adding item type %s at index %zu", current->type().c_str(), i);
filtered_items.push_back(current);
scope_update_fns.push_back(scope_update_fn);
}
JJ_DEBUG("For loop: %zu items after filtering", filtered_items.size());
auto result = mk_val<value_array>();
bool noIteration = true;
for (size_t i = 0; i < filtered_items.size(); ++i) {
for (size_t i = 0; i < filtered_items.size(); i++) {
JJ_DEBUG("For loop iteration %zu/%zu", i + 1, filtered_items.size());
value_object loop_obj = mk_val<value_object>();
loop_obj->insert("index", mk_val<value_int>(i + 1));
@ -469,13 +477,15 @@ value for_statement::execute_impl(context & ctx) {
value val = stmt->execute(ctx);
result->push_back(val);
}
} catch (const continue_statement::exception &) {
} catch (const continue_statement::signal &) {
continue;
} catch (const break_statement::exception &) {
} catch (const break_statement::signal &) {
break;
}
noIteration = false;
}
JJ_DEBUG("For loop complete, total iterations: %zu", filtered_items.size());
if (noIteration) {
for (auto & stmt : default_block) {
value val = stmt->execute(ctx);

View File

@ -160,28 +160,28 @@ struct for_statement : public statement {
struct break_statement : public statement {
std::string type() const override { return "Break"; }
struct exception : public std::exception {
struct signal : public std::exception {
const char* what() const noexcept override {
return "Break statement executed";
}
};
value execute_impl(context &) override {
throw break_statement::exception();
throw break_statement::signal();
}
};
struct continue_statement : public statement {
std::string type() const override { return "Continue"; }
struct exception : public std::exception {
struct signal : public std::exception {
const char* what() const noexcept override {
return "Continue statement executed";
}
};
value execute_impl(context &) override {
throw continue_statement::exception();
throw continue_statement::signal();
}
};

View File

@ -8,6 +8,7 @@
namespace jinja {
// containing workarounds for Jinja templates that rely on non-standard behavior
// NOTE: this is kept as a dedicated file for better documentation
struct workarounds {
// meetkai-functionary-medium-v3.1.jinja calls a filter on the None type
@ -15,6 +16,9 @@ struct workarounds {
// Olmo applies the + operator between a string and undefined
bool string_plus_undefined_is_string = true;
// sheldonrobinson-Llama-Guard calls selectattr on a string
bool string_has_selectattr = true;
};
} // namespace jinja

View File

@ -34,6 +34,10 @@ int main(void) {
std::vector<std::string> ignored_files = {
"Apriel-",
"Olmo-3-7B-Instruct-Heretic-GGUF",
"sheldonrobinson-Llama-Guard",
"deepseek-community-Janus-Pro-1B",
"bitshrine-gemma-2-2B-function-calling",
"PaddlePaddle-PaddleOCR-VL",
};
for (const auto & ignored : ignored_files) {
if (filename.find(ignored) != std::string::npos) {
@ -119,11 +123,18 @@ void run(std::string contents) {
],
"bos_token": "<s>",
"eos_token": "</s>",
"tools": [],
"functions": "",
"datetime": ""
"tools": []
})";
jinja::global_from_json(ctx, nlohmann::json::parse(json_inp));
auto input_json = nlohmann::json::parse(json_inp);
// workaround for functionary models
input_json["functions"] = "";
input_json["datetime"] = "";
// workaround for Llama Guard models
input_json["excluded_category_keys"] = nlohmann::json::array();
jinja::global_from_json(ctx, input_json);
jinja::vm vm(ctx);
const jinja::value results = vm.execute(ast);