allow print source on exception

This commit is contained in:
Xuan Son Nguyen 2025-12-28 18:45:41 +01:00
parent 64e29a5848
commit acb0effa25
8 changed files with 167 additions and 77 deletions

View File

@ -54,12 +54,13 @@ std::string lexer::preprocess(const std::string & template_str, const preprocess
return result; return result;
} }
std::vector<token> lexer::tokenize(const std::string & input, const preprocess_options & options) { lexer_result lexer::tokenize(const std::string & input, const preprocess_options & options) {
std::vector<token> tokens; std::vector<token> tokens;
std::string src = preprocess(input, options); std::string src = preprocess(input, options);
JJ_DEBUG("preprocessed input: '%s'", src.c_str()); JJ_DEBUG("preprocessed input: '%s'", src.c_str());
size_t pos = 0; size_t pos = 0;
size_t start_pos = 0;
size_t curly_bracket_depth = 0; size_t curly_bracket_depth = 0;
using pred = std::function<bool(char)>; using pred = std::function<bool(char)>;
@ -101,6 +102,7 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
}; };
while (pos < src.size()) { while (pos < src.size()) {
start_pos = pos;
JJ_DEBUG("lexer main loop at pos %zu: '%s...'", pos, src.substr(pos, 10).c_str()); JJ_DEBUG("lexer main loop at pos %zu: '%s...'", pos, src.substr(pos, 10).c_str());
// First, consume all text that is outside of a Jinja statement or expression // First, consume all text that is outside of a Jinja statement or expression
@ -122,13 +124,14 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
} }
JJ_DEBUG("consumed text: '%s'", text.c_str()); JJ_DEBUG("consumed text: '%s'", text.c_str());
if (!text.empty()) { if (!text.empty()) {
tokens.push_back({token::text, text}); tokens.push_back({token::text, text, start_pos});
continue; continue;
} }
} }
// Possibly consume a comment // Possibly consume a comment
if (src[pos] == '{' && next_pos_is( {'#'} )) { if (src[pos] == '{' && next_pos_is( {'#'} )) {
start_pos = pos;
pos += 2; // Skip the opening {# pos += 2; // Skip the opening {#
std::string comment; std::string comment;
while (!(src[pos] == '#' && next_pos_is( {'}'} ))) { while (!(src[pos] == '#' && next_pos_is( {'}'} ))) {
@ -138,7 +141,7 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
comment += src[pos++]; comment += src[pos++];
} }
JJ_DEBUG("consumed comment: '%s'", comment.c_str()); JJ_DEBUG("consumed comment: '%s'", comment.c_str());
tokens.push_back({token::comment, comment}); tokens.push_back({token::comment, comment, start_pos});
pos += 2; // Skip the closing #} pos += 2; // Skip the closing #}
continue; continue;
} }
@ -152,6 +155,7 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
// Check for unary operators // Check for unary operators
if (ch == '-' || ch == '+') { if (ch == '-' || ch == '+') {
start_pos = pos;
token::type last_token_type = tokens.empty() ? token::undefined : tokens.back().t; token::type last_token_type = tokens.empty() ? token::undefined : tokens.back().t;
if (last_token_type == token::text || last_token_type == token::undefined) { if (last_token_type == token::text || last_token_type == token::undefined) {
throw std::runtime_error(std::string("lexer: unexpected character: ") + ch); throw std::runtime_error(std::string("lexer: unexpected character: ") + ch);
@ -176,7 +180,7 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
std::string value = std::string(1, ch) + num; std::string value = std::string(1, ch) + num;
token::type t = num.empty() ? token::unary_operator : token::numeric_literal; token::type t = num.empty() ? token::unary_operator : token::numeric_literal;
JJ_DEBUG("consumed unary operator or numeric literal: '%s'", value.c_str()); JJ_DEBUG("consumed unary operator or numeric literal: '%s'", value.c_str());
tokens.push_back({t, value}); tokens.push_back({t, value, start_pos});
continue; continue;
} }
} }
@ -185,12 +189,13 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
// Try to match one of the tokens in the mapping table // Try to match one of the tokens in the mapping table
bool matched = false; bool matched = false;
for (const auto & [seq, typ] : ordered_mapping_table) { for (const auto & [seq, typ] : ordered_mapping_table) {
start_pos = pos;
// Inside an object literal, don't treat "}}" as expression-end // Inside an object literal, don't treat "}}" as expression-end
if (seq == "}}" && curly_bracket_depth > 0) { if (seq == "}}" && curly_bracket_depth > 0) {
continue; continue;
} }
if (pos + seq.size() <= src.size() && src.substr(pos, seq.size()) == seq) { if (pos + seq.size() <= src.size() && src.substr(pos, seq.size()) == seq) {
tokens.push_back({typ, seq}); tokens.push_back({typ, seq, start_pos});
if (typ == token::open_expression) { if (typ == token::open_expression) {
curly_bracket_depth = 0; curly_bracket_depth = 0;
} else if (typ == token::open_curly_bracket) { } else if (typ == token::open_curly_bracket) {
@ -207,36 +212,39 @@ std::vector<token> lexer::tokenize(const std::string & input, const preprocess_o
// Strings // Strings
if (ch == '\'' || ch == '"') { if (ch == '\'' || ch == '"') {
start_pos = pos;
++pos; // Skip opening quote ++pos; // Skip opening quote
std::string str = consume_while([ch](char c) { return c != ch; }); std::string str = consume_while([ch](char c) { return c != ch; });
tokens.push_back({token::string_literal, str}); tokens.push_back({token::string_literal, str, start_pos});
++pos; // Skip closing quote ++pos; // Skip closing quote
continue; continue;
} }
// Numbers // Numbers
if (is_integer(ch)) { if (is_integer(ch)) {
start_pos = pos;
std::string num = consume_while(is_integer); std::string num = consume_while(is_integer);
if (pos < src.size() && src[pos] == '.' && pos + 1 < src.size() && is_integer(src[pos + 1])) { if (pos < src.size() && src[pos] == '.' && pos + 1 < src.size() && is_integer(src[pos + 1])) {
++pos; // Consume '.' ++pos; // Consume '.'
std::string frac = consume_while(is_integer); std::string frac = consume_while(is_integer);
num += "." + frac; num += "." + frac;
} }
tokens.push_back({token::numeric_literal, num}); tokens.push_back({token::numeric_literal, num, start_pos});
continue; continue;
} }
// Identifiers // Identifiers
if (is_word(ch)) { if (is_word(ch)) {
start_pos = pos;
std::string word = consume_while(is_word); std::string word = consume_while(is_word);
tokens.push_back({token::identifier, word}); tokens.push_back({token::identifier, word, start_pos});
continue; continue;
} }
throw std::runtime_error(std::string("lexer: unexpected character: ") + ch); throw std::runtime_error(std::string("lexer: unexpected character: ") + ch);
} }
return tokens; return {std::move(tokens), std::move(src)};
} }
} // namespace jinja } // namespace jinja

View File

@ -48,6 +48,7 @@ struct token {
}; };
type t; type t;
std::string value; std::string value;
size_t pos;
}; };
static std::string type_to_string(token::type t) { static std::string type_to_string(token::type t) {
@ -82,6 +83,11 @@ static std::string type_to_string(token::type t) {
} }
} }
// Result of lexer::tokenize(): the token stream together with the text it was scanned from.
// Member order matters: tokenize() builds this with brace-initialization as {tokens, source}.
struct lexer_result {
// Tokens in the order they were produced by the lexer.
std::vector<token> tokens;
// The source after lexer::preprocess(); each token's `pos` is an offset into this string,
// not into the original template passed to tokenize().
std::string preprocessed_source;
};
struct lexer { struct lexer {
const std::map<char, char> escape_chars = { const std::map<char, char> escape_chars = {
{'n', '\n'}, {'n', '\n'},
@ -140,7 +146,7 @@ struct lexer {
std::string preprocess(const std::string& template_str, const preprocess_options& options) const; std::string preprocess(const std::string& template_str, const preprocess_options& options) const;
std::vector<token> tokenize(const std::string & input, const preprocess_options & options); lexer_result tokenize(const std::string & input, const preprocess_options & options);
}; };
} // namespace jinja } // namespace jinja

View File

@ -8,6 +8,8 @@
#include <stdexcept> #include <stdexcept>
#include <algorithm> #include <algorithm>
#define FILENAME "jinja-parser"
namespace jinja { namespace jinja {
// Helper to check type without asserting (useful for logic) // Helper to check type without asserting (useful for logic)
@ -19,9 +21,18 @@ static bool is_type(const statement_ptr & ptr) {
class parser { class parser {
const std::vector<token> & tokens; const std::vector<token> & tokens;
size_t current = 0; size_t current = 0;
size_t prev_cur = 0;
// for debugging; a token can be multiple chars in source
std::vector<size_t> tok_pos_to_src_pos;
public: public:
parser(const std::vector<token> & t) : tokens(t) {} parser(const std::vector<token> & t) : tokens(t) {
tok_pos_to_src_pos.resize(tokens.size());
for (size_t i = 0; i < tokens.size(); i++) {
tok_pos_to_src_pos[i] = tokens[i].pos;
}
}
program parse() { program parse() {
statements body; statements body;
@ -31,10 +42,18 @@ public:
return program(std::move(body)); return program(std::move(body));
} }
// Create an AST node of type T, forwarding `args` to its constructor, and tag it
// with the source offset of the token index last stored in `prev_cur`.
//
// `prev_cur` is copied from `current` by the parse_* methods, and `current` may
// already be one past the last token (peek() handles that case explicitly), so
// the lookup is bounds-checked: past-the-end falls back to the last token's
// offset, or 0 when there are no tokens at all.
template<typename T, typename... Args>
std::unique_ptr<T> mk_stmt(Args&&... args) {
    auto ptr = std::make_unique<T>(std::forward<Args>(args)...);
    if (prev_cur < tok_pos_to_src_pos.size()) {
        ptr->pos = tok_pos_to_src_pos[prev_cur];
    } else {
        ptr->pos = tok_pos_to_src_pos.empty() ? 0 : tok_pos_to_src_pos.back();
    }
    JJ_DEBUG("Created %s statement at src pos %zu", ptr->type().c_str(), ptr->pos);
    return ptr;
}
private: private:
const token & peek(size_t offset = 0) const { const token & peek(size_t offset = 0) const {
if (current + offset >= tokens.size()) { if (current + offset >= tokens.size()) {
static const token end_token{token::undefined, ""}; static const token end_token{token::undefined, "", 0};
return end_token; return end_token;
} }
return tokens[current + offset]; return tokens[current + offset];
@ -74,6 +93,7 @@ private:
} }
statement_ptr parse_any() { statement_ptr parse_any() {
prev_cur = current;
switch (peek().t) { switch (peek().t) {
case token::comment: case token::comment:
return mk_stmt<comment_statement>(tokens[current++].value); return mk_stmt<comment_statement>(tokens[current++].value);
@ -90,6 +110,7 @@ private:
statement_ptr parse_jinja_expression() { statement_ptr parse_jinja_expression() {
// Consume {{ }} tokens // Consume {{ }} tokens
prev_cur = current;
expect(token::open_expression, "Expected {{"); expect(token::open_expression, "Expected {{");
auto result = parse_expression(); auto result = parse_expression();
expect(token::close_expression, "Expected }}"); expect(token::close_expression, "Expected }}");
@ -98,6 +119,7 @@ private:
statement_ptr parse_jinja_statement() { statement_ptr parse_jinja_statement() {
// Consume {% token // Consume {% token
prev_cur = current;
expect(token::open_statement, "Expected {%"); expect(token::open_statement, "Expected {%");
if (peek().t != token::identifier) { if (peek().t != token::identifier) {
@ -194,6 +216,8 @@ private:
auto left = parse_expression_sequence(); auto left = parse_expression_sequence();
statement_ptr value = nullptr; statement_ptr value = nullptr;
statements body; statements body;
prev_cur = current;
if (is(token::equals)) { if (is(token::equals)) {
current++; current++;
@ -218,6 +242,8 @@ private:
statements body; statements body;
statements alternate; statements alternate;
prev_cur = current;
// Keep parsing 'if' body until we reach the first {% elif %} or {% else %} or {% endif %} // Keep parsing 'if' body until we reach the first {% elif %} or {% else %} or {% endif %}
while (!is_statement({"elif", "else", "endif"})) { while (!is_statement({"elif", "else", "endif"})) {
body.push_back(parse_any()); body.push_back(parse_any());
@ -257,6 +283,7 @@ private:
exprs.push_back(primary ? parse_primary_expression() : parse_expression()); exprs.push_back(primary ? parse_primary_expression() : parse_expression());
bool is_tuple = is(token::comma); bool is_tuple = is(token::comma);
while (is(token::comma)) { while (is(token::comma)) {
prev_cur = current;
current++; // consume comma current++; // consume comma
exprs.push_back(primary ? parse_primary_expression() : parse_expression()); exprs.push_back(primary ? parse_primary_expression() : parse_expression());
if (!is(token::comma)) break; if (!is(token::comma)) break;
@ -283,6 +310,7 @@ private:
} }
if (is_statement({"else"})) { if (is_statement({"else"})) {
prev_cur = current;
current += 2; current += 2;
expect(token::close_statement, "Expected %}"); expect(token::close_statement, "Expected %}");
while (!is_statement({"endfor"})) { while (!is_statement({"endfor"})) {
@ -303,10 +331,12 @@ private:
auto a = parse_logical_or_expression(); auto a = parse_logical_or_expression();
if (is_identifier("if")) { if (is_identifier("if")) {
// Ternary expression // Ternary expression
prev_cur = current;
++current; // consume 'if' ++current; // consume 'if'
auto test = parse_logical_or_expression(); auto test = parse_logical_or_expression();
if (is_identifier("else")) { if (is_identifier("else")) {
// Ternary expression with else // Ternary expression with else
prev_cur = current;
++current; // consume 'else' ++current; // consume 'else'
auto false_expr = parse_if_expression(); // recurse to support chained ternaries auto false_expr = parse_if_expression(); // recurse to support chained ternaries
return mk_stmt<ternary_expression>(std::move(test), std::move(a), std::move(false_expr)); return mk_stmt<ternary_expression>(std::move(test), std::move(a), std::move(false_expr));
@ -321,6 +351,7 @@ private:
statement_ptr parse_logical_or_expression() { statement_ptr parse_logical_or_expression() {
auto left = parse_logical_and_expression(); auto left = parse_logical_and_expression();
while (is_identifier("or")) { while (is_identifier("or")) {
prev_cur = current;
token op = tokens[current++]; token op = tokens[current++];
left = mk_stmt<binary_expression>(op, std::move(left), parse_logical_and_expression()); left = mk_stmt<binary_expression>(op, std::move(left), parse_logical_and_expression());
} }
@ -330,6 +361,7 @@ private:
statement_ptr parse_logical_and_expression() { statement_ptr parse_logical_and_expression() {
auto left = parse_logical_negation_expression(); auto left = parse_logical_negation_expression();
while (is_identifier("and")) { while (is_identifier("and")) {
prev_cur = current;
auto op = tokens[current++]; auto op = tokens[current++];
left = mk_stmt<binary_expression>(op, std::move(left), parse_logical_negation_expression()); left = mk_stmt<binary_expression>(op, std::move(left), parse_logical_negation_expression());
} }
@ -339,6 +371,7 @@ private:
statement_ptr parse_logical_negation_expression() { statement_ptr parse_logical_negation_expression() {
// Try parse unary operators // Try parse unary operators
if (is_identifier("not")) { if (is_identifier("not")) {
prev_cur = current;
auto op = tokens[current]; auto op = tokens[current];
++current; // consume 'not' ++current; // consume 'not'
return mk_stmt<unary_expression>(op, parse_logical_negation_expression()); return mk_stmt<unary_expression>(op, parse_logical_negation_expression());
@ -352,8 +385,9 @@ private:
auto left = parse_additive_expression(); auto left = parse_additive_expression();
while (true) { while (true) {
token op; token op;
prev_cur = current;
if (is_identifier("not") && peek(1).t == token::identifier && peek(1).value == "in") { if (is_identifier("not") && peek(1).t == token::identifier && peek(1).value == "in") {
op = {token::identifier, "not in"}; op = {token::identifier, "not in", tokens[current].pos};
current += 2; current += 2;
} else if (is_identifier("in")) { } else if (is_identifier("in")) {
op = tokens[current++]; op = tokens[current++];
@ -368,6 +402,7 @@ private:
statement_ptr parse_additive_expression() { statement_ptr parse_additive_expression() {
auto left = parse_multiplicative_expression(); auto left = parse_multiplicative_expression();
while (is(token::additive_binary_operator)) { while (is(token::additive_binary_operator)) {
prev_cur = current;
auto op = tokens[current++]; auto op = tokens[current++];
left = mk_stmt<binary_expression>(op, std::move(left), parse_multiplicative_expression()); left = mk_stmt<binary_expression>(op, std::move(left), parse_multiplicative_expression());
} }
@ -377,6 +412,7 @@ private:
statement_ptr parse_multiplicative_expression() { statement_ptr parse_multiplicative_expression() {
auto left = parse_test_expression(); auto left = parse_test_expression();
while (is(token::multiplicative_binary_operator)) { while (is(token::multiplicative_binary_operator)) {
prev_cur = current;
auto op = tokens[current++]; auto op = tokens[current++];
left = mk_stmt<binary_expression>(op, std::move(left), parse_test_expression()); left = mk_stmt<binary_expression>(op, std::move(left), parse_test_expression());
} }
@ -386,6 +422,7 @@ private:
statement_ptr parse_test_expression() { statement_ptr parse_test_expression() {
auto operand = parse_filter_expression(); auto operand = parse_filter_expression();
while (is_identifier("is")) { while (is_identifier("is")) {
prev_cur = current;
current++; current++;
bool negate = false; bool negate = false;
if (is_identifier("not")) { current++; negate = true; } if (is_identifier("not")) { current++; negate = true; }
@ -398,6 +435,7 @@ private:
statement_ptr parse_filter_expression() { statement_ptr parse_filter_expression() {
auto operand = parse_call_member_expression(); auto operand = parse_call_member_expression();
while (is(token::pipe)) { while (is(token::pipe)) {
prev_cur = current;
current++; current++;
auto filter = parse_primary_expression(); auto filter = parse_primary_expression();
if (is(token::open_paren)) filter = parse_call_expression(std::move(filter)); if (is(token::open_paren)) filter = parse_call_expression(std::move(filter));
@ -428,6 +466,7 @@ private:
statements args; statements args;
while (!is(token::close_paren)) { while (!is(token::close_paren)) {
statement_ptr arg; statement_ptr arg;
prev_cur = current;
// unpacking: *expr // unpacking: *expr
if (peek().t == token::multiplicative_binary_operator && peek().value == "*") { if (peek().t == token::multiplicative_binary_operator && peek().value == "*") {
++current; // consume * ++current; // consume *
@ -472,6 +511,7 @@ private:
statements slices; statements slices;
bool is_slice = false; bool is_slice = false;
while (!is(token::close_square_bracket)) { while (!is(token::close_square_bracket)) {
prev_cur = current;
if (is(token::colon)) { if (is(token::colon)) {
// A case where a default is used // A case where a default is used
// e.g., [:2] will be parsed as [undefined, 2] // e.g., [:2] will be parsed as [undefined, 2]
@ -496,6 +536,7 @@ private:
} }
statement_ptr parse_primary_expression() { statement_ptr parse_primary_expression() {
prev_cur = current;
auto t = tokens[current++]; auto t = tokens[current++];
switch (t.t) { switch (t.t) {
case token::numeric_literal: case token::numeric_literal:

View File

@ -164,6 +164,9 @@ struct value_string_t : public value_t {
} }
return ss.str(); return ss.str();
} }
// Truthiness of a string value: true when val_str reports a non-zero length,
// false for the empty string.
virtual bool as_bool() const override {
return val_str.length() > 0;
}
virtual const func_builtins & get_builtins() const override; virtual const func_builtins & get_builtins() const override;
void mark_input() { void mark_input() {
val_str.mark_input(); val_str.mark_input();

View File

@ -173,25 +173,6 @@ const func_builtins & value_float_t::get_builtins() const {
return builtins; return builtins;
} }
// static std::string string_strip(const std::string & str, bool left, bool right) {
// size_t start = 0;
// size_t end = str.length();
// if (left) {
// while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
// ++start;
// }
// }
// if (right) {
// while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
// --end;
// }
// }
// return str.substr(start, end - start);
// }
static bool string_startswith(const std::string & str, const std::string & prefix) { static bool string_startswith(const std::string & str, const std::string & prefix) {
if (str.length() < prefix.length()) return false; if (str.length() < prefix.length()) return false;
return str.compare(0, prefix.length(), prefix) == 0; return str.compare(0, prefix.length(), prefix) == 0;

View File

@ -8,8 +8,9 @@
#include <memory> #include <memory>
#include <algorithm> #include <algorithm>
#define JJ_DEBUG(msg, ...) printf("jinja-vm:%3d : " msg "\n", __LINE__, __VA_ARGS__) #define FILENAME "jinja-vm"
//#define JJ_DEBUG(msg, ...) // no-op
bool g_jinja_debug = true;
namespace jinja { namespace jinja {
@ -22,7 +23,51 @@ static value_array exec_statements(const statements & stmts, context & ctx) {
return result; return result;
} }
// Replace every non-overlapping occurrence of `search` in `s` with `replace`, in place.
// Matches are found left to right; text inserted by a replacement is never rescanned.
// An empty `search` is a no-op, and `s` is left untouched when nothing matches.
static void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return; // an empty needle would match at every position
    }
    size_t pos = s.find(search);
    if (pos == std::string::npos) {
        return; // nothing to replace: skip the allocation and copy
    }
    std::string builder;
    builder.reserve(s.length());
    size_t last_pos = 0;
    do {
        builder.append(s, last_pos, pos - last_pos); // text between matches
        builder.append(replace);
        last_pos = pos + search.length();
        pos = s.find(search, last_pos);
    } while (pos != std::string::npos);
    builder.append(s, last_pos, std::string::npos); // tail after the last match
    s = std::move(builder);
}
// Execute this statement with error handling.
//
// Calls execute_impl(ctx). If that throws a std::exception, a raised_exception is
// thrown instead whose message names the statement type and its source position.
// When ctx.source is non-empty the message also contains a single-line excerpt of
// the source around `pos` (newlines shown as "\n") with a caret under the position.
//
// NOTE(review): break_statement / continue_statement signal control flow by throwing
// from execute_impl; if those exception types derive from std::exception they are
// wrapped here as well and would no longer reach the enclosing loop -- confirm.
value statement::execute(context & ctx) {
    try {
        return execute_impl(ctx);
    } catch (const std::exception & e) {
        std::ostringstream oss;
        if (ctx.source.empty()) {
            oss << "\nError executing " << type() << " at position " << pos << ": " << e.what();
            throw raised_exception(oss.str());
        }

        constexpr size_t max_peak_chars = 40;
        const size_t src_len = ctx.source.length();
        // Clamp so that an out-of-range position cannot make substr() throw
        // while we are already building an error message.
        const size_t at    = std::min(pos, src_len);
        const size_t start = (at >= max_peak_chars) ? (at - max_peak_chars) : 0;
        const size_t end   = std::min(at + max_peak_chars, src_len);

        // Escape the text before and after the position separately: each escaped
        // newline grows by one character, so the caret offset has to be measured
        // on the escaped prefix rather than on the raw offset.
        std::string before = ctx.source.substr(start, at - start);
        std::string after  = ctx.source.substr(at, end - at);
        string_replace_all(before, "\n", "\\n");
        string_replace_all(after, "\n", "\\n");

        oss << "\n------------\n";
        oss << "While executing " << type() << " at position " << pos << " in source:\n";
        oss << "..." << before << after << "...\n";
        oss << std::string(before.length() + 3, ' ') << "^\n"; // +3 for the leading "..."
        oss << "Error: " << e.what();
        throw raised_exception(oss.str());
    }
}
value identifier::execute_impl(context & ctx) {
auto it = ctx.var.find(val); auto it = ctx.var.find(val);
auto builtins = global_builtins(); auto builtins = global_builtins();
if (it != ctx.var.end()) { if (it != ctx.var.end()) {
@ -37,7 +82,7 @@ value identifier::execute(context & ctx) {
} }
} }
value binary_expression::execute(context & ctx) { value binary_expression::execute_impl(context & ctx) {
value left_val = left->execute(ctx); value left_val = left->execute(ctx);
JJ_DEBUG("Executing binary expression %s '%s' %s", left_val->type().c_str(), op.value.c_str(), right->type().c_str()); JJ_DEBUG("Executing binary expression %s '%s' %s", left_val->type().c_str(), op.value.c_str(), right->type().c_str());
@ -176,7 +221,7 @@ static value try_builtin_func(const std::string & name, const value & input, boo
throw std::runtime_error("Unknown (built-in) filter '" + name + "' for type " + input->type()); throw std::runtime_error("Unknown (built-in) filter '" + name + "' for type " + input->type());
} }
value filter_expression::execute(context & ctx) { value filter_expression::execute_impl(context & ctx) {
value input = operand->execute(ctx); value input = operand->execute(ctx);
if (is_stmt<identifier>(filter)) { if (is_stmt<identifier>(filter)) {
@ -203,7 +248,7 @@ value filter_expression::execute(context & ctx) {
} }
} }
value test_expression::execute(context & ctx) { value test_expression::execute_impl(context & ctx) {
// NOTE: "value is something" translates to function call "test_is_something(value)" // NOTE: "value is something" translates to function call "test_is_something(value)"
const auto & builtins = global_builtins(); const auto & builtins = global_builtins();
if (!is_stmt<identifier>(test)) { if (!is_stmt<identifier>(test)) {
@ -222,7 +267,7 @@ value test_expression::execute(context & ctx) {
return it->second(args); return it->second(args);
} }
value unary_expression::execute(context & ctx) { value unary_expression::execute_impl(context & ctx) {
value operand_val = argument->execute(ctx); value operand_val = argument->execute(ctx);
JJ_DEBUG("Executing unary expression with operator '%s'", op.value.c_str()); JJ_DEBUG("Executing unary expression with operator '%s'", op.value.c_str());
@ -241,7 +286,7 @@ value unary_expression::execute(context & ctx) {
throw std::runtime_error("Unknown unary operator '" + op.value + "'"); throw std::runtime_error("Unknown unary operator '" + op.value + "'");
} }
value if_statement::execute(context & ctx) { value if_statement::execute_impl(context & ctx) {
value test_val = test->execute(ctx); value test_val = test->execute(ctx);
auto out = mk_val<value_array>(); auto out = mk_val<value_array>();
if (test_val->as_bool()) { if (test_val->as_bool()) {
@ -258,7 +303,7 @@ value if_statement::execute(context & ctx) {
return out; return out;
} }
value for_statement::execute(context & ctx) { value for_statement::execute_impl(context & ctx) {
context scope(ctx); // new scope for loop variables context scope(ctx); // new scope for loop variables
statement_ptr iter_expr = std::move(iterable); statement_ptr iter_expr = std::move(iterable);
@ -377,7 +422,7 @@ value for_statement::execute(context & ctx) {
return result; return result;
} }
value set_statement::execute(context & ctx) { value set_statement::execute_impl(context & ctx) {
auto rhs = val ? val->execute(ctx) : exec_statements(body, ctx); auto rhs = val ? val->execute(ctx) : exec_statements(body, ctx);
if (is_stmt<identifier>(assignee)) { if (is_stmt<identifier>(assignee)) {
@ -427,7 +472,7 @@ value set_statement::execute(context & ctx) {
return mk_val<value_null>(); return mk_val<value_null>();
} }
value macro_statement::execute(context & ctx) { value macro_statement::execute_impl(context & ctx) {
std::string name = cast_stmt<identifier>(this->name)->val; std::string name = cast_stmt<identifier>(this->name)->val;
const func_handler func = [this, &ctx, name](const func_args & args) -> value { const func_handler func = [this, &ctx, name](const func_args & args) -> value {
JJ_DEBUG("Invoking macro '%s' with %zu arguments", name.c_str(), args.args.size()); JJ_DEBUG("Invoking macro '%s' with %zu arguments", name.c_str(), args.args.size());
@ -454,7 +499,7 @@ value macro_statement::execute(context & ctx) {
return mk_val<value_null>(); return mk_val<value_null>();
} }
value member_expression::execute(context & ctx) { value member_expression::execute_impl(context & ctx) {
value object = this->object->execute(ctx); value object = this->object->execute(ctx);
value property; value property;
@ -536,7 +581,7 @@ value member_expression::execute(context & ctx) {
return val; return val;
} }
value call_expression::execute(context & ctx) { value call_expression::execute_impl(context & ctx) {
// gather arguments // gather arguments
func_args args; func_args args;
for (auto & arg_stmt : this->args) { for (auto & arg_stmt : this->args) {
@ -587,7 +632,7 @@ bool value_compare(const value & a, const value & b) {
return false; return false;
} }
value keyword_argument_expression::execute(context & ctx) { value keyword_argument_expression::execute_impl(context & ctx) {
if (!is_stmt<identifier>(key)) { if (!is_stmt<identifier>(key)) {
throw std::runtime_error("Keyword argument key must be identifiers"); throw std::runtime_error("Keyword argument key must be identifiers");
} }

View File

@ -9,6 +9,9 @@
#include <memory> #include <memory>
#include <sstream> #include <sstream>
// Debug logging helper: when g_jinja_debug is set, prints a printf-style message
// prefixed with FILENAME and the current line number. FILENAME must be defined by
// the including source file, and at least one format argument is required.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and cannot capture an `else` that follows it at the call site.
#define JJ_DEBUG(msg, ...) do { if (g_jinja_debug) printf("%s:%3d : " msg "\n", FILENAME, __LINE__, __VA_ARGS__); } while (0)
// Global switch for JJ_DEBUG output; declared here, defined in a source file.
extern bool g_jinja_debug;
namespace jinja { namespace jinja {
@ -37,14 +40,11 @@ template<typename T>
const T * cast_stmt(const statement_ptr & ptr) { const T * cast_stmt(const statement_ptr & ptr) {
return dynamic_cast<const T*>(ptr.get()); return dynamic_cast<const T*>(ptr.get());
} }
template<typename T, typename... Args>
std::unique_ptr<T> mk_stmt(Args&&... args) {
return std::make_unique<T>(std::forward<Args>(args)...);
}
// End Helpers // End Helpers
struct context { struct context {
std::map<std::string, value> var; std::map<std::string, value> var;
std::string source; // for debugging
context() { context() {
var["true"] = mk_val<value_bool>(true); var["true"] = mk_val<value_bool>(true);
@ -65,9 +65,13 @@ struct context {
* Base class for all nodes in the AST. * Base class for all nodes in the AST.
*/ */
struct statement { struct statement {
size_t pos; // position in source, for debugging
virtual ~statement() = default; virtual ~statement() = default;
virtual std::string type() const { return "Statement"; } virtual std::string type() const { return "Statement"; }
virtual value execute(context &) { throw std::runtime_error("cannot exec " + type()); } // execute_impl must be overridden by derived classes
virtual value execute_impl(context &) { throw std::runtime_error("cannot exec " + type()); }
// execute is the public method to execute a statement with error handling
virtual value execute(context &);
}; };
// Type Checking Utilities // Type Checking Utilities
@ -100,7 +104,7 @@ struct program : public statement {
explicit program(statements && body) : body(std::move(body)) {} explicit program(statements && body) : body(std::move(body)) {}
std::string type() const override { return "Program"; } std::string type() const override { return "Program"; }
value execute(context &) override { value execute_impl(context &) override {
throw std::runtime_error("Cannot execute program directly, use jinja::vm instead"); throw std::runtime_error("Cannot execute program directly, use jinja::vm instead");
} }
}; };
@ -116,7 +120,7 @@ struct if_statement : public statement {
} }
std::string type() const override { return "If"; } std::string type() const override { return "If"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct identifier; struct identifier;
@ -140,7 +144,7 @@ struct for_statement : public statement {
} }
std::string type() const override { return "For"; } std::string type() const override { return "For"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct break_statement : public statement { struct break_statement : public statement {
@ -152,7 +156,7 @@ struct break_statement : public statement {
} }
}; };
value execute(context &) override { value execute_impl(context &) override {
throw break_statement::exception(); throw break_statement::exception();
} }
}; };
@ -166,7 +170,7 @@ struct continue_statement : public statement {
} }
}; };
value execute(context &) override { value execute_impl(context &) override {
throw continue_statement::exception(); throw continue_statement::exception();
} }
}; };
@ -183,7 +187,7 @@ struct set_statement : public statement {
} }
std::string type() const override { return "Set"; } std::string type() const override { return "Set"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct macro_statement : public statement { struct macro_statement : public statement {
@ -198,14 +202,14 @@ struct macro_statement : public statement {
} }
std::string type() const override { return "Macro"; } std::string type() const override { return "Macro"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct comment_statement : public statement { struct comment_statement : public statement {
std::string val; std::string val;
explicit comment_statement(const std::string & v) : val(v) {} explicit comment_statement(const std::string & v) : val(v) {}
std::string type() const override { return "Comment"; } std::string type() const override { return "Comment"; }
value execute(context &) override { value execute_impl(context &) override {
return mk_val<value_null>(); return mk_val<value_null>();
} }
}; };
@ -223,7 +227,7 @@ struct member_expression : public expression {
chk_type<expression>(this->property); chk_type<expression>(this->property);
} }
std::string type() const override { return "MemberExpression"; } std::string type() const override { return "MemberExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct call_expression : public expression { struct call_expression : public expression {
@ -236,7 +240,7 @@ struct call_expression : public expression {
for (const auto& arg : this->args) chk_type<expression>(arg); for (const auto& arg : this->args) chk_type<expression>(arg);
} }
std::string type() const override { return "CallExpression"; } std::string type() const override { return "CallExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
/** /**
@ -246,7 +250,7 @@ struct identifier : public expression {
std::string val; std::string val;
explicit identifier(const std::string & val) : val(val) {} explicit identifier(const std::string & val) : val(val) {}
std::string type() const override { return "Identifier"; } std::string type() const override { return "Identifier"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
// Literals // Literals
@ -255,7 +259,7 @@ struct integer_literal : public expression {
int64_t val; int64_t val;
explicit integer_literal(int64_t val) : val(val) {} explicit integer_literal(int64_t val) : val(val) {}
std::string type() const override { return "IntegerLiteral"; } std::string type() const override { return "IntegerLiteral"; }
value execute(context &) override { value execute_impl(context &) override {
return std::make_unique<value_int_t>(val); return std::make_unique<value_int_t>(val);
} }
}; };
@ -264,7 +268,7 @@ struct float_literal : public expression {
double val; double val;
explicit float_literal(double val) : val(val) {} explicit float_literal(double val) : val(val) {}
std::string type() const override { return "FloatLiteral"; } std::string type() const override { return "FloatLiteral"; }
value execute(context &) override { value execute_impl(context &) override {
return std::make_unique<value_float_t>(val); return std::make_unique<value_float_t>(val);
} }
}; };
@ -273,7 +277,7 @@ struct string_literal : public expression {
std::string val; std::string val;
explicit string_literal(const std::string & val) : val(val) {} explicit string_literal(const std::string & val) : val(val) {}
std::string type() const override { return "StringLiteral"; } std::string type() const override { return "StringLiteral"; }
value execute(context &) override { value execute_impl(context &) override {
return std::make_unique<value_string_t>(val); return std::make_unique<value_string_t>(val);
} }
}; };
@ -324,7 +328,7 @@ struct binary_expression : public expression {
chk_type<expression>(this->right); chk_type<expression>(this->right);
} }
std::string type() const override { return "BinaryExpression"; } std::string type() const override { return "BinaryExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
/** /**
@ -341,7 +345,7 @@ struct filter_expression : public expression {
chk_type<identifier, call_expression>(this->filter); chk_type<identifier, call_expression>(this->filter);
} }
std::string type() const override { return "FilterExpression"; } std::string type() const override { return "FilterExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct filter_statement : public statement { struct filter_statement : public statement {
@ -388,7 +392,7 @@ struct test_expression : public expression {
chk_type<identifier>(this->test); chk_type<identifier>(this->test);
} }
std::string type() const override { return "TestExpression"; } std::string type() const override { return "TestExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
/** /**
@ -403,7 +407,7 @@ struct unary_expression : public expression {
chk_type<expression>(this->argument); chk_type<expression>(this->argument);
} }
std::string type() const override { return "UnaryExpression"; } std::string type() const override { return "UnaryExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct slice_expression : public expression { struct slice_expression : public expression {
@ -418,7 +422,7 @@ struct slice_expression : public expression {
chk_type<expression>(this->step_expr); chk_type<expression>(this->step_expr);
} }
std::string type() const override { return "SliceExpression"; } std::string type() const override { return "SliceExpression"; }
value execute(context &) override { value execute_impl(context &) override {
throw std::runtime_error("must be handled by MemberExpression"); throw std::runtime_error("must be handled by MemberExpression");
} }
}; };
@ -433,7 +437,7 @@ struct keyword_argument_expression : public expression {
chk_type<expression>(this->val); chk_type<expression>(this->val);
} }
std::string type() const override { return "KeywordArgumentExpression"; } std::string type() const override { return "KeywordArgumentExpression"; }
value execute(context & ctx) override; value execute_impl(context & ctx) override;
}; };
struct spread_expression : public expression { struct spread_expression : public expression {

View File

@ -16,9 +16,10 @@ int main(void) {
//std::string contents = "{% if messages[0]['role'] != 'system' %}nice {{ messages[0]['content'] }}{% endif %}"; //std::string contents = "{% if messages[0]['role'] != 'system' %}nice {{ messages[0]['content'] }}{% endif %}";
//std::string contents = "<some_tokens> {{ messages[0]['content'] }} <another_token>"; //std::string contents = "<some_tokens> {{ messages[a]['content'] }} <another_token>";
//std::string contents = "{{ aaa[bbb] }}";
std::ifstream infile("models/templates/Qwen-Qwen3-0.6B.jinja"); std::ifstream infile("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>()); std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
std::cout << "=== INPUT ===\n" << contents << "\n\n"; std::cout << "=== INPUT ===\n" << contents << "\n\n";
@ -27,19 +28,20 @@ int main(void) {
jinja::preprocess_options options; jinja::preprocess_options options;
options.trim_blocks = true; options.trim_blocks = true;
options.lstrip_blocks = false; options.lstrip_blocks = false;
auto tokens = lexer.tokenize(contents, options); auto lexer_res = lexer.tokenize(contents, options);
for (const auto & tok : tokens) { for (const auto & tok : lexer_res.tokens) {
std::cout << "token: type=" << static_cast<int>(tok.t) << " text='" << tok.value << "'\n"; std::cout << "token: type=" << static_cast<int>(tok.t) << " text='" << tok.value << "' pos=" << tok.pos << "\n";
} }
std::cout << "\n=== AST ===\n"; std::cout << "\n=== AST ===\n";
jinja::program ast = jinja::parse_from_tokens(tokens); jinja::program ast = jinja::parse_from_tokens(lexer_res.tokens);
for (const auto & stmt : ast.body) { for (const auto & stmt : ast.body) {
std::cout << "stmt type: " << stmt->type() << "\n"; std::cout << "stmt type: " << stmt->type() << "\n";
} }
std::cout << "\n=== RUN ===\n"; std::cout << "\n=== RUN ===\n";
jinja::context ctx; jinja::context ctx;
ctx.source = lexer_res.preprocessed_source;
auto make_non_special_string = [](const std::string & s) { auto make_non_special_string = [](const std::string & s) {
jinja::value_string str_val = jinja::mk_val<jinja::value_string>(s); jinja::value_string str_val = jinja::mk_val<jinja::value_string>(s);