diff --git a/common/jinja/jinja-parser.cpp b/common/jinja/jinja-parser.cpp
index fa8cd9785a..07cb71fe11 100644
--- a/common/jinja/jinja-parser.cpp
+++ b/common/jinja/jinja-parser.cpp
@@ -1,39 +1,602 @@
 #include "jinja-lexer.h"
 #include "jinja-vm.h"
 
+#include
+#include
+#include
+#include
+#include
+
 namespace jinja {
 
-void parse(const std::vector & tokens) {
-    auto program = std::make_unique();
-    size_t current = 0;
-
-    /**
-     * Consume the next token if it matches the expected type, otherwise throw an error.
-     * @param type The expected token type
-     * @param error The error message to throw if the token does not match the expected type
-     * @returns The consumed token
-     */
-    auto expect = [&](const token::type & type, const std::string & error) -> token {
-        const auto & prev = tokens[current++];
-        if (prev.t != type) {
-            throw std::runtime_error("Parser Error: " + error + " (" + type_to_string(prev.t) + " != " + type_to_string(type) + ")");
-        }
-        return prev;
-    };
-
-    auto next_token = [&]() -> const token & {
-        if (current >= tokens.size()) {
-            return token{token::undefined, ""};
-        }
-        return tokens[current++];
-    };
-
-    auto expect_identifier = [&](const std::string & name) -> void {
-        if (!is_identifier(name)) {
-            throw std::runtime_error("Expected " + name);
-        }
-        ++current;
-    };
+// Helper to check type without asserting (useful for logic)
+template
+static bool is_type(const statement_ptr & ptr) {
+    return dynamic_cast(ptr.get()) != nullptr;
 }
 
-}; // namespace jinja
+// Recursive-descent parser: turns the lexer's token stream into a tree of
+// statement nodes. Syntax errors are reported by throwing std::runtime_error.
+class parser {
+    const std::vector & tokens; // held by reference, not copied
+    size_t current = 0;         // index of the next token to consume
+
+public:
+    explicit parser(const std::vector & t) : tokens(t) {}
+
+    // Parse the whole token stream into a single root node.
+    statement_ptr parse() {
+        statements body;
+        while (current < tokens.size()) {
+            body.push_back(parse_any());
+        }
+        return std::make_unique(std::move(body));
+    }
+
+private:
+    // Look at the token `offset` positions ahead without consuming it.
+    // Past the end of input a sentinel token of type `undefined` is returned.
+    const token & peek(size_t offset = 0) const {
+        if (current + offset >= tokens.size()) {
+            static const token end_token{token::undefined, ""};
+            return end_token;
+        }
+        return tokens[current + offset];
+    }
+
+    // Consume and return the next token if it has type `type`; otherwise throw
+    // a runtime_error built from `error` and the offending token's text.
+    token expect(token::type type, const std::string& error) {
+        const auto & t = peek();
+        if (t.t != type) {
+            throw std::runtime_error("Parser Error: " + error + " (Got " + t.value + ")");
+        }
+        current++;
+        return t;
+    }
+
+    // Consume the next token if it is the identifier `name`; otherwise throw.
+    void expect_identifier(const std::string& name) {
+        const auto & t = peek();
+        if (t.t != token::identifier || t.value != name) {
+            throw std::runtime_error("Expected identifier: " + name);
+        }
+        current++;
+    }
+
+    // True if the next token has type `type`. Nothing is consumed.
+    bool is(token::type type) const {
+        return peek().t == type;
+    }
+
+    // True if the next token is the identifier `name`. Nothing is consumed.
+    bool is_identifier(const std::string& name) const {
+        return peek().t == token::identifier && peek().value == name;
+    }
+
+    // True if the next two tokens are `{%` and an identifier listed in `names`.
+    bool is_statement(const std::vector& names) const {
+        if (peek(0).t != token::open_statement || peek(1).t != token::identifier) {
+            return false;
+        }
+        const auto & val = peek(1).value;
+        return std::find(names.begin(), names.end(), val) != names.end();
+    }
+
+    // Parse one template item: a comment, raw text, a {% %} statement or a {{ }} expression.
+    statement_ptr parse_any() {
+        switch (peek().t) {
+            case token::comment:
+                return std::make_unique(tokens[current++].value);
+            case token::text:
+                return std::make_unique(tokens[current++].value);
+            case token::open_statement:
+                return parse_jinja_statement();
+            case token::open_expression:
+                return parse_jinja_expression();
+            default:
+                // Also reached at end of input, where peek() yields the `undefined`
+                // sentinel; this is what stops the "until {% endX %}" loops below.
+                throw std::runtime_error("Unexpected token type");
+        }
+    }
+
+    // Parse `{{ expression }}`.
+    statement_ptr parse_jinja_expression() {
+        // Consume {{ }} tokens
+        expect(token::open_expression, "Expected {{");
+        auto result = parse_expression();
+        expect(token::close_expression, "Expected }}");
+        return result;
+    }
+
+    // Parse one `{% ... %}` statement, including the body and closing tag of block
+    // statements (if / macro / for / call / filter / multi-line set).
+    statement_ptr parse_jinja_statement() {
+        // Consume {% token
+        expect(token::open_statement, "Expected {%");
+
+        if (peek().t != token::identifier) {
+            throw std::runtime_error("Unknown statement");
+        }
+
+        std::string name = peek().value;
+        current++; // consume identifier
+
+        statement_ptr result;
+        if (name == "set") {
+            result = parse_set_statement();
+
+        } else if (name == "if") {
+            result = parse_if_statement();
+            // expect {% endif %}
+            expect(token::open_statement, "Expected {%");
+            expect_identifier("endif");
+            expect(token::close_statement, "Expected %}");
+
+        } else if (name == "macro") {
+            result = parse_macro_statement();
+            // expect {% endmacro %}
+            expect(token::open_statement, "Expected {%");
+            expect_identifier("endmacro");
+            expect(token::close_statement, "Expected %}");
+
+        } else if (name == "for") {
+            result = parse_for_statement();
+            // expect {% endfor %}
+            expect(token::open_statement, "Expected {%");
+            expect_identifier("endfor");
+            expect(token::close_statement, "Expected %}");
+
+        } else if (name == "break") {
+            expect(token::close_statement, "Expected %}");
+            result = std::make_unique();
+
+        } else if (name == "continue") {
+            expect(token::close_statement, "Expected %}");
+            result = std::make_unique();
+
+        } else if (name == "call") {
+            statements caller_args;
+            if (is(token::open_paren)) {
+                // Optional caller arguments, e.g. {% call(user) dump_users(...) %}
+                caller_args = parse_args();
+            }
+            auto callee = parse_primary_expression();
+            if (!is_type(callee)) throw std::runtime_error("Expected identifier");
+
+            auto call_args = parse_args();
+            expect(token::close_statement, "Expected %}");
+
+            statements body;
+            while (!is_statement({"endcall"})) {
+                body.push_back(parse_any());
+            }
+
+            expect(token::open_statement, "Expected {%");
+            expect_identifier("endcall");
+            expect(token::close_statement, "Expected %}");
+
+            auto call_expr = std::make_unique(std::move(callee), std::move(call_args));
+            result = std::make_unique(std::move(call_expr), std::move(caller_args), std::move(body));
+
+        } else if (name == "filter") {
+            auto filter_node = parse_primary_expression();
+            if (is_type(filter_node) && is(token::open_paren)) {
+                filter_node = parse_call_expression(std::move(filter_node));
+            }
+            expect(token::close_statement, "Expected %}");
+
+            statements body;
+            while (!is_statement({"endfilter"})) {
+                body.push_back(parse_any());
+            }
+
+            expect(token::open_statement, "Expected {%");
+            expect_identifier("endfilter");
+            expect(token::close_statement, "Expected %}");
+            result = std::make_unique(std::move(filter_node), std::move(body));
+
+        } else {
+            throw std::runtime_error("Unknown statement: " + name);
+        }
+        return result;
+    }
+
+    // Parse the remainder of a `set` statement (the keyword is already consumed).
+    // Handles both `{% set x = value %}` and the block form
+    // `{% set x %}...{% endset %}`.
+    statement_ptr parse_set_statement() {
+        // NOTE: `set` acts as both declaration statement and assignment expression
+        auto left = parse_expression_sequence();
+        statement_ptr value = nullptr;
+        statements body;
+
+        if (is(token::equals)) {
+            current++;
+            value = parse_expression_sequence();
+        } else {
+            // parsing multiline set here
+            expect(token::close_statement, "Expected %}");
+            while (!is_statement({"endset"})) {
+                body.push_back(parse_any());
+            }
+            expect(token::open_statement, "Expected {%");
+            expect_identifier("endset");
+        }
+        expect(token::close_statement, "Expected %}");
+        return std::make_unique(std::move(left), std::move(value), std::move(body));
+    }
+
+    // Parse an `if`/`elif` condition and its branches. The closing {% endif %} is
+    // left for the caller, so a chain of elifs shares a single endif.
+    statement_ptr parse_if_statement() {
+        auto test = parse_expression();
+        expect(token::close_statement, "Expected %}");
+
+        statements body;
+        statements alternate;
+
+        // Keep parsing 'if' body until we reach the first {% elif %} or {% else %} or {% endif %}
+        while (!is_statement({"elif", "else", "endif"})) {
+            body.push_back(parse_any());
+        }
+
+        if (is_statement({"elif"})) {
+            current += 2; // consume {% and 'elif'
+            alternate.push_back(parse_if_statement()); // nested If
+        } else if (is_statement({"else"})) {
+            current += 2; // consume {% and 'else'
+            expect(token::close_statement, "Expected %}");
+
+            // keep going until we hit {% endif %}
+            while (!is_statement({"endif"})) {
+                alternate.push_back(parse_any());
+            }
+        }
+        return std::make_unique(std::move(test), std::move(body), std::move(alternate));
+    }
+
+    // Parse a macro's name, parameter list and body. The closing {% endmacro %}
+    // is consumed by the caller.
+    statement_ptr parse_macro_statement() {
+        auto name = parse_primary_expression();
+        auto args = parse_args();
+        expect(token::close_statement, "Expected %}");
+        statements body;
+        // Keep going until we hit {% endmacro
+        while (!is_statement({"endmacro"})) {
+            body.push_back(parse_any());
+        }
+        return std::make_unique(std::move(name), std::move(args), std::move(body));
+    }
+
+    // Parse one expression, or a comma-separated list of them. A single expression
+    // is returned as-is; two or more are wrapped in one node. With `primary` set,
+    // each element is parsed with parse_primary_expression() instead.
+    statement_ptr parse_expression_sequence(bool primary = false) {
+        statements exprs;
+        exprs.push_back(primary ? parse_primary_expression() : parse_expression());
+        bool is_tuple = is(token::comma);
+        while (is(token::comma)) {
+            current++; // consume comma
+            exprs.push_back(primary ? parse_primary_expression() : parse_expression());
+        }
+        return is_tuple ? std::make_unique(std::move(exprs)) : std::move(exprs[0]);
+    }
+
+    // Parse `for <targets> in <iterable> %}` with its body and optional {% else %}
+    // branch. The closing {% endfor %} is consumed by the caller.
+    statement_ptr parse_for_statement() {
+        // e.g., `message` in `for message in messages`
+        auto loop_var = parse_expression_sequence(true); // should be an identifier/tuple
+        if (!is_identifier("in")) throw std::runtime_error("Expected 'in'");
+        current++;
+
+        // `messages` in `for message in messages`
+        auto iterable = parse_expression();
+        expect(token::close_statement, "Expected %}");
+
+        statements body;
+        statements alternate;
+
+        // Keep going until we hit {% endfor or {% else
+        while (!is_statement({"endfor", "else"})) {
+            body.push_back(parse_any());
+        }
+
+        if (is_statement({"else"})) {
+            current += 2; // consume {% and 'else'
+            expect(token::close_statement, "Expected %}");
+            while (!is_statement({"endfor"})) {
+                alternate.push_back(parse_any());
+            }
+        }
+        return std::make_unique(
+            std::move(loop_var), std::move(iterable),
+            std::move(body), std::move(alternate));
+    }
+
+    // Entry point for expressions.
+    statement_ptr parse_expression() {
+        // Choose parse function with lowest precedence
+        return parse_if_expression();
+    }
+
+    // Parse `a if test else b`, or `a if test` when there is no `else`.
+    statement_ptr parse_if_expression() {
+        auto a = parse_logical_or_expression();
+        if (is_identifier("if")) {
+            // Ternary expression
+            ++current; // consume 'if'
+            auto test = parse_logical_or_expression();
+            if (is_identifier("else")) {
+                // Ternary expression with else
+                ++current; // consume 'else'
+                auto false_expr = parse_if_expression(); // recurse to support chained ternaries
+                return std::make_unique(std::move(test), std::move(a), std::move(false_expr));
+            } else {
+                // Select expression on iterable
+                return std::make_unique(std::move(a), std::move(test));
+            }
+        }
+        return a;
+    }
+
+    // Left-associative chain of `or`.
+    statement_ptr parse_logical_or_expression() {
+        auto left = parse_logical_and_expression();
+        while (is_identifier("or")) {
+            auto op = tokens[current++];
+            left = std::make_unique(op, std::move(left), parse_logical_and_expression());
+        }
+        return left;
+    }
+
+    // Left-associative chain of `and`.
+    statement_ptr parse_logical_and_expression() {
+        auto left = parse_logical_negation_expression();
+        while (is_identifier("and")) {
+            auto op = tokens[current++];
+            left = std::make_unique(op, std::move(left), parse_logical_negation_expression());
+        }
+        return left;
+    }
+
+    // Prefix `not`, which may be repeated.
+    statement_ptr parse_logical_negation_expression() {
+        // Try parse unary operators
+        if (is_identifier("not")) {
+            auto op = tokens[current++]; // consume 'not'
+            return std::make_unique(op, parse_logical_negation_expression());
+        }
+        return parse_comparison_expression();
+    }
+
+    // Comparison operators plus the membership operators `in` / `not in`.
+    statement_ptr parse_comparison_expression() {
+        // NOTE: membership has same precedence as comparison
+        // operators are folded left to right, e.g. `a in b == c` parses as ((a in b) == c)
+        auto left = parse_additive_expression();
+        while (true) {
+            token op;
+            if (is_identifier("not") && peek(1).t == token::identifier && peek(1).value == "in") {
+                op = {token::identifier, "not in"};
+                current += 2;
+            } else if (is_identifier("in")) {
+                op = tokens[current++];
+            } else if (is(token::comparison_binary_operator)) {
+                op = tokens[current++];
+            } else break;
+            left = std::make_unique(op, std::move(left), parse_additive_expression());
+        }
+        return left;
+    }
+
+    // Left-associative chain of additive operators.
+    statement_ptr parse_additive_expression() {
+        auto left = parse_multiplicative_expression();
+        while (is(token::additive_binary_operator)) {
+            auto op = tokens[current++];
+            left = std::make_unique(op, std::move(left), parse_multiplicative_expression());
+        }
+        return left;
+    }
+
+    // Left-associative chain of multiplicative operators.
+    statement_ptr parse_multiplicative_expression() {
+        auto left = parse_test_expression();
+        while (is(token::multiplicative_binary_operator)) {
+            auto op = tokens[current++];
+            left = std::make_unique(op, std::move(left), parse_test_expression());
+        }
+        return left;
+    }
+
+    // Parse `operand is [not] test`, which may be chained.
+    statement_ptr parse_test_expression() {
+        auto operand = parse_filter_expression();
+        while (is_identifier("is")) {
+            current++; // consume 'is'
+            bool negate = false;
+            if (is_identifier("not")) { current++; negate = true; }
+            auto test_id = parse_primary_expression();
+            operand = std::make_unique(std::move(operand), negate, std::move(test_id));
+        }
+        return operand;
+    }
+
+    // Parse `operand | filter` or `operand | filter(args)`, which may be chained.
+    statement_ptr parse_filter_expression() {
+        auto operand = parse_call_member_expression();
+        while (is(token::pipe)) {
+            current++; // consume pipe
+            auto filter = parse_primary_expression();
+            if (is(token::open_paren)) filter = parse_call_expression(std::move(filter));
+            operand = std::make_unique(std::move(operand), std::move(filter));
+        }
+        return operand;
+    }
+
+    // Parse a primary expression followed by any member accesses and calls.
+    statement_ptr parse_call_member_expression() {
+        // Handle member expressions recursively
+        auto member = parse_member_expression(parse_primary_expression());
+        return is(token::open_paren)
+            ? parse_call_expression(std::move(member)) // foo.x()
+            : std::move(member);
+    }
+
+    // Parse a call on `callee` (the next token must be `(`), then any further
+    // member accesses or calls applied to its result.
+    statement_ptr parse_call_expression(statement_ptr callee) {
+        auto expr = std::make_unique(std::move(callee), parse_args());
+        auto member = parse_member_expression(std::move(expr)); // foo.x().y
+        return is(token::open_paren)
+            ? parse_call_expression(std::move(member)) // foo.x()()
+            : std::move(member);
+    }
+
+    // Parse a parenthesised, comma-separated argument list. Supports `*expr`
+    // unpacking and `name = value` keyword arguments.
+    statements parse_args() {
+        // comma-separated arguments list
+        expect(token::open_paren, "Expected (");
+        statements args;
+        while (!is(token::close_paren)) {
+            statement_ptr arg;
+            // unpacking: *expr
+            if (peek().t == token::multiplicative_binary_operator && peek().value == "*") {
+                ++current; // consume *
+                arg = std::make_unique(parse_expression());
+            } else {
+                arg = parse_expression();
+                if (is(token::equals)) {
+                    // keyword argument
+                    // e.g., func(x = 5, y = a or b)
+                    ++current; // consume equals
+                    arg = std::make_unique(std::move(arg), parse_expression());
+                }
+            }
+            args.push_back(std::move(arg));
+            if (is(token::comma)) {
+                ++current; // consume comma
+            }
+        }
+        expect(token::close_paren, "Expected )");
+        return args;
+    }
+
+    // Parse any run of `.name` and `[...]` accesses applied to `object`.
+    statement_ptr parse_member_expression(statement_ptr object) {
+        while (is(token::dot) || is(token::open_square_bracket)) {
+            const bool computed = is(token::open_square_bracket);
+            ++current; // consume '.' or '['
+            statement_ptr prop;
+            if (computed) {
+                prop = parse_member_expression_arguments();
+                expect(token::close_square_bracket, "Expected ]");
+            } else {
+                prop = parse_primary_expression();
+            }
+            object = std::make_unique(std::move(object), std::move(prop), computed);
+        }
+        return object;
+    }
+
+    // Parse the contents of `[...]`: a single index expression or a slice.
+    // The closing `]` is left for the caller.
+    statement_ptr parse_member_expression_arguments() {
+        // NOTE: This also handles slice expressions colon-separated arguments list
+        // e.g., ['test'], [0], [:2], [1:], [1:2], [1:2:3]
+        statements slices;
+        bool is_slice = false;
+        while (!is(token::close_square_bracket)) {
+            if (is(token::colon)) {
+                // A case where a default is used
+                // e.g., [:2] will be parsed as [undefined, 2]
+                slices.push_back(nullptr);
+                ++current; // consume colon
+                is_slice = true;
+            } else {
+                slices.push_back(parse_expression());
+                if (is(token::colon)) {
+                    ++current; // consume colon after expression, if it exists
+                    is_slice = true;
+                }
+            }
+        }
+        if (slices.empty()) {
+            // `x[]`: without this check slices[0] below would index an empty vector
+            throw std::runtime_error("Expected at least one argument for member/slice expression");
+        }
+        if (is_slice) {
+            if (slices.size() > 3) {
+                // e.g. [1:2:3:4]: reject rather than silently drop the extra parts
+                throw std::runtime_error("Expected at most 3 arguments for slice expression");
+            }
+            statement_ptr start = std::move(slices[0]);
+            statement_ptr stop  = slices.size() > 1 ? std::move(slices[1]) : nullptr;
+            statement_ptr step  = slices.size() > 2 ? std::move(slices[2]) : nullptr;
+            return std::make_unique(std::move(start), std::move(stop), std::move(step));
+        }
+        return std::move(slices[0]);
+    }
+
+    // Parse a literal, an identifier, a parenthesised expression (or tuple), a
+    // `[...]` list or a `{...}` mapping.
+    statement_ptr parse_primary_expression() {
+        if (current >= tokens.size()) {
+            // tokens[current] would be an out-of-bounds read on truncated input
+            throw std::runtime_error("Unexpected end of input");
+        }
+        auto t = tokens[current++];
+        switch (t.t) {
+            case token::numeric_literal:
+                if (t.value.find('.') != std::string::npos) return std::make_unique(std::stod(t.value));
+                return std::make_unique(std::stoll(t.value));
+            case token::string_literal: {
+                // adjacent string literals are concatenated
+                std::string val = t.value;
+                while (is(token::string_literal)) val += tokens[current++].value;
+                return std::make_unique(val);
+            }
+            case token::identifier:
+                return std::make_unique(t.value);
+            case token::open_paren: {
+                auto expr = parse_expression_sequence();
+                expect(token::close_paren, "Expected )");
+                return expr;
+            }
+            case token::open_square_bracket: {
+                statements vals;
+                while (!is(token::close_square_bracket)) {
+                    vals.push_back(parse_expression());
+                    if (is(token::comma)) current++;
+                }
+                current++; // consume ]
+                return std::make_unique(std::move(vals));
+            }
+            case token::open_curly_bracket: {
+                std::vector> pairs;
+                while (!is(token::close_curly_bracket)) {
+                    auto key = parse_expression();
+                    expect(token::colon, "Expected :");
+                    pairs.push_back({std::move(key), parse_expression()});
+                    if (is(token::comma)) current++;
+                }
+                current++; // consume }
+                return std::make_unique(std::move(pairs));
+            }
+            default:
+                throw std::runtime_error("Unexpected token: " + t.value);
+        }
+    }
+};
+
+// Parse `tokens` into a statement tree; throws std::runtime_error on syntax errors.
+statement_ptr parse(const std::vector& tokens) {
+    return parser(tokens).parse();
+}
+
+} // namespace jinja