add vm types

2025-12-27 12:12:07 +01:00 · 2025-12-27 12:12:07 +01:00 · a35fcb00b5
parent 15b7c50e95
commit a35fcb00b5
3 changed files with 637 additions and 226 deletions
--- a/common/jinja/jinja-lexer.cpp
+++ b/common/jinja/jinja-lexer.cpp
@ -0,0 +1,242 @@
+#include "jinja-lexer.h"
+
+#include <vector>
+#include <string>
+#include <map>
+#include <regex>
+#include <stdexcept>
+#include <cctype>
+#include <functional>
+
+
+// #define JJ_DEBUG(msg, ...)  printf("jinja-lexer: " msg "\n", __VA_ARGS__)
+#define JJ_DEBUG(msg, ...)  // no-op
+
+namespace jinja {
+
+/**
+ * Applies Jinja whitespace-control rules to a raw template before tokenization.
+ *
+ * Steps, in order:
+ *  1. drop a single trailing newline, if present;
+ *  2. reject `lstrip_blocks` (not implemented yet);
+ *  3. with `trim_blocks`, remove the newline directly following `#}`, `%}` or `-}`;
+ *  4. resolve `-` markers: `-%}`, `-}}`, `-#}` swallow the whitespace after the tag,
+ *     `{%-`, `{{-`, `{#-` swallow the whitespace before it; the `-` itself is removed.
+ *
+ * See https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
+ *
+ * @param template_str raw template source
+ * @param options      whitespace handling flags (`lstrip_blocks`, `trim_blocks`)
+ * @return the preprocessed template text
+ * @throws std::runtime_error if `options.lstrip_blocks` is set
+ */
+std::string lexer::preprocess(const std::string & template_str, const preprocess_options & options) const {
+    // std::regex construction is expensive and these patterns never change,
+    // so they are compiled once instead of on every call.
+    static const std::regex trim_blocks_re(R"(([#%-]\})\n)");
+
+    struct strip_rule {
+        std::regex   pattern;
+        const char * replacement;
+    };
+    // Applied top to bottom, same order as before: statement, expression, comment tags.
+    static const strip_rule strip_rules[] = {
+        { std::regex(R"(-%\}\s*)"),  "%}" },
+        { std::regex(R"(\s*\{%-)"),  "{%" },
+        { std::regex(R"(-\}\}\s*)"), "}}" },
+        { std::regex(R"(\s*\{\{-)"), "{{" },
+        { std::regex(R"(-#\}\s*)"),  "#}" },
+        { std::regex(R"(\s*\{\#-)"), "{#" },
+    };
+
+    std::string result = template_str;
+
+    // In the default configuration:
+    //  - a single trailing newline is stripped if present
+    //  - other whitespace (spaces, tabs, newlines etc.) is returned unchanged
+    if (!result.empty() && result.back() == '\n') {
+        result.pop_back();
+    }
+
+    if (options.lstrip_blocks) {
+        // The lstrip_blocks option can also be set to strip tabs and spaces from the
+        // beginning of a line to the start of a block. (Nothing will be stripped if
+        // there are other characters before the start of the block.)
+        // result = std::regex_replace(result, std::regex(R"((?m)^[ \t]*(\{[#%-]))"), "$1");
+        throw std::runtime_error("lstrip_blocks option is not implemented yet");
+    }
+
+    if (options.trim_blocks) {
+        // If an application configures Jinja to trim_blocks, the first newline after
+        // a template tag is removed automatically (like in PHP).
+        result = std::regex_replace(result, trim_blocks_re, "$1");
+    }
+
+    // Handle whitespace control with - in tags
+    for (const auto & rule : strip_rules) {
+        result = std::regex_replace(result, rule.pattern, rule.replacement);
+    }
+
+    // Handle custom transformers-specific `generation` tag
+    // See https://github.com/huggingface/transformers/pull/30650 for more information.
+    // result = std::regex_replace(result, std::regex(R"((?s)\{%\s*generation\s*%\}.+?\{%\s*endgeneration\s*%\})"), "");
+
+    return result;
+}
+
+/**
+ * Splits a template into tokens.
+ *
+ * The input is first run through preprocess(). Text outside of `{% %}`, `{{ }}` and
+ * `{# #}` becomes `text` tokens and comment bodies become `comment` tokens. Inside a
+ * statement or expression, whitespace is skipped and the remaining input is split into
+ * mapping-table tokens, string literals, numeric literals and identifiers.
+ *
+ * @param input   raw template source
+ * @param options whitespace handling flags forwarded to preprocess()
+ * @return the tokens in source order
+ * @throws std::runtime_error on an unknown or dangling escape sequence, on input that
+ *         ends inside a string literal or comment, or on a character that starts no token
+ */
+std::vector<token> lexer::tokenize(const std::string & input, const preprocess_options & options) {
+    std::vector<token> tokens;
+    std::string src = preprocess(input, options);
+    JJ_DEBUG("preprocessed input: '%s'", src.c_str());
+
+    size_t pos = 0;
+    // Nesting level of `{` ... `}`; while it is > 0 a "}}" is read as two closing
+    // braces rather than as the end of an expression.
+    size_t curly_bracket_depth = 0;
+
+    using pred = std::function<bool(char)>;
+    // Collects characters for as long as `predicate` accepts them, decoding
+    // backslash escapes through `escape_chars` along the way.
+    auto consume_while = [&](pred predicate) -> std::string {
+        std::string str;
+        while (predicate(src[pos])) {
+            // check for escape char
+            if (src[pos] == '\\') {
+                // consume backslash
+                ++pos;
+                // check for end of input
+                if (pos >= src.size()) {
+                    throw std::runtime_error("lexer: unexpected end of input after escape character");
+                }
+                // add escaped char (single lookup instead of find + at)
+                char escaped_char = src[pos++];
+                auto it = escape_chars.find(escaped_char);
+                if (it == escape_chars.end()) {
+                    throw std::runtime_error(std::string("lexer: unknown escape character \\") + escaped_char);
+                }
+                str += it->second;
+                continue;
+            }
+
+            str += src[pos++];
+            // src[src.size()] reads as '\0'; a predicate that accepts it (e.g. an
+            // unterminated string literal) ends up here.
+            if (pos > src.size()) {
+                throw std::runtime_error("lexer: unexpected end of input during consume_while");
+            }
+        }
+        return str;
+    };
+
+    // True if the character right after `pos` exists and is one of `chars`.
+    auto next_pos_is = [&](std::initializer_list<char> chars) -> bool {
+        if (pos + 1 >= src.size()) return false;
+        for (char c : chars) {
+            if (src[pos + 1] == c) return true;
+        }
+        return false;
+    };
+
+    while (pos < src.size()) {
+        JJ_DEBUG("lexer main loop at pos %zu: '%s...'", pos, src.substr(pos, 10).c_str());
+
+        // First, consume all text that is outside of a Jinja statement or expression
+        const token::type last_token_type = tokens.empty()
+                                            ? token::undefined
+                                            : tokens.back().t;
+        if (last_token_type == token::undefined ||
+            last_token_type == token::close_statement ||
+            last_token_type == token::close_expression ||
+            last_token_type == token::comment) {
+            std::string text;
+            while (pos < src.size() &&
+                    // Keep going until we hit the next Jinja statement or expression
+                    !(
+                        src[pos] == '{' &&
+                        next_pos_is( {'%', '{', '#'} )
+                    )) {
+                text += src[pos++];
+            }
+            JJ_DEBUG("consumed text: '%s'", text.c_str());
+            if (!text.empty()) {
+                tokens.push_back({token::text, text});
+                continue;
+            }
+        }
+
+        // Possibly consume a comment
+        if (src[pos] == '{' && next_pos_is( {'#'} )) {
+            pos += 2; // Skip the opening {#
+            std::string comment;
+            while (!(src[pos] == '#' && next_pos_is( {'}'} ))) {
+                // Fewer than two characters left after this one: "#}" can no longer follow
+                if (pos + 2 >= src.size()) {
+                    throw std::runtime_error("lexer: missing end of comment tag");
+                }
+                comment += src[pos++];
+            }
+            JJ_DEBUG("consumed comment: '%s'", comment.c_str());
+            tokens.push_back({token::comment, comment});
+            pos += 2; // Skip the closing #}
+            continue;
+        }
+
+        // Consume (and ignore) all whitespace inside Jinja statements or expressions
+        consume_while([](char c) { return std::isspace(static_cast<unsigned char>(c)); });
+
+        if (pos >= src.size()) break;
+
+        char ch = src[pos];
+
+        // Check for unary operators
+        // (no token has been pushed since last_token_type was read above, so it is still current)
+        if (ch == '-' || ch == '+') {
+            if (last_token_type == token::text || last_token_type == token::undefined) {
+                throw std::runtime_error(std::string("lexer: unexpected character: ") + ch);
+            }
+            switch (last_token_type) {
+                case token::identifier:
+                case token::numeric_literal:
+                case token::string_literal:
+                case token::close_paren:
+                case token::close_square_bracket:
+                    // Part of a binary operator
+                    // a - 1, 1 - 1, true - 1, "apple" - 1, (1) - 1, a[1] - 1
+                    // Continue parsing normally
+                    break;
+                default: {
+                    // Is part of a unary operator
+                    // (-1), [-1], (1 + -1), not -1, -apple
+                    ++pos; // Consume the operator
+
+                    // Check for numbers following the unary operator
+                    std::string num = consume_while(is_integer);
+                    std::string value = std::string(1, ch) + num;
+                    token::type t = num.empty() ? token::unary_operator : token::numeric_literal;
+                    JJ_DEBUG("consumed unary operator or numeric literal: '%s'", value.c_str());
+                    tokens.push_back({t, value});
+                    continue;
+                }
+            }
+        }
+
+        // Try to match one of the tokens in the mapping table
+        bool matched = false;
+        for (const auto & [seq, typ] : ordered_mapping_table) {
+            // Inside an object literal, don't treat "}}" as expression-end
+            if (seq == "}}" && curly_bracket_depth > 0) {
+                continue;
+            }
+            // compare() checks in place; substr() would allocate a temporary per table entry
+            if (pos + seq.size() <= src.size() && src.compare(pos, seq.size(), seq) == 0) {
+                tokens.push_back({typ, seq});
+                if (typ == token::open_expression) {
+                    curly_bracket_depth = 0;
+                } else if (typ == token::open_curly_bracket) {
+                    ++curly_bracket_depth;
+                } else if (typ == token::close_curly_bracket) {
+                    // A stray '}' must not wrap the unsigned counter around, otherwise
+                    // every following "}}" would be skipped as "inside an object literal".
+                    if (curly_bracket_depth > 0) {
+                        --curly_bracket_depth;
+                    }
+                }
+                pos += seq.size();
+                matched = true;
+                break; // continue main loop
+            }
+        }
+        if (matched) continue; // continue main loop
+
+        // Strings
+        if (ch == '\'' || ch == '"') {
+            ++pos; // Skip opening quote
+            std::string str = consume_while([ch](char c) { return c != ch; });
+            tokens.push_back({token::string_literal, str});
+            ++pos; // Skip closing quote
+            continue;
+        }
+
+        // Numbers
+        if (is_integer(ch)) {
+            std::string num = consume_while(is_integer);
+            // A '.' only belongs to the number when a digit follows it
+            if (pos < src.size() && src[pos] == '.' && pos + 1 < src.size() && is_integer(src[pos + 1])) {
+                ++pos; // Consume '.'
+                std::string frac = consume_while(is_integer);
+                num += "." + frac;
+            }
+            tokens.push_back({token::numeric_literal, num});
+            continue;
+        }
+
+        // Identifiers
+        if (is_word(ch)) {
+            std::string word = consume_while(is_word);
+            tokens.push_back({token::identifier, word});
+            continue;
+        }
+
+        throw std::runtime_error(std::string("lexer: unexpected character: ") + ch);
+    }
+
+    return tokens;
+}
+
+} // namespace jinja
--- a/common/jinja/jinja-lexer.h
+++ b/common/jinja/jinja-lexer.h
@ -6,9 +6,6 @@
 #include <cctype>
 #include <functional>

-// #define JJ_DEBUG(msg, ...)  printf("jinja-lexer: " msg "\n", __VA_ARGS__)
-#define JJ_DEBUG(msg, ...)  // no-op
-
 namespace jinja {

 struct preprocess_options {
@ -107,230 +104,9 @@ struct lexer {
        {"=", token::equals},
    };

-    std::string preprocess(const std::string& template_str, const preprocess_options& options) const {
-        std::string result = template_str;
-        // According to https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control
+    std::string preprocess(const std::string& template_str, const preprocess_options& options) const; // defined out-of-line in jinja-lexer.cpp

-        // In the default configuration:
-        //  - a single trailing newline is stripped if present
-        //  - other whitespace (spaces, tabs, newlines etc.) is returned unchanged
-        if (!result.empty() && result.back() == '\n') {
-            result.pop_back();
-        }
-
-        if (options.lstrip_blocks) {
-            // The lstrip_blocks option can also be set to strip tabs and spaces from the
-            // beginning of a line to the start of a block. (Nothing will be stripped if
-            // there are other characters before the start of the block.)
-            // result = std::regex_replace(result, std::regex(R"((?m)^[ \t]*(\{[#%-]))"), "$1");
-            throw std::runtime_error("lstrip_blocks option is not implemented yet");
-        }
-
-        if (options.trim_blocks) {
-            // If an application configures Jinja to trim_blocks, the first newline after
-            // a template tag is removed automatically (like in PHP).
-            result = std::regex_replace(result, std::regex(R"(([#%-]\})\n)"), "$1");
-        }
-
-        // Handle whitespace control with - in tags
-        result = std::regex_replace(result, std::regex(R"(-%\}\s*)"), "%}");
-        result = std::regex_replace(result, std::regex(R"(\s*\{%-)"), "{%");
-        result = std::regex_replace(result, std::regex(R"(-\}\}\s*)"), "}}");
-        result = std::regex_replace(result, std::regex(R"(\s*\{\{-)"), "{{");
-        result = std::regex_replace(result, std::regex(R"(-#\}\s*)"), "#}");
-        result = std::regex_replace(result, std::regex(R"(\s*\{\#-)"), "{#");
-
-        // Handle custom transformers-specific `generation` tag
-        // See https://github.com/huggingface/transformers/pull/30650 for more information.
-        // result = std::regex_replace(result, std::regex(R"((?s)\{%\s*generation\s*%\}.+?\{%\s*endgeneration\s*%\})"), "");
-
-        return result;
-    }
-
-    std::vector<token> tokenize(const std::string & input, const preprocess_options & options = {}) {
-        std::vector<token> tokens;
-        std::string src = preprocess(input, options);
-        JJ_DEBUG("preprocessed input: '%s'", src.c_str());
-
-        size_t pos = 0;
-        size_t curly_bracket_depth = 0;
-
-        using pred = std::function<bool(char)>;
-        auto consume_while = [&](pred predicate) -> std::string {
-            std::string str;
-            while (predicate(src[pos])) {
-                // check for escape char
-                if (src[pos] == '\\') {
-                    // consume backslash
-                    ++pos;
-                    // check for end of input
-                    if (pos >= src.size()) {
-                        throw std::runtime_error("lexer: unexpected end of input after escape character");
-                    }
-                    // add escaped char
-                    char escaped_char = src[pos++];
-                    if (escape_chars.find(escaped_char) == escape_chars.end()) {
-                        throw std::runtime_error(std::string("lexer: unknown escape character \\") + escaped_char);
-                    }
-                    char unescaped_char = escape_chars.at(escaped_char);
-                    str += unescaped_char;
-                    continue;
-                }
-
-                str += src[pos++];
-                if (pos > src.size()) {
-                    throw std::runtime_error("lexer: unexpected end of input during consume_while");
-                }
-            }
-            return str;
-        };
-
-        auto next_pos_is = [&](std::initializer_list<char> chars) -> bool {
-            if (pos + 1 >= src.size()) return false;
-            for (char c : chars) {
-                if (src[pos + 1] == c) return true;
-            }
-            return false;
-        };
-
-        while (pos < src.size()) {
-            JJ_DEBUG("lexer main loop at pos %zu: '%s...'", pos, src.substr(pos, 10).c_str());
-
-            // First, consume all text that is outside of a Jinja statement or expression
-            token::type last_token_type = tokens.empty()
-                                                ? token::undefined
-                                                : tokens.back().t;
-            if (last_token_type == token::undefined ||
-                last_token_type == token::close_statement ||
-                last_token_type == token::close_expression ||
-                last_token_type == token::comment) {
-                std::string text;
-                while (pos < src.size() &&
-                        // Keep going until we hit the next Jinja statement or expression
-                        !(
-                            src[pos] == '{' &&
-                            next_pos_is( {'%', '{', '#'} )
-                        )) {
-                    text += src[pos++];
-                }
-                JJ_DEBUG("consumed text: '%s'", text.c_str());
-                if (!text.empty()) {
-                    tokens.push_back({token::text, text});
-                    continue;
-                }
-            }
-
-            // Possibly consume a comment
-            if (src[pos] == '{' && next_pos_is( {'#'} )) {
-                pos += 2; // Skip the opening {#
-                std::string comment;
-                while (!(src[pos] == '#' && next_pos_is( {'}'} ))) {
-                    if (pos + 2 >= src.size()) {
-                        throw std::runtime_error("lexer: missing end of comment tag");
-                    }
-                    comment += src[pos++];
-                }
-                JJ_DEBUG("consumed comment: '%s'", comment.c_str());
-                tokens.push_back({token::comment, comment});
-                pos += 2; // Skip the closing #}
-                continue;
-            }
-
-            // Consume (and ignore) all whitespace inside Jinja statements or expressions
-            consume_while([](char c) { return std::isspace(static_cast<unsigned char>(c)); });
-
-            if (pos >= src.size()) break;
-
-            char ch = src[pos];
-
-            // Check for unary operators
-            if (ch == '-' || ch == '+') {
-                token::type last_token_type = tokens.empty() ? token::undefined : tokens.back().t;
-                if (last_token_type == token::text || last_token_type == token::undefined) {
-                    throw std::runtime_error(std::string("lexer: unexpected character: ") + ch);
-                }
-                switch (last_token_type) {
-                    case token::identifier:
-                    case token::numeric_literal:
-                    case token::string_literal:
-                    case token::close_paren:
-                    case token::close_square_bracket:
-                        // Part of a binary operator
-                        // a - 1, 1 - 1, true - 1, "apple" - 1, (1) - 1, a[1] - 1
-                        // Continue parsing normally
-                        break;
-                    default: {
-                        // Is part of a unary operator
-                        // (-1), [-1], (1 + -1), not -1, -apple
-                        ++pos; // Consume the operator
-
-                        // Check for numbers following the unary operator
-                        std::string num = consume_while(is_integer);
-                        std::string value = std::string(1, ch) + num;
-                        token::type t = num.empty() ? token::unary_operator : token::numeric_literal;
-                        JJ_DEBUG("consumed unary operator or numeric literal: '%s'", value.c_str());
-                        tokens.push_back({t, value});
-                        continue;
-                    }
-                }
-            }
-
-            // Try to match one of the tokens in the mapping table
-            bool matched = false;
-            for (const auto & [seq, typ] : ordered_mapping_table) {
-                // Inside an object literal, don't treat "}}" as expression-end
-                if (seq == "}}" && curly_bracket_depth > 0) {
-                    continue;
-                }
-                if (pos + seq.size() <= src.size() && src.substr(pos, seq.size()) == seq) {
-                    tokens.push_back({typ, seq});
-                    if (typ == token::open_expression) {
-                        curly_bracket_depth = 0;
-                    } else if (typ == token::open_curly_bracket) {
-                        ++curly_bracket_depth;
-                    } else if (typ == token::close_curly_bracket) {
-                        --curly_bracket_depth;
-                    }
-                    pos += seq.size();
-                    matched = true;
-                    break; // continue main loop
-                }
-            }
-            if (matched) continue; // continue main loop
-
-            // Strings
-            if (ch == '\'' || ch == '"') {
-                ++pos; // Skip opening quote
-                std::string str = consume_while([ch](char c) { return c != ch; });
-                tokens.push_back({token::string_literal, str});
-                ++pos; // Skip closing quote
-                continue;
-            }
-
-            // Numbers
-            if (is_integer(ch)) {
-                std::string num = consume_while(is_integer);
-                if (pos < src.size() && src[pos] == '.' && pos + 1 < src.size() && is_integer(src[pos + 1])) {
-                    ++pos; // Consume '.'
-                    std::string frac = consume_while(is_integer);
-                    num += "." + frac;
-                }
-                tokens.push_back({token::numeric_literal, num});
-                continue;
-            }
-
-            // Identifiers
-            if (is_word(ch)) {
-                std::string word = consume_while(is_word);
-                tokens.push_back({token::identifier, word});
-                continue;
-            }
-
-            throw std::runtime_error(std::string("lexer: unexpected character: ") + ch);
-        }
-
-        return tokens;
-    }
+    std::vector<token> tokenize(const std::string & input, const preprocess_options & options = {}); // default kept so tokenize(input) callers still compile
 };

 } // namespace jinja
--- a/common/jinja/jinja-vm.h
+++ b/common/jinja/jinja-vm.h
@ -0,0 +1,393 @@
+#include "jinja-lexer.h"
+
+#include <string>
+#include <vector>
+#include <cassert>
+#include <memory>
+
+
+namespace jinja {
+
+struct context { // passed by reference to statement::execute(); currently an empty placeholder
+    // TODO
+};
+
+/**
+ * Root of the AST node hierarchy: every node type derives from this struct.
+ */
+struct statement {
+    virtual ~statement() = default;
+
+    /** Name of the node kind; concrete nodes override it. */
+    virtual std::string type() const {
+        return "Statement";
+    }
+
+    /** Runs the node against `ctx`; every non-abstract subclass must provide it. */
+    virtual void execute(context & ctx) = 0;
+};
+
+using statement_ptr = std::unique_ptr<statement>; // owning pointer to any AST node
+using statements = std::vector<statement_ptr>; // ordered list of owned AST nodes
+
+// Type Checking Utilities
+
+/**
+ * Asserts that `ptr`, when set, points to a node whose dynamic type is T
+ * (or derived from T). A null pointer passes, so optional fields may stay empty.
+ * The check is an assert and therefore disappears when NDEBUG is defined.
+ */
+template<typename T>
+static void chk_type(const statement_ptr & ptr) {
+    if (ptr) {
+        assert(dynamic_cast<T *>(ptr.get()) != nullptr);
+    }
+}
+
+/**
+ * Two-type variant: asserts that `ptr`, when set, points to a node whose dynamic
+ * type is T or U (or derived from either). A null pointer passes.
+ */
+template<typename T, typename U>
+static void chk_type(const statement_ptr & ptr) {
+    if (ptr) {
+        assert(dynamic_cast<T *>(ptr.get()) != nullptr
+            || dynamic_cast<U *>(ptr.get()) != nullptr);
+    }
+}
+
+// Base Types
+
+/**
+ * Expressions will result in a value at runtime (unlike statements).
+ * Base struct for all expression nodes.
+ */
+struct expression : public statement {
+    std::string type() const override { return "Expression"; }
+    // No behaviour yet; the parameter is left unnamed so the empty body does not
+    // trigger an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+// Statements
+
+/**
+ * Node of type "Program": owns the list of statements stored in `body`.
+ */
+struct program : public statement {
+    statements body;
+
+    explicit program(statements && body) : body(std::move(body)) {}
+    std::string type() const override { return "Program"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/**
+ * Node of type "If": a `test` node plus two statement lists, `body` and `alternate`.
+ * `test` must be an expression (asserted in the constructor) or null.
+ */
+struct if_statement : public statement {
+    statement_ptr test;
+    statements body;
+    statements alternate;
+
+    if_statement(statement_ptr && test, statements && body, statements && alternate)
+        : test(std::move(test)), body(std::move(body)), alternate(std::move(alternate)) {
+        chk_type<expression>(this->test);
+    }
+
+    std::string type() const override { return "If"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+struct identifier;
+struct tuple_literal;
+
+/**
+ * Loop over each item in a sequence
+ * https://jinja.palletsprojects.com/en/3.0.x/templates/#for
+ *
+ * The constructor asserts that `loopvar` is an identifier or a tuple_literal and
+ * that `iterable` is an expression (null is accepted for both).
+ */
+struct for_statement : public statement {
+    statement_ptr loopvar; // Identifier | TupleLiteral
+    statement_ptr iterable;
+    statements body;
+    statements default_block; // if no iteration took place
+
+    for_statement(statement_ptr && loopvar, statement_ptr && iterable, statements && body, statements && default_block)
+        : loopvar(std::move(loopvar)), iterable(std::move(iterable)),
+          body(std::move(body)), default_block(std::move(default_block)) {
+        chk_type<identifier, tuple_literal>(this->loopvar);
+        chk_type<expression>(this->iterable);
+    }
+
+    std::string type() const override { return "For"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/** Node of type "Break"; carries no data. */
+struct break_statement : public statement {
+    std::string type() const override { return "Break"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/** Node of type "Continue"; carries no data. */
+struct continue_statement : public statement {
+    std::string type() const override { return "Continue"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/**
+ * Node of type "Set": an `assignee`, a `value` and a statement list `body`.
+ * `assignee` and `value` must be expressions (asserted in the constructor) or null.
+ */
+struct set_statement : public statement {
+    statement_ptr assignee;
+    statement_ptr value;
+    statements body;
+
+    set_statement(statement_ptr && assignee, statement_ptr && value, statements && body)
+        : assignee(std::move(assignee)), value(std::move(value)), body(std::move(body)) {
+        chk_type<expression>(this->assignee);
+        chk_type<expression>(this->value);
+    }
+
+    std::string type() const override { return "Set"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/**
+ * Node of type "Macro": a `name`, a list of `args` and a statement list `body`.
+ * The constructor asserts that `name` is an identifier and every argument is an
+ * expression (null entries are accepted).
+ */
+struct macro_statement : public statement {
+    statement_ptr name;
+    statements args;
+    statements body;
+
+    macro_statement(statement_ptr && name, statements && args, statements && body)
+        : name(std::move(name)), args(std::move(args)), body(std::move(body)) {
+        chk_type<identifier>(this->name);
+        for (const auto& arg : this->args) chk_type<expression>(arg);
+    }
+
+    std::string type() const override { return "Macro"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/** Node of type "Comment"; `value` holds a copy of the string passed to the constructor. */
+struct comment_statement : public statement {
+    std::string value;
+    explicit comment_statement(const std::string & value) : value(value) {}
+    std::string type() const override { return "Comment"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+// Expressions
+
+/**
+ * Node of type "MemberExpression": an `object`, a `property` and a `computed` flag.
+ * `object` and `property` must be expressions (asserted in the constructor) or null.
+ */
+struct member_expression : public expression {
+    statement_ptr object;
+    statement_ptr property;
+    bool computed;
+
+    member_expression(statement_ptr && obj, statement_ptr && prop, bool is_computed)
+        : object(std::move(obj))
+        , property(std::move(prop))
+        , computed(is_computed)
+    {
+        chk_type<expression>(object);
+        chk_type<expression>(property);
+    }
+
+    std::string type() const override {
+        return "MemberExpression";
+    }
+};
+
+/**
+ * Node of type "CallExpression": a `callee` and its `args`.
+ * The constructor asserts that the callee and every argument are expressions
+ * (null entries are accepted).
+ */
+struct call_expression : public expression {
+    statement_ptr callee;
+    statements args;
+
+    call_expression(statement_ptr && callee_node, statements && arg_nodes)
+        : callee(std::move(callee_node))
+        , args(std::move(arg_nodes))
+    {
+        chk_type<expression>(callee);
+        for (const auto & arg : args) {
+            chk_type<expression>(arg);
+        }
+    }
+
+    std::string type() const override {
+        return "CallExpression";
+    }
+};
+
+/**
+ * A user-defined variable or symbol in the template.
+ * `value` holds a copy of the string passed to the constructor.
+ */
+struct identifier : public expression {
+    std::string value;
+
+    explicit identifier(const std::string & name)
+        : value(name)
+    {}
+
+    std::string type() const override {
+        return "Identifier";
+    }
+};
+
+// Literals
+
+/**
+ * Common base for literal expressions holding a value of type T.
+ * Intended to be used through its subclasses rather than instantiated directly.
+ */
+template <typename T>
+struct literal : public expression {
+    T value;
+
+    // T is the class template parameter, so T&& is an rvalue reference here:
+    // the value is moved into the node.
+    explicit literal(T && initial)
+        : value(std::move(initial))
+    {}
+
+    std::string type() const override {
+        return "Literal";
+    }
+};
+
+/** Integer literal node; `value` is an int64_t. */
+struct integer_literal : public literal<int64_t> {
+    // Constructors are not inherited implicitly and the base has no default
+    // constructor, so without this the node could not be constructed at all.
+    using literal<int64_t>::literal;
+    std::string type() const override { return "IntegerLiteral"; }
+};
+
+/** Floating-point literal node; `value` is a double. */
+struct float_literal : public literal<double> {
+    // Constructors are not inherited implicitly and the base has no default
+    // constructor, so without this the node could not be constructed at all.
+    using literal<double>::literal;
+    std::string type() const override { return "FloatLiteral"; }
+};
+
+/** String literal node; `value` is a std::string. */
+struct string_literal : public literal<std::string> {
+    // Constructors are not inherited implicitly and the base has no default
+    // constructor, so without this the node could not be constructed at all.
+    using literal<std::string>::literal;
+    std::string type() const override { return "StringLiteral"; }
+};
+
+/**
+ * Node of type "ArrayLiteral": owns its element nodes in `value`.
+ * Every element must be an expression (asserted in the constructor) or null.
+ */
+struct array_literal : public expression {
+    statements value;
+
+    explicit array_literal(statements && items)
+        : value(std::move(items))
+    {
+        for (const auto & item : value) {
+            chk_type<expression>(item);
+        }
+    }
+
+    std::string type() const override {
+        return "ArrayLiteral";
+    }
+};
+
+/**
+ * Node of type "TupleLiteral": owns its element nodes in `value`.
+ * Every element must be an expression (asserted in the constructor) or null.
+ */
+struct tuple_literal : public expression {
+    statements value;
+
+    explicit tuple_literal(statements && items)
+        : value(std::move(items))
+    {
+        for (const auto & item : value) {
+            chk_type<expression>(item);
+        }
+    }
+
+    std::string type() const override {
+        return "TupleLiteral";
+    }
+};
+
+/**
+ * Node of type "ObjectLiteral": owns a list of (first, second) node pairs in `value`.
+ * Both nodes of every pair must be expressions (asserted in the constructor) or null.
+ */
+struct object_literal : public expression {
+    std::vector<std::pair<statement_ptr, statement_ptr>> value;
+
+    explicit object_literal(std::vector<std::pair<statement_ptr, statement_ptr>> && entries)
+        : value(std::move(entries))
+    {
+        for (const auto & entry : value) {
+            chk_type<expression>(entry.first);
+            chk_type<expression>(entry.second);
+        }
+    }
+
+    std::string type() const override {
+        return "ObjectLiteral";
+    }
+};
+
+// Complex Expressions
+
+/**
+ * An operation with two sides, separated by an operator.
+ * Note: either side can itself be a complex expression, with the order of
+ * operations being determined by the operator.
+ *
+ * `left` and `right` must be expressions (asserted in the constructor) or null.
+ */
+struct binary_expression : public expression {
+    token::type op;
+    statement_ptr left;
+    statement_ptr right;
+
+    binary_expression(token::type op_type, statement_ptr && lhs, statement_ptr && rhs)
+        : op(op_type)
+        , left(std::move(lhs))
+        , right(std::move(rhs))
+    {
+        chk_type<expression>(left);
+        chk_type<expression>(right);
+    }
+
+    std::string type() const override {
+        return "BinaryExpression";
+    }
+};
+
+/**
+ * An operation with two sides, separated by the | operator.
+ * Operator precedence: https://github.com/pallets/jinja/issues/379#issuecomment-168076202
+ *
+ * The constructor asserts that `operand` is an expression and that `filter` is an
+ * identifier or a call_expression (null is accepted for both).
+ */
+struct filter_expression : public expression {
+    statement_ptr operand;
+    statement_ptr filter;
+
+    filter_expression(statement_ptr && input, statement_ptr && filter_node)
+        : operand(std::move(input))
+        , filter(std::move(filter_node))
+    {
+        chk_type<expression>(operand);
+        chk_type<identifier, call_expression>(filter);
+    }
+
+    std::string type() const override {
+        return "FilterExpression";
+    }
+};
+
+/**
+ * Node of type "FilterStatement": a `filter` node plus a statement list `body`.
+ * `filter` must be an identifier or a call_expression (asserted in the constructor) or null.
+ */
+struct filter_statement : public statement {
+    statement_ptr filter;
+    statements body;
+
+    filter_statement(statement_ptr && filter, statements && body)
+        : filter(std::move(filter)), body(std::move(body)) {
+        chk_type<identifier, call_expression>(this->filter);
+    }
+    std::string type() const override { return "FilterStatement"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/**
+ * An operation which filters a sequence of objects by applying a test to each object,
+ * and only selecting the objects with the test succeeding.
+ *
+ * It may also be used as a shortcut for a ternary operator.
+ *
+ * `lhs` and `test` must be expressions (asserted in the constructor) or null.
+ */
+struct select_expression : public expression {
+    statement_ptr lhs;
+    statement_ptr test;
+
+    select_expression(statement_ptr && lhs_node, statement_ptr && test_node)
+        : lhs(std::move(lhs_node))
+        , test(std::move(test_node))
+    {
+        chk_type<expression>(lhs);
+        chk_type<expression>(test);
+    }
+
+    std::string type() const override {
+        return "SelectExpression";
+    }
+};
+
+/**
+ * An operation with two sides, separated by the "is" operator.
+ *
+ * The constructor asserts that `operand` is an expression and `test` is an
+ * identifier (null is accepted for both); `negate` is stored as given.
+ */
+struct test_expression : public expression {
+    statement_ptr operand;
+    bool negate;
+    statement_ptr test;
+
+    test_expression(statement_ptr && subject, bool is_negated, statement_ptr && test_node)
+        : operand(std::move(subject))
+        , negate(is_negated)
+        , test(std::move(test_node))
+    {
+        chk_type<expression>(operand);
+        chk_type<identifier>(test);
+    }
+
+    std::string type() const override {
+        return "TestExpression";
+    }
+};
+
+/**
+ * An operation with one side (operator on the left).
+ *
+ * Stores the operator token in `op`; `argument` must be an expression
+ * (asserted in the constructor) or null.
+ */
+struct unary_expression : public expression {
+    token op;
+    statement_ptr argument;
+
+    unary_expression(token op_token, statement_ptr && arg)
+        : op(std::move(op_token))
+        , argument(std::move(arg))
+    {
+        chk_type<expression>(argument);
+    }
+
+    std::string type() const override {
+        return "UnaryExpression";
+    }
+};
+
+/**
+ * Node of type "SliceExpression" with `start`, `stop` and `step` nodes.
+ * Each must be an expression (asserted in the constructor); any of them may be null.
+ */
+struct slice_expression : public expression {
+    statement_ptr start;
+    statement_ptr stop;
+    statement_ptr step;
+
+    slice_expression(statement_ptr && first, statement_ptr && last, statement_ptr && stride)
+        : start(std::move(first))
+        , stop(std::move(last))
+        , step(std::move(stride))
+    {
+        chk_type<expression>(start);
+        chk_type<expression>(stop);
+        chk_type<expression>(step);
+    }
+
+    std::string type() const override {
+        return "SliceExpression";
+    }
+};
+
+/**
+ * Node of type "KeywordArgumentExpression": a `key` and a `value`.
+ * The constructor asserts that `key` is an identifier and `value` is an expression
+ * (null is accepted for both).
+ */
+struct keyword_argument_expression : public expression {
+    statement_ptr key;
+    statement_ptr value;
+
+    keyword_argument_expression(statement_ptr && key_node, statement_ptr && value_node)
+        : key(std::move(key_node))
+        , value(std::move(value_node))
+    {
+        chk_type<identifier>(key);
+        chk_type<expression>(value);
+    }
+
+    std::string type() const override {
+        return "KeywordArgumentExpression";
+    }
+};
+
+/**
+ * Node of type "SpreadExpression" wrapping a single `argument`,
+ * which must be an expression (asserted in the constructor) or null.
+ */
+struct spread_expression : public expression {
+    statement_ptr argument;
+
+    explicit spread_expression(statement_ptr && arg)
+        : argument(std::move(arg))
+    {
+        chk_type<expression>(argument);
+    }
+
+    std::string type() const override {
+        return "SpreadExpression";
+    }
+};
+
+/**
+ * Node of type "CallStatement": a `call` node, a list of `caller_args` and a
+ * statement list `body`. The constructor asserts that `call` is a call_expression
+ * and every caller argument is an expression (null entries are accepted).
+ */
+struct call_statement : public statement {
+    statement_ptr call;
+    statements caller_args;
+    statements body;
+
+    call_statement(statement_ptr && call, statements && caller_args, statements && body)
+        : call(std::move(call)), caller_args(std::move(caller_args)), body(std::move(body)) {
+        chk_type<call_expression>(this->call);
+        for (const auto& arg : this->caller_args) chk_type<expression>(arg);
+    }
+    std::string type() const override { return "CallStatement"; }
+    // No behaviour yet; parameter unnamed to avoid an unused-parameter warning.
+    void execute(context &) override {}
+};
+
+/**
+ * Node of type "Ternary" with a `condition` and two result nodes,
+ * `true_expr` and `false_expr`. Each must be an expression
+ * (asserted in the constructor) or null.
+ */
+struct ternary_expression : public expression {
+    statement_ptr condition;
+    statement_ptr true_expr;
+    statement_ptr false_expr;
+
+    ternary_expression(statement_ptr && cond, statement_ptr && when_true, statement_ptr && when_false)
+        : condition(std::move(cond))
+        , true_expr(std::move(when_true))
+        , false_expr(std::move(when_false))
+    {
+        chk_type<expression>(condition);
+        chk_type<expression>(true_expr);
+        chk_type<expression>(false_expr);
+    }
+
+    std::string type() const override {
+        return "Ternary";
+    }
+};
+
+} // namespace jinja