#pragma once #include "utils.h" #include #include #include #include #include namespace jinja { struct token { enum type { eof, // end of source text, // The text between Jinja statements or expressions numeric_literal, // e.g., 123, 1.0 string_literal, // 'string' identifier, // Variables, functions, statements, booleans, etc. equals, // = open_paren, // ( close_paren, // ) open_statement, // {% close_statement, // %} open_expression, // {{ close_expression, // }} open_square_bracket, // [ close_square_bracket, // ] open_curly_bracket, // { close_curly_bracket, // } comma, // , dot, // . colon, // : pipe, // | call_operator, // () additive_binary_operator, // + - ~ multiplicative_binary_operator, // * / % comparison_binary_operator, // < > <= >= == != unary_operator, // ! - + comment, // {# ... #} }; type t; std::string value; size_t pos; }; static std::string type_to_string(token::type t) { switch (t) { case token::eof: return "eof"; case token::text: return "text"; case token::numeric_literal: return "numeric_literal"; case token::string_literal: return "string_literal"; case token::identifier: return "identifier"; case token::equals: return "equals"; case token::open_paren: return "open_paren"; case token::close_paren: return "close_paren"; case token::open_statement: return "open_statement"; case token::close_statement: return "close_statement"; case token::open_expression: return "open_expression"; case token::close_expression: return "close_expression"; case token::open_square_bracket: return "open_square_bracket"; case token::close_square_bracket: return "close_square_bracket"; case token::open_curly_bracket: return "open_curly_bracket"; case token::close_curly_bracket: return "close_curly_bracket"; case token::comma: return "comma"; case token::dot: return "dot"; case token::colon: return "colon"; case token::pipe: return "pipe"; case token::call_operator: return "call_operator"; case token::additive_binary_operator: return "additive_binary_operator"; case token::multiplicative_binary_operator: return "multiplicative_binary_operator"; case token::comparison_binary_operator: return "comparison_binary_operator"; case token::unary_operator: return "unary_operator"; case token::comment: return "comment"; default: return "unknown"; } } struct lexer_result { std::vector tokens; std::string source; }; struct lexer { const std::map escape_chars = { {'n', '\n'}, {'t', '\t'}, {'r', '\r'}, {'b', '\b'}, {'f', '\f'}, {'v', '\v'}, {'\\', '\\'}, {'\'', '\''}, {'\"', '\"'}, }; static bool is_word(char c) { return std::isalnum(static_cast(c)) || c == '_'; } static bool is_integer(char c) { return std::isdigit(static_cast(c)); } const std::vector> ordered_mapping_table = { // Trimmed control sequences {"{%-", token::open_statement}, {"-%}", token::close_statement}, {"{{-", token::open_expression}, {"-}}", token::close_expression}, // Control sequences {"{%", token::open_statement}, {"%}", token::close_statement}, {"{{", token::open_expression}, {"}}", token::close_expression}, // Single character tokens {"(", token::open_paren}, {")", token::close_paren}, {"{", token::open_curly_bracket}, {"}", token::close_curly_bracket}, {"[", token::open_square_bracket}, {"]", token::close_square_bracket}, {",", token::comma}, {".", token::dot}, {":", token::colon}, {"|", token::pipe}, // Comparison operators {"<=", token::comparison_binary_operator}, {">=", token::comparison_binary_operator}, {"==", token::comparison_binary_operator}, {"!=", token::comparison_binary_operator}, {"<", token::comparison_binary_operator}, {">", token::comparison_binary_operator}, // Arithmetic operators {"+", token::additive_binary_operator}, {"-", token::additive_binary_operator}, {"~", token::additive_binary_operator}, {"*", token::multiplicative_binary_operator}, {"/", token::multiplicative_binary_operator}, {"%", token::multiplicative_binary_operator}, // Assignment operator {"=", token::equals}, }; // tokenize the source string into a list of tokens // may throw lexer_exception on error lexer_result tokenize(const std::string & source); }; struct lexer_exception : public std::runtime_error { lexer_exception(const std::string & msg, const std::string & source, size_t pos) : std::runtime_error(fmt_error_with_source("lexer", msg, source, pos)) {} }; } // namespace jinja