From 8cea1ed6b0d81fada93e60a4e41f2b31df5cc283 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sat, 27 Dec 2025 12:55:01 +0100 Subject: [PATCH] parser ok --- common/CMakeLists.txt | 3 +++ common/jinja/jinja-lexer.h | 4 +++- common/jinja/jinja-parser.cpp | 9 +++++---- common/jinja/jinja-parser.h | 16 ++++++++++++++++ common/jinja/jinja-vm.cpp | 0 common/jinja/jinja-vm.h | 28 ++++++++++++---------------- tests/test-chat-jinja.cpp | 33 ++++++--------------------------- 7 files changed, 45 insertions(+), 48 deletions(-) create mode 100644 common/jinja/jinja-parser.h create mode 100644 common/jinja/jinja-vm.cpp diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index f7b99159e3..49ce25a842 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -83,6 +83,9 @@ add_library(${TARGET} STATIC speculative.h unicode.cpp unicode.h + jinja/jinja-lexer.cpp + jinja/jinja-parser.cpp + jinja/jinja-vm.cpp ) target_include_directories(${TARGET} PUBLIC . ../vendor) diff --git a/common/jinja/jinja-lexer.h b/common/jinja/jinja-lexer.h index 2011e487b1..3ed173a4f0 100644 --- a/common/jinja/jinja-lexer.h +++ b/common/jinja/jinja-lexer.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -48,7 +50,7 @@ struct token { std::string value; }; -std::string type_to_string(token::type t) { +static std::string type_to_string(token::type t) { switch (t) { case token::undefined: return "undefined"; case token::text: return "text"; diff --git a/common/jinja/jinja-parser.cpp b/common/jinja/jinja-parser.cpp index 07cb71fe11..5b20f010dc 100644 --- a/common/jinja/jinja-parser.cpp +++ b/common/jinja/jinja-parser.cpp @@ -1,5 +1,6 @@ #include "jinja-lexer.h" #include "jinja-vm.h" +#include "jinja-parser.h" #include #include @@ -22,12 +23,12 @@ class parser { public: parser(const std::vector & t) : tokens(t) {} - statement_ptr parse() { + program parse() { statements body; while (current < tokens.size()) { body.push_back(parse_any()); } - return std::make_unique(std::move(body)); + return program(std::move(body)); } private: @@ -320,7 +321,7 @@ private: statement_ptr parse_logical_or_expression() { auto left = parse_logical_and_expression(); while (is_identifier("or")) { - auto op = tokens[current++]; + token op = tokens[current++]; left = std::make_unique(op, std::move(left), parse_logical_and_expression()); } return left; @@ -538,7 +539,7 @@ private: } }; -statement_ptr parse(const std::vector& tokens) { +program parse_from_tokens(const std::vector & tokens) { return parser(tokens).parse(); } diff --git a/common/jinja/jinja-parser.h b/common/jinja/jinja-parser.h new file mode 100644 index 0000000000..ea212ad181 --- /dev/null +++ b/common/jinja/jinja-parser.h @@ -0,0 +1,16 @@ +#pragma once + +#include "jinja-lexer.h" +#include "jinja-vm.h" + +#include +#include +#include +#include +#include + +namespace jinja { + +program parse_from_tokens(const std::vector & tokens); + +} // namespace jinja diff --git a/common/jinja/jinja-vm.cpp b/common/jinja/jinja-vm.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/jinja/jinja-vm.h b/common/jinja/jinja-vm.h index 9ee2917531..b848ec4d9b 100644 --- a/common/jinja/jinja-vm.h +++ b/common/jinja/jinja-vm.h @@ -1,3 +1,4 @@ +#pragma once #include "jinja-lexer.h" #include @@ -181,26 +182,21 @@ struct identifier : public expression { // Literals -/** - * Abstract base class for all Literal expressions. - * Should not be instantiated directly. - */ -template -struct literal : public expression { - T value; - explicit literal(T && value) : value(std::move(value)) {} - std::string type() const override { return "Literal"; } -}; - -struct integer_literal : public literal { +struct integer_literal : public expression { + int64_t value; + explicit integer_literal(int64_t value) : value(value) {} std::string type() const override { return "IntegerLiteral"; } }; -struct float_literal : public literal { +struct float_literal : public expression { + double value; + explicit float_literal(double value) : value(value) {} std::string type() const override { return "FloatLiteral"; } }; -struct string_literal : public literal { +struct string_literal : public expression { + std::string value; + explicit string_literal(const std::string & value) : value(value) {} std::string type() const override { return "StringLiteral"; } }; @@ -240,11 +236,11 @@ struct object_literal : public expression { * of operations being determined by the operator. */ struct binary_expression : public expression { - token::type op; + token op; statement_ptr left; statement_ptr right; - binary_expression(token::type op, statement_ptr && left, statement_ptr && right) + binary_expression(token op, statement_ptr && left, statement_ptr && right) : op(op), left(std::move(left)), right(std::move(right)) { chk_type(this->left); chk_type(this->right); diff --git a/tests/test-chat-jinja.cpp b/tests/test-chat-jinja.cpp index 9fa0c7c817..ebebba37b1 100644 --- a/tests/test-chat-jinja.cpp +++ b/tests/test-chat-jinja.cpp @@ -7,9 +7,7 @@ #undef NDEBUG #include -#include "peg-parser.h" -#include "json-schema-to-grammar.h" -#include "jinja/jinja-compiler.h" +#include "jinja/jinja-parser.h" #include "jinja/jinja-lexer.h" int main(void) { @@ -26,30 +24,11 @@ int main(void) { std::cout << "token: type=" << static_cast(tok.t) << " text='" << tok.value << "'\n"; } - // jinja::compiler compiler; - // compiler.builder.set_root(compiler.root); - // auto parser = compiler.builder.build(); - - // auto grammar = build_grammar([&](const common_grammar_builder & builder0) { - // parser.build_grammar(builder0); - // }); - // printf("== GRAMMAR ==\n"); - // printf("%s\n", grammar.c_str()); - - // // printf("== DUMP ==\n"); - // // printf("%s\n", parser.dump(compiler.root.id()).c_str()); - - // printf("== PARSE ==\n"); - - // common_peg_parse_context ctx(contents); - // const auto result = parser.parse(ctx); - // if (!result.success()) { - // throw std::runtime_error("failed to parse, type = " + std::to_string(result.type)); - // } - - // ctx.ast.visit(result, [&](const common_peg_ast_node & node) { - // printf("node: rule='%s' text='%s'\n", node.rule.c_str(), std::string(node.text).c_str()); - // }); + jinja::program ast = jinja::parse_from_tokens(tokens); + std::cout << "\n=== AST ===\n"; + for (const auto & stmt : ast.body) { + std::cout << "stmt type: " << stmt->type() << "\n"; + } return 0; }