Basic universal PEG parser wrapper with tag-to-dict based extractor
This commit is contained in:
parent
0884aad1c5
commit
e501e1dec9
|
|
@ -177,6 +177,31 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
|||
}
|
||||
}
|
||||
|
||||
// Populates `tags` from the AST produced by a parse.
//
// `arena.visit` is asked to walk `result`; for every node it hands to the
// callback whose tag is non-empty, the node's text is stored under that tag.
// Entries already present in `tags` are kept unless a visited node carries the
// same tag, in which case the most recently visited node's text replaces the
// previous value.
void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
    auto record_tagged_node = [this](const common_peg_ast_node & node) {
        // Untagged nodes carry nothing to extract.
        if (node.tag.empty()) {
            return;
        }
        tags[node.tag] = std::string(node.text);
    };

    arena.visit(result, record_tagged_node);
}
|
||||
|
||||
// Runs the stored grammar over `input` and returns the raw parse result
// together with a tag -> text map extracted from the resulting AST.
//
// `is_partial` is forwarded unchanged to the parse context. Tag extraction is
// performed unconditionally, i.e. without inspecting whether the parse
// succeeded; callers should check `result` themselves.
tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const {
    common_peg_parse_context parse_ctx(input, is_partial);
    auto                     outcome = arena.parse(parse_ctx);

    // The AST lives in the parse context, so extraction must happen while it is still alive.
    tag_based_peg_mapper extractor;
    extractor.from_ast(parse_ctx.ast, outcome);

    return tagged_parse_result{ std::move(outcome), std::move(extractor.tags) };
}
|
||||
|
||||
// Convenience factory for a tagged_peg_parser.
//
// Creates a fresh builder, lets `fn` define the grammar on it, installs the
// parser returned by `fn` as the root rule, and wraps the built arena in a
// tagged_peg_parser.
tagged_peg_parser build_tagged_peg_parser(
    const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn) {
    common_peg_parser_builder grammar;

    // `fn` both registers sub-parsers on the builder and returns the root one.
    grammar.set_root(fn(grammar));

    return tagged_peg_parser{ grammar.build() };
}
|
||||
|
||||
common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string & tag_name,
|
||||
const std::string & marker,
|
||||
const common_peg_parser & p) {
|
||||
|
|
|
|||
|
|
@ -138,6 +138,27 @@ inline common_peg_arena build_chat_peg_unified_parser(
|
|||
return builder.build();
|
||||
}
|
||||
|
||||
// Minimal AST mapper that flattens a parse result into a tag -> text dictionary.
class tag_based_peg_mapper {
|
||||
public:
|
||||
// Extracted values keyed by tag name. When several visited nodes share a tag,
// the value from the last one visited wins.
std::map<std::string, std::string> tags;
|
||||
|
||||
// Visits the AST nodes of `result` through `arena` and stores the text of
// every node that has a non-empty tag into `tags`.
void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
|
||||
};
|
||||
|
||||
// Bundle returned by tagged_peg_parser::parse_and_extract.
struct tagged_parse_result {
|
||||
// The parse result as returned by the underlying arena's parse call.
common_peg_parse_result result;
|
||||
// Tag name -> matched text, as collected by tag_based_peg_mapper.
std::map<std::string, std::string> tags;
|
||||
};
|
||||
|
||||
// Thin wrapper around a built PEG arena that parses input and extracts tagged
// spans in a single call.
struct tagged_peg_parser {
|
||||
// The built grammar that parse_and_extract runs.
common_peg_arena arena;
|
||||
|
||||
// Parses `input` with `arena` and returns the parse result together with the
// tag -> text map taken from the AST. `is_partial` is passed through to the
// parse context and defaults to false.
tagged_parse_result parse_and_extract(const std::string & input, bool is_partial = false) const;
|
||||
};
|
||||
|
||||
// Builds a tagged_peg_parser from a grammar-definition callback: `fn` receives
// a fresh builder and must return the parser to use as the root rule.
tagged_peg_parser build_tagged_peg_parser(
|
||||
const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
|
||||
|
||||
class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
|
||||
std::optional<common_chat_tool_call> pending_tool_call; // Tool call waiting for name
|
||||
common_chat_tool_call * current_tool = nullptr;
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ static void test_example_qwen3_coder(testing & t);
|
|||
static void test_example_qwen3_non_coder(testing & t);
|
||||
static void test_command7_parser_compare(testing & t);
|
||||
static void test_prefix_tool_names(testing & t);
|
||||
static void test_tagged_peg_parser(testing & t);
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
testing t(std::cout);
|
||||
|
|
@ -37,6 +38,7 @@ int main(int argc, char * argv[]) {
|
|||
t.test("qwen3 non-coder", test_example_qwen3_non_coder);
|
||||
t.test("comparison", test_command7_parser_compare);
|
||||
t.test("prefix tool names", test_prefix_tool_names);
|
||||
t.test("tagged peg parser", test_tagged_peg_parser);
|
||||
|
||||
return t.summary();
|
||||
}
|
||||
|
|
@ -878,3 +880,60 @@ static void test_prefix_tool_names(testing & t) {
|
|||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Exercises build_tagged_peg_parser / parse_and_extract: plain extraction,
// last-wins behaviour for repeated tags, grammars without tags, multi-line
// input, and parsing in partial mode.
static void test_tagged_peg_parser(testing & t) {
    // Two tags separated by a single space.
    t.test("basic tag extraction", [&](testing & t) {
        const auto tagged = build_tagged_peg_parser([](common_peg_parser_builder & p) {
            auto greeting = p.tag("greeting", p.until(" "));
            auto name     = p.tag("name", p.rest());
            return greeting + " " + name + p.end();
        });

        auto out = tagged.parse_and_extract("Hello World");

        t.assert_true("success", out.result.success());
        t.assert_equal("greeting tag", "Hello", out.tags.at("greeting"));
        t.assert_equal("name tag", "World", out.tags.at("name"));
    });

    // The same tag used twice: the later match replaces the earlier one.
    t.test("duplicate tags overwrite", [&](testing & t) {
        const auto tagged = build_tagged_peg_parser([](common_peg_parser_builder & p) {
            auto first_item  = p.tag("item", p.until(","));
            auto second_item = p.tag("item", p.rest());
            return first_item + "," + second_item + p.end();
        });

        auto out = tagged.parse_and_extract("first,second");

        t.assert_true("success", out.result.success());
        t.assert_equal("item tag", "second", out.tags.at("item"));
    });

    // A grammar without any tag yields an empty dictionary.
    t.test("no tags extracted", [&](testing & t) {
        const auto tagged = build_tagged_peg_parser([](common_peg_parser_builder & p) {
            return p.rest() + p.end();
        });

        auto out = tagged.parse_and_extract("Hello");

        t.assert_true("success", out.result.success());
        t.assert_equal("empty tags", 0u, out.tags.size());
    });

    // Header line followed by a free-form body.
    t.test("structured extraction", [&](testing & t) {
        const auto tagged = build_tagged_peg_parser([](common_peg_parser_builder & p) {
            auto header = p.tag("header", p.until("\n"));
            auto body   = p.tag("body", p.rest());
            return header + "\n" + body + p.end();
        });

        auto out = tagged.parse_and_extract("Title\nBody content here");

        t.assert_true("success", out.result.success());
        t.assert_equal("header", "Title", out.tags.at("header"));
        t.assert_equal("body", "Body content here", out.tags.at("body"));
    });

    // Partial mode: only require that the parse did not fail outright.
    t.test("partial parse", [&](testing & t) {
        const auto tagged = build_tagged_peg_parser([](common_peg_parser_builder & p) {
            auto prefix = p.tag("prefix", p.until(":"));
            auto value  = p.tag("value", p.rest());
            return prefix + ":" + value + p.end();
        });

        auto out = tagged.parse_and_extract("key:val", true);

        t.assert_true("not fail", !out.result.fail());
        t.assert_equal("prefix tag", "key", out.tags.at("prefix"));
        t.assert_equal("value tag", "val", out.tags.at("value"));
    });
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue