From 0628226ea18c3321fd9023fbee182963a770d1e1 Mon Sep 17 00:00:00 2001 From: hanishkvc Date: Sat, 8 Nov 2025 00:27:43 +0530 Subject: [PATCH] SimpleChatTC:XmlFiltered: Avoid showing skipped tags as no content Don't even insert skipped tags as tag blocks with empty content. This should make the resultant xml cleaner and make it use less space. --- .../public_simplechat/local.tools/webmagic.py | 17 ++++++++++------- tools/server/public_simplechat/readme.md | 7 +++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/tools/server/public_simplechat/local.tools/webmagic.py b/tools/server/public_simplechat/local.tools/webmagic.py index f8d65c90c4..42500c4fcf 100644 --- a/tools/server/public_simplechat/local.tools/webmagic.py +++ b/tools/server/public_simplechat/local.tools/webmagic.py @@ -259,19 +259,22 @@ class XMLFilterParser(html.parser.HTMLParser): return True def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]): - self.lastTrackedCB = "starttag" self.prefixTags.append(tag) + if not self.do_capture(): + return + self.lastTrackedCB = "starttag" self.prefix += "\t" self.text += f"\n{self.prefix}<{tag}>" def handle_endtag(self, tag: str): - if (self.lastTrackedCB == "endtag"): - self.text += f"\n{self.prefix}</{tag}>" - else: - self.text += f"</{tag}>" - self.lastTrackedCB = "endtag" + if self.do_capture(): + if (self.lastTrackedCB == "endtag"): + self.text += f"\n{self.prefix}</{tag}>" + else: + self.text += f"</{tag}>" + self.lastTrackedCB = "endtag" + self.prefix = self.prefix[:-1] self.prefixTags.pop() - self.prefix = self.prefix[:-1] def handle_data(self, data: str): if self.do_capture(): diff --git a/tools/server/public_simplechat/readme.md b/tools/server/public_simplechat/readme.md index d87e0cd44f..0aff20789f 100644 --- a/tools/server/public_simplechat/readme.md +++ b/tools/server/public_simplechat/readme.md @@ -664,6 +664,7 @@ sliding window based drop off or even before they kick in, this can help in many * renamed and updated logic wrt xml fetching to be 
fetch_xml_filtered. allow one to use re to identify the tags to be filtered in a fine grained manner including filtering based on tag heirarchy + * avoid showing empty skipped tag blocks * logic which shows the generated tool call has been updated to trap errors when parsing the function call arguments generated by the ai. This ensures that the chat ui itself doesnt get stuck in it. Instead now @@ -683,6 +684,12 @@ Handle multimodal handshaking with ai models. Add fetch_rss and may be different document formats processing related tool calling, in turn through the simpleproxy.py if and where needed. +* Using xmlfiltered and tagDropREs of + * ["^rss:channel:item:(?!title).+$"] one can fetch and extract out all the titles. + * ["^rss:channel:item:(?!title|link|description).+$"] one can fetch and extract out all the + titles along with corresponding links and descriptions + * rather, with some minimal prodding and guidance, gpt-oss generated this to use xmlfiltered to read rss + Save used config entries along with the auto saved chat sessions and inturn give option to reload the same when saved chat is loaded.