SimpleChatTC:SimpleProxy: Avoid circular deps wrt Type Checking

also move debug dump helper to its own module

also remember to specify the Class name in quotes, similar to
referring to a class within a member of the class wrt python type
checking.
This commit is contained in:
hanishkvc 2025-11-02 17:24:36 +05:30
parent 350d7d77e0
commit d012d127bf
4 changed files with 21 additions and 20 deletions

View File

@ -3,7 +3,10 @@
import urllib.parse import urllib.parse
import urlvalidator as uv import urlvalidator as uv
import simpleproxy as root from typing import TYPE_CHECKING
if TYPE_CHECKING:
from simpleproxy import ProxyHandler
def process_pdf2text(url: str, startPN: int, endPN: int): def process_pdf2text(url: str, startPN: int, endPN: int):
@ -27,7 +30,7 @@ def process_pdf2text(url: str, startPN: int, endPN: int):
return { 'status': 200, 'msg': "Pdf2Text Response follows", 'data': tPdf } return { 'status': 200, 'msg': "Pdf2Text Response follows", 'data': tPdf }
def handle_pdf2text(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): def handle_pdf2text(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
""" """
Handle requests to pdf2text path, which is used to extract plain text Handle requests to pdf2text path, which is used to extract plain text
from the specified pdf file. from the specified pdf file.

View File

@ -173,18 +173,6 @@ def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult):
ph.end_headers() ph.end_headers()
def debug_dump(meta: dict, data: dict):
    """Write the given meta and data dicts to timestamped files under /tmp.

    Intended as a debugging aid; it is a no-op unless the '--debug' flag is
    enabled in the global config dict gMe. Each dict goes to its own file
    (.meta / .data suffix), one heavily-padded "key:value" record per entry.

    NOTE(review): assumes `gMe` and `time` are available at module level —
    confirm against the enclosing module.
    """
    if not gMe['--debug']:
        return
    timeTag = f"{time.time():0.12f}"
    # Both dicts are dumped the same way; only the file suffix differs.
    for suffix, entries in (("meta", meta), ("data", data)):
        with open(f"/tmp/simpleproxy.{timeTag}.{suffix}", '+w') as f:
            for key in entries:
                f.write(f"\n\n\n\n{key}:{entries[key]}\n\n\n\n")
def load_config(): def load_config():
""" """
Allow loading of a json based config file Allow loading of a json based config file

View File

@ -3,10 +3,15 @@
import urllib.parse import urllib.parse
import urllib.request import urllib.request
import simpleproxy as root
import urlvalidator as uv import urlvalidator as uv
from dataclasses import dataclass from dataclasses import dataclass
import html.parser import html.parser
import debug
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from simpleproxy import ProxyHandler
@dataclass(frozen=True) @dataclass(frozen=True)
@ -21,7 +26,7 @@ class UrlReqResp:
contentData: str = "" contentData: str = ""
def handle_urlreq(ph: root.ProxyHandler, pr: urllib.parse.ParseResult, tag: str): def handle_urlreq(ph: 'ProxyHandler', pr: urllib.parse.ParseResult, tag: str):
""" """
Common part of the url request handling used by both urlraw and urltext. Common part of the url request handling used by both urlraw and urltext.
@ -58,13 +63,13 @@ def handle_urlreq(ph: root.ProxyHandler, pr: urllib.parse.ParseResult, tag: str)
contentData = response.read().decode('utf-8') contentData = response.read().decode('utf-8')
statusCode = response.status or 200 statusCode = response.status or 200
contentType = response.getheader('Content-Type') or 'text/html' contentType = response.getheader('Content-Type') or 'text/html'
root.debug_dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData }) debug.dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData })
return UrlReqResp(True, statusCode, "", contentType, contentData) return UrlReqResp(True, statusCode, "", contentType, contentData)
except Exception as exc: except Exception as exc:
return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}") return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}")
def handle_urlraw(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): def handle_urlraw(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
try: try:
# Get requested url # Get requested url
got = handle_urlreq(ph, pr, "HandleUrlRaw") got = handle_urlreq(ph, pr, "HandleUrlRaw")
@ -159,7 +164,7 @@ class TextHtmlParser(html.parser.HTMLParser):
return self.textStripped return self.textStripped
def handle_urltext(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): def handle_urltext(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
try: try:
# Get requested url # Get requested url
got = handle_urlreq(ph, pr, "HandleUrlText") got = handle_urlreq(ph, pr, "HandleUrlText")
@ -176,6 +181,6 @@ def handle_urltext(ph: root.ProxyHandler, pr: urllib.parse.ParseResult):
ph.send_header('Access-Control-Allow-Origin', '*') ph.send_header('Access-Control-Allow-Origin', '*')
ph.end_headers() ph.end_headers()
ph.wfile.write(textHtml.get_stripped_text().encode('utf-8')) ph.wfile.write(textHtml.get_stripped_text().encode('utf-8'))
root.debug_dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() }) debug.dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
except Exception as exc: except Exception as exc:
ph.send_error(502, f"WARN:UrlTextFailed:{exc}") ph.send_error(502, f"WARN:UrlTextFailed:{exc}")

View File

@ -575,6 +575,11 @@ Trap http response errors and inform user the specific error returned by ai serv
Initial go at a pdf2text tool call. For now it allows local pdf files to be read and their text content Initial go at a pdf2text tool call. For now it allows local pdf files to be read and their text content
extracted and passed to ai model for further processing, as decided by ai and end user. extracted and passed to ai model for further processing, as decided by ai and end user.
SimpleProxy
* Convert from a single monolithic file into a collection of modules.
* UrlValidator to cross check scheme and domain of requested urls,
the whitelist in turn picked from the config json
#### ToDo #### ToDo
Is the tool call promise land trap deep enough, need to think through and explore around this once later. Is the tool call promise land trap deep enough, need to think through and explore around this once later.