SimpleChatTC:SimpleProxy: Avoid circular deps wrt Type Checking

Also move the debug dump helper into its own module. Also remember to specify the class name in quotes, similar to referring to a class from within one of its own members, wrt python type checking.
This commit is contained in:
parent
350d7d77e0
commit
d012d127bf
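
For reference, the pattern this commit applies looks like the following minimal sketch. The module and class names (simpleproxy, ProxyHandler) come from the diff below; the handler function itself is illustrative:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # This import is only evaluated by static type checkers, never at
    # runtime, so it cannot contribute to a circular import.
    from simpleproxy import ProxyHandler


def handle_example(ph: 'ProxyHandler'):
    # Quoting the annotation makes it a forward reference: it is not
    # resolved when the def statement executes, by which time
    # ProxyHandler has not actually been imported.
    ph.end_headers()
```

An equivalent alternative would be a module-level `from __future__ import annotations` (PEP 563), which turns every annotation into a lazily evaluated string and makes the quotes unnecessary; quoting just the names guarded by TYPE_CHECKING, as done here, keeps the change local.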
@@ -3,7 +3,10 @@
 
 import urllib.parse
 import urlvalidator as uv
-import simpleproxy as root
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from simpleproxy import ProxyHandler
 
 
 def process_pdf2text(url: str, startPN: int, endPN: int):
@@ -27,7 +30,7 @@ def process_pdf2text(url: str, startPN: int, endPN: int):
     return { 'status': 200, 'msg': "Pdf2Text Response follows", 'data': tPdf }
 
 
-def handle_pdf2text(ph: root.ProxyHandler, pr: urllib.parse.ParseResult):
+def handle_pdf2text(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
     """
     Handle requests to pdf2text path, which is used to extract plain text
     from the specified pdf file.
@@ -173,18 +173,6 @@ def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult):
     ph.end_headers()
 
 
-def debug_dump(meta: dict, data: dict):
-    if not gMe['--debug']:
-        return
-    timeTag = f"{time.time():0.12f}"
-    with open(f"/tmp/simpleproxy.{timeTag}.meta", '+w') as f:
-        for k in meta:
-            f.write(f"\n\n\n\n{k}:{meta[k]}\n\n\n\n")
-    with open(f"/tmp/simpleproxy.{timeTag}.data", '+w') as f:
-        for k in data:
-            f.write(f"\n\n\n\n{k}:{data[k]}\n\n\n\n")
-
-
 def load_config():
     """
     Allow loading of a json based config file
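
The removed helper presumably reappears in the new debug module that the url handlers import below; the hunk adding that file is not part of this extract. Reconstructed from the removed body, it would look roughly like this. How the new module learns about the --debug flag is an assumption here, since reading simpleproxy's gMe directly would reintroduce the circular dependency:

```python
# debug.py -- sketch reconstructed from the removed debug_dump above
import time

# Assumption: the main module sets this at startup instead of the
# helper reaching into simpleproxy's gMe['--debug'] directly.
enabled = False


def dump(meta: dict, data: dict):
    if not enabled:
        return
    # Write a timestamped meta/data file pair per dump, as in the
    # removed helper.
    timeTag = f"{time.time():0.12f}"
    with open(f"/tmp/simpleproxy.{timeTag}.meta", '+w') as f:
        for k in meta:
            f.write(f"\n\n\n\n{k}:{meta[k]}\n\n\n\n")
    with open(f"/tmp/simpleproxy.{timeTag}.data", '+w') as f:
        for k in data:
            f.write(f"\n\n\n\n{k}:{data[k]}\n\n\n\n")
```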
@@ -3,10 +3,15 @@
 
 import urllib.parse
 import urllib.request
-import simpleproxy as root
 import urlvalidator as uv
 from dataclasses import dataclass
 import html.parser
+import debug
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from simpleproxy import ProxyHandler
 
 
 @dataclass(frozen=True)
@@ -21,7 +26,7 @@ class UrlReqResp:
     contentData: str = ""
 
 
-def handle_urlreq(ph: root.ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
+def handle_urlreq(ph: 'ProxyHandler', pr: urllib.parse.ParseResult, tag: str):
     """
     Common part of the url request handling used by both urlraw and urltext.
 
@@ -58,13 +63,13 @@ def handle_urlreq(ph: root.ProxyHandler, pr: urllib.parse.ParseResult, tag: str)
         contentData = response.read().decode('utf-8')
         statusCode = response.status or 200
         contentType = response.getheader('Content-Type') or 'text/html'
-        root.debug_dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData })
+        debug.dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData })
         return UrlReqResp(True, statusCode, "", contentType, contentData)
     except Exception as exc:
         return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}")
 
 
-def handle_urlraw(ph: root.ProxyHandler, pr: urllib.parse.ParseResult):
+def handle_urlraw(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
     try:
         # Get requested url
         got = handle_urlreq(ph, pr, "HandleUrlRaw")
@@ -159,7 +164,7 @@ class TextHtmlParser(html.parser.HTMLParser):
         return self.textStripped
 
 
-def handle_urltext(ph: root.ProxyHandler, pr: urllib.parse.ParseResult):
+def handle_urltext(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
     try:
         # Get requested url
         got = handle_urlreq(ph, pr, "HandleUrlText")
@@ -176,6 +181,6 @@ def handle_urltext(ph: root.ProxyHandler, pr: urllib.parse.ParseResult):
         ph.send_header('Access-Control-Allow-Origin', '*')
         ph.end_headers()
         ph.wfile.write(textHtml.get_stripped_text().encode('utf-8'))
-        root.debug_dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
+        debug.dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
     except Exception as exc:
         ph.send_error(502, f"WARN:UrlTextFailed:{exc}")
@@ -575,6 +575,11 @@ Trap http response errors and inform user the specific error returned by ai serv
 Initial go at a pdf2text tool call. For now it allows local pdf files to be read and their text content
 extracted and passed to ai model for further processing, as decided by ai and end user.
 
+SimpleProxy
+* Convert from a single monolithic file into a collection of modules.
+* UrlValidator to cross check scheme and domain of requested urls,
+  the whitelist inturn picked from config json
+
 #### ToDo
 
 Is the tool call promise land trap deep enough, need to think through and explore around this once later.
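
The UrlValidator noted in that README hunk is only imported by this commit (import urlvalidator as uv), not defined in it. As a rough idea of the described cross-check, a whitelist-based validator could look like the sketch below; everything here except the scheme/domain/config-json idea is hypothetical:

```python
# Hypothetical sketch of a urlvalidator-style check; the real module's
# API is not shown in this diff.
import urllib.parse


def validate(url: str, schemes: set[str], domains: set[str]) -> bool:
    pr = urllib.parse.urlparse(url)
    # Both the scheme and the host must appear in the whitelists
    # picked from the config json.
    return (pr.scheme in schemes) and (pr.hostname in domains)


# e.g. validate("https://example.com/doc.pdf", {"https"}, {"example.com"})
```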