SimpleChatTC:SimpleProxy:Pdf2Text:Initial plumbing
Get the pdf2text request for processing.
This commit is contained in:
parent
da98a961ab
commit
ecfdb66c94
|
|
@ -46,7 +46,7 @@ gConfigType = {
|
||||||
|
|
||||||
gConfigNeeded = [ '--allowed.domains', '--bearer.insecure' ]
|
gConfigNeeded = [ '--allowed.domains', '--bearer.insecure' ]
|
||||||
|
|
||||||
gAllowedCalls = [ "urltext", "urlraw" ]
|
gAllowedCalls = [ "urltext", "urlraw", "pdf2text" ]
|
||||||
|
|
||||||
|
|
||||||
def bearer_transform():
|
def bearer_transform():
|
||||||
|
|
@ -128,6 +128,12 @@ class ProxyHandler(http.server.BaseHTTPRequestHandler):
|
||||||
self.send_error(400, f"WARN:{acGot['Msg']}")
|
self.send_error(400, f"WARN:{acGot['Msg']}")
|
||||||
else:
|
else:
|
||||||
handle_urltext(self, pr)
|
handle_urltext(self, pr)
|
||||||
|
case '/pdf2text':
|
||||||
|
acGot = self.auth_check()
|
||||||
|
if not acGot['AllOk']:
|
||||||
|
self.send_error(400, f"WARN:{acGot['Msg']}")
|
||||||
|
else:
|
||||||
|
handle_pdf2text(self, pr)
|
||||||
case '/aum':
|
case '/aum':
|
||||||
handle_aum(self, pr)
|
handle_aum(self, pr)
|
||||||
case _:
|
case _:
|
||||||
|
|
@ -372,6 +378,35 @@ def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
||||||
ph.send_error(502, f"WARN:UrlTextFailed:{exc}")
|
ph.send_error(502, f"WARN:UrlTextFailed:{exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def do_pdf2text(fUrl: str):
    """
    Placeholder for the pdf to text conversion of the pdf at fUrl.

    Currently this only imports pypdf and returns None; fUrl is not yet used.
    NOTE(review): the import is kept local to the function, presumably so
    that pypdf is needed only when this path is exercised - confirm.
    """
    import pypdf
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Url schemes that the pdf2text path is willing to accept.
gAllowedPdfUrlTypes = [ "file", "http", "https" ]


def handle_pdf2text(ph: ProxyHandler, pr: urllib.parse.ParseResult):
    """
    Handle requests to pdf2text path, which is used to extract plain text
    from the specified pdf file.

    ph: the request handler used to send the response back to the client.
    pr: the parsed request url; its query is expected to carry a 'url' field
        identifying the pdf to process.

    Responds with
    * 400, if the url query field is missing or empty
    * 403, if the part of the url before '://' is not in gAllowedPdfUrlTypes
    * 200 with headers only, otherwise (no body is generated yet).
    """
    queryParams = urllib.parse.parse_qs(pr.query)
    # parse_qs drops blank values by default, so a missing as well as an empty
    # url field shows up as an absent key. Use get, so that this case reaches
    # the 400 response below, instead of raising a KeyError.
    urls = queryParams.get('url', [])
    print(f"DBUG:HandlePdf2Text:Url:{urls}")
    url = urls[0] if urls else ''
    if not url:
        ph.send_error(400, "WARN:HandlePdf2Text:MissingUrl!")
        return
    # Only the scheme part (text before the 1st '://') is validated here.
    urlParts = url.split('://', 1)
    if urlParts[0] not in gAllowedPdfUrlTypes:
        ph.send_error(403, f"WARN:HandlePdf2Text:ForbiddedUrlType:{urlParts[0]}:AllowedUrlTypes:{gAllowedPdfUrlTypes}")
        return
    print(f"INFO:HandlePdf2Text:Processing:{url}")
    ph.send_response_only(200, "Pdf2Text Response follows")
    ph.end_headers()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def load_config():
|
def load_config():
|
||||||
"""
|
"""
|
||||||
Allow loading of a json based config file
|
Allow loading of a json based config file
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue