SimpleChatTC:Pdf2Text: cleanup initial go

Make the description a bit more explicit about supporting local
file paths as part of the url scheme, as the tested AI model was
complaining that the file url scheme was not supported. Need to
check if this new description makes things better.

Convert the text to bytes for writing to the http pipe.

Ensure CORS is kept happy by passing the Access-Control-Allow-Origin
header in the response.
This commit is contained in:
hanishkvc 2025-11-01 23:12:27 +05:30
parent f97efb86e4
commit dfeb94d3f6
2 changed files with 9 additions and 5 deletions

View File

@ -407,14 +407,18 @@ def handle_pdf2text(ph: ProxyHandler, pr: urllib.parse.ParseResult):
if (not url) or (len(url) == 0):
ph.send_error(400, f"WARN:HandlePdf2Text:MissingUrl!")
return
print(f"INFO:HandlePdf2Text:Processing:{url}")
print(f"INFO:HandlePdf2Text:Processing:{url}...")
gotP2T = process_pdf2text(url)
if (gotP2T['status'] != 200):
ph.send_error(gotP2T['status'], gotP2T['msg'] )
return
ph.send_response_only(gotP2T['status'], gotP2T['msg'])
ph.send_response(gotP2T['status'], gotP2T['msg'])
ph.send_header('Content-Type', 'text/text')
# Add CORS for browser fetch, just in case
ph.send_header('Access-Control-Allow-Origin', '*')
ph.end_headers()
ph.wfile.write(gotP2T['data'])
print(f"INFO:HandlePdf2Text:ExtractedText:{url}...")
ph.wfile.write(gotP2T['data'].encode('utf-8'))

View File

@ -278,13 +278,13 @@ let pdf2text_meta = {
"type": "function",
"function": {
"name": "pdf2text",
"description": "Fetch pdf from requested web / file url through a proxy server and return its text content after converting pdf to text, in few seconds",
"description": "Fetch pdf from requested web / file path url through a proxy server and return its text content after converting pdf to text, in few seconds",
"parameters": {
"type": "object",
"properties": {
"url":{
"type":"string",
"description":"url of the pdf that will be got and inturn converted to text to some extent"
"description":"local file / web (http/https) based url of the pdf that will be got and inturn converted to text to an extent"
}
},
"required": ["url"]