SimpleChatTC:Pdf2Text: cleanup initial go
Make the description a bit more explicit about supporting local file paths as part of the url scheme, as the tested AI model was complaining about the file url scheme not being supported. Need to check whether this new description makes things better. Convert the text to bytes before writing it to the http pipe. Ensure CORS is kept happy by passing the Access-Control-Allow-Origin header in the response.
This commit is contained in:
parent
f97efb86e4
commit
dfeb94d3f6
|
|
@ -407,14 +407,18 @@ def handle_pdf2text(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
|||
if (not url) or (len(url) == 0):
|
||||
ph.send_error(400, f"WARN:HandlePdf2Text:MissingUrl!")
|
||||
return
|
||||
print(f"INFO:HandlePdf2Text:Processing:{url}")
|
||||
print(f"INFO:HandlePdf2Text:Processing:{url}...")
|
||||
gotP2T = process_pdf2text(url)
|
||||
if (gotP2T['status'] != 200):
|
||||
ph.send_error(gotP2T['status'], gotP2T['msg'] )
|
||||
return
|
||||
ph.send_response_only(gotP2T['status'], gotP2T['msg'])
|
||||
ph.send_response(gotP2T['status'], gotP2T['msg'])
|
||||
ph.send_header('Content-Type', 'text/text')
|
||||
# Add CORS for browser fetch, just in case
|
||||
ph.send_header('Access-Control-Allow-Origin', '*')
|
||||
ph.end_headers()
|
||||
ph.wfile.write(gotP2T['data'])
|
||||
print(f"INFO:HandlePdf2Text:ExtractedText:{url}...")
|
||||
ph.wfile.write(gotP2T['data'].encode('utf-8'))
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -278,13 +278,13 @@ let pdf2text_meta = {
|
|||
"type": "function",
|
||||
"function": {
|
||||
"name": "pdf2text",
|
||||
"description": "Fetch pdf from requested web / file url through a proxy server and return its text content after converting pdf to text, in few seconds",
|
||||
"description": "Fetch pdf from requested web / file path url through a proxy server and return its text content after converting pdf to text, in few seconds",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url":{
|
||||
"type":"string",
|
||||
"description":"url of the pdf that will be got and inturn converted to text to some extent"
|
||||
"description":"local file / web (http/https) based url of the pdf that will be got and inturn converted to text to an extent"
|
||||
}
|
||||
},
|
||||
"required": ["url"]
|
||||
|
|
|
|||
Loading…
Reference in New Issue