From dfeb94d3f6130993eff60b6fb808b43a87fc2609 Mon Sep 17 00:00:00 2001 From: hanishkvc Date: Sat, 1 Nov 2025 23:12:27 +0530 Subject: [PATCH] SimpleChatTC:Pdf2Text: cleanup initial go Make the description a bit more explicit about supporting local file paths as part of the url scheme, as the tested ai model was complaining about not supporting the file url scheme. Need to check if this new description will make things better. Convert the text to bytes before writing it to the http pipe. Keep CORS happy by passing the Access-Control-Allow-Origin header in the response. --- .../public_simplechat/local.tools/simpleproxy.py | 10 +++++++--- tools/server/public_simplechat/toolweb.mjs | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/server/public_simplechat/local.tools/simpleproxy.py b/tools/server/public_simplechat/local.tools/simpleproxy.py index cb19bc8654..99a7004cd2 100644 --- a/tools/server/public_simplechat/local.tools/simpleproxy.py +++ b/tools/server/public_simplechat/local.tools/simpleproxy.py @@ -407,14 +407,18 @@ def handle_pdf2text(ph: ProxyHandler, pr: urllib.parse.ParseResult): if (not url) or (len(url) == 0): ph.send_error(400, f"WARN:HandlePdf2Text:MissingUrl!") return - print(f"INFO:HandlePdf2Text:Processing:{url}") + print(f"INFO:HandlePdf2Text:Processing:{url}...") gotP2T = process_pdf2text(url) if (gotP2T['status'] != 200): ph.send_error(gotP2T['status'], gotP2T['msg'] ) return - ph.send_response_only(gotP2T['status'], gotP2T['msg']) + ph.send_response(gotP2T['status'], gotP2T['msg']) + ph.send_header('Content-Type', 'text/text') + # Add CORS for browser fetch, just in case + ph.send_header('Access-Control-Allow-Origin', '*') ph.end_headers() - ph.wfile.write(gotP2T['data']) + print(f"INFO:HandlePdf2Text:ExtractedText:{url}...") + ph.wfile.write(gotP2T['data'].encode('utf-8')) diff --git a/tools/server/public_simplechat/toolweb.mjs b/tools/server/public_simplechat/toolweb.mjs index 566b65c1a0..f2cac18967 100644 --- 
a/tools/server/public_simplechat/toolweb.mjs +++ b/tools/server/public_simplechat/toolweb.mjs @@ -278,13 +278,13 @@ let pdf2text_meta = { "type": "function", "function": { "name": "pdf2text", - "description": "Fetch pdf from requested web / file url through a proxy server and return its text content after converting pdf to text, in few seconds", + "description": "Fetch pdf from requested web / file path url through a proxy server and return its text content after converting pdf to text, in few seconds", "parameters": { "type": "object", "properties": { "url":{ "type":"string", - "description":"url of the pdf that will be got and inturn converted to text to some extent" + "description":"local file / web (http/https) based url of the pdf that will be got and inturn converted to text to an extent" } }, "required": ["url"]