SimpleChatTC:PdfText:Numbering T2 - Need diff scheme

This increments the numbering before the entry's own nested outline is processed, but we need to increment after
2025-11-08 04:04:39 +05:30 · 2025-11-08 04:04:39 +05:30 · 15e99843db
parent bd60437cc6
commit 15e99843db
1 changed files with 11 additions and 5 deletions
--- a/tools/server/public_simplechat/local.tools/pdfmagic.py
+++ b/tools/server/public_simplechat/local.tools/pdfmagic.py
@ -12,14 +12,19 @@ if TYPE_CHECKING:
 def extract_pdfoutline(ol: Any, prefix: str):
    """
-    Extract the pdf outline
+    Extract the pdf outline recursively.
    1st tuple entry returned indicates whether to increase outline entry numbering
    2nd tuple entry returns the outline string that provides the extracted outline.
    """
    if type(ol).__name__ != type([]).__name__:
-        return f"{prefix}:{ol['/Title']}\n"
+        return (1, f"{prefix}:{ol['/Title']}\n")
    olText = ""
    olNum = 1
    for (i,iol) in enumerate(ol):
-        olText += extract_pdfoutline(iol, f"{prefix}.{i+1}")
+        got = extract_pdfoutline(iol, f"{prefix}.{olNum}")
-    return olText
+        olNum += got[0]
        olText += got[1]
    return (0, olText)
 def process_pdftext(url: str, startPN: int, endPN: int):
@ -48,7 +53,8 @@ def process_pdftext(url: str, startPN: int, endPN: int):
        startPN = 1
    if (endPN <= 0) or (endPN > len(oPdf.pages)):
        endPN = len(oPdf.pages)
-    tPdf += extract_pdfoutline(oPdf.outline, "")
+    outlineGot = extract_pdfoutline(oPdf.outline, "")
    tPdf += outlineGot[1]
    for i in range(startPN, endPN+1):
        pd = oPdf.pages[i-1]
        tPdf = tPdf + pd.extract_text()