diff --git a/requirements-docs.txt b/requirements-docs.txt index d60125bbe..05b47fe92 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -18,3 +18,4 @@ griffe-warnings-deprecated==1.1.0 black==25.1.0 mkdocs-macros-plugin==1.4.1 markdown-include-variants==0.0.5 +python-slugify==8.0.4 diff --git a/scripts/docs.py b/scripts/docs.py index d08a218f8..d67ab50f7 100644 --- a/scripts/docs.py +++ b/scripts/docs.py @@ -4,6 +4,7 @@ import os import re import shutil import subprocess +from html.parser import HTMLParser from http.server import HTTPServer, SimpleHTTPRequestHandler from multiprocessing import Pool from pathlib import Path @@ -14,6 +15,7 @@ import typer import yaml from jinja2 import Template from ruff.__main__ import find_ruff_bin +from slugify import slugify as py_slugify logging.basicConfig(level=logging.INFO) @@ -25,8 +27,8 @@ missing_translation_snippet = """ {!../../docs/missing-translation.md!} """ -non_translated_sections = [ - "reference/", +non_translated_sections = ( + f"reference{os.sep}", "release-notes.md", "fastapi-people.md", "external-links.md", @@ -34,7 +36,7 @@ non_translated_sections = [ "management-tasks.md", "management.md", "contributing.md", -] +) docs_path = Path("docs") en_docs_path = Path("docs/en") @@ -42,7 +44,39 @@ en_config_path: Path = en_docs_path / mkdocs_name site_path = Path("site").absolute() build_site_path = Path("site_build").absolute() +header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$") header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$") +code_block3_pattern = re.compile(r"^\s*```") +code_block4_pattern = re.compile(r"^\s*````") + + +class VisibleTextExtractor(HTMLParser): + """Extract visible text from a string with HTML tags.""" + + def __init__(self): + super().__init__() + self.text_parts = [] + + def handle_data(self, data): + self.text_parts.append(data) + + def extract_visible_text(self, html: str) -> str: + self.reset() + self.text_parts = [] + self.feed(html) + return "".join(self.text_parts).strip() + + +def slugify(text: str) -> str: + return py_slugify( + text, + replacements=[ + ("`", ""), # `dict`s -> dicts + ("'s", "s"), # it's -> its + ("'t", "t"), # don't -> dont + ("**", ""), # **FastAPI**s -> FastAPIs + ], + ) def get_en_config() -> Dict[str, Any]: @@ -426,5 +460,74 @@ def generate_docs_src_versions_for_file(file_path: Path) -> None: version_file.write_text(content_format, encoding="utf-8") +@app.command() +def add_permalinks_page(path: Path, update_existing: bool = False): + """ + Add or update header permalinks in specific page of En docs. + """ + + if not path.is_relative_to(en_docs_path / "docs"): + raise RuntimeError(f"Path must be inside {en_docs_path}") + rel_path = path.relative_to(en_docs_path / "docs") + + # Skip excluded sections + if str(rel_path).startswith(non_translated_sections): + return + + visible_text_extractor = VisibleTextExtractor() + updated_lines = [] + in_code_block3 = False + in_code_block4 = False + permalinks = set() + + with path.open("r", encoding="utf-8") as f: + lines = f.readlines() + + for line in lines: + # Handle codeblocks start and end + if not (in_code_block3 or in_code_block4): + if code_block4_pattern.match(line): + in_code_block4 = True + elif code_block3_pattern.match(line): + in_code_block3 = True + else: + if in_code_block4 and code_block4_pattern.match(line): + in_code_block4 = False + elif in_code_block3 and code_block3_pattern.match(line): + in_code_block3 = False + + # Process Headers only outside codeblocks + if not (in_code_block3 or in_code_block4): + match = header_pattern.match(line) + if match: + hashes, title, _permalink = match.groups() + if (not _permalink) or update_existing: + slug = slugify(visible_text_extractor.extract_visible_text(title)) + if slug in permalinks: + # If the slug is already used, append a number to make it unique + count = 1 + original_slug = slug + while slug in permalinks: + slug = f"{original_slug}_{count}" + count += 1 + permalinks.add(slug) + + line = f"{hashes} {title} {{ #{slug} }}\n" + + updated_lines.append(line) + + with path.open("w", encoding="utf-8") as f: + f.writelines(updated_lines) + + +@app.command() +def add_permalinks(update_existing: bool = False) -> None: + """ + Add or update header permalinks in all pages of En docs. + """ + for md_file in en_docs_path.rglob("*.md"): + add_permalinks_page(md_file, update_existing=update_existing) + + if __name__ == "__main__": app()