🛠️ Add `add-permalinks` and `add-permalinks-page` to `scripts/docs.py` (#14033)

Co-authored-by: Sebastián Ramírez <tiangolo@gmail.com>
This commit is contained in:
Motov Yurii 2025-11-21 13:49:11 +01:00 committed by GitHub
parent 456008a52b
commit 32b375c5e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 107 additions and 3 deletions

View File

@ -18,3 +18,4 @@ griffe-warnings-deprecated==1.1.0
black==25.1.0 black==25.1.0
mkdocs-macros-plugin==1.4.1 mkdocs-macros-plugin==1.4.1
markdown-include-variants==0.0.5 markdown-include-variants==0.0.5
python-slugify==8.0.4

View File

@ -4,6 +4,7 @@ import os
import re import re
import shutil import shutil
import subprocess import subprocess
from html.parser import HTMLParser
from http.server import HTTPServer, SimpleHTTPRequestHandler from http.server import HTTPServer, SimpleHTTPRequestHandler
from multiprocessing import Pool from multiprocessing import Pool
from pathlib import Path from pathlib import Path
@ -14,6 +15,7 @@ import typer
import yaml import yaml
from jinja2 import Template from jinja2 import Template
from ruff.__main__ import find_ruff_bin from ruff.__main__ import find_ruff_bin
from slugify import slugify as py_slugify
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -25,8 +27,8 @@ missing_translation_snippet = """
{!../../docs/missing-translation.md!} {!../../docs/missing-translation.md!}
""" """
non_translated_sections = [ non_translated_sections = (
"reference/", f"reference{os.sep}",
"release-notes.md", "release-notes.md",
"fastapi-people.md", "fastapi-people.md",
"external-links.md", "external-links.md",
@ -34,7 +36,7 @@ non_translated_sections = [
"management-tasks.md", "management-tasks.md",
"management.md", "management.md",
"contributing.md", "contributing.md",
] )
docs_path = Path("docs") docs_path = Path("docs")
en_docs_path = Path("docs/en") en_docs_path = Path("docs/en")
@ -42,7 +44,39 @@ en_config_path: Path = en_docs_path / mkdocs_name
site_path = Path("site").absolute() site_path = Path("site").absolute()
build_site_path = Path("site_build").absolute() build_site_path = Path("site_build").absolute()
header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$")
header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$") header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$")
code_block3_pattern = re.compile(r"^\s*```")
code_block4_pattern = re.compile(r"^\s*````")
class VisibleTextExtractor(HTMLParser):
    """Extract visible text from a string with HTML tags.

    Tags are discarded and the text nodes reported by ``HTMLParser`` are
    concatenated. The same instance can be reused for several strings
    through ``extract_visible_text``.
    """

    def __init__(self):
        super().__init__()
        # Text nodes collected while parsing the current input.
        self.text_parts = []

    def handle_data(self, data):
        """Collect every text node the parser reports."""
        self.text_parts.append(data)

    def extract_visible_text(self, html: str) -> str:
        """Return the text content of ``html`` with surrounding whitespace stripped."""
        # Drop any state left over from a previous call so the instance is reusable.
        self.reset()
        self.text_parts = []
        self.feed(html)
        # feed() holds back trailing input that could still turn into a tag or
        # a character reference (for example the "&A" in "Q&A"); close() tells
        # the parser the input is complete so that text is flushed as data.
        self.close()
        return "".join(self.text_parts).strip()
def slugify(text: str) -> str:
    """Turn header text into a slug using ``py_slugify``.

    A few Markdown and apostrophe sequences are removed through the
    ``replacements`` argument so they do not end up as separators in the
    resulting slug.
    """
    markup_cleanups = [
        ("`", ""),  # `dict`s -> dicts
        ("'s", "s"),  # it's -> its
        ("'t", "t"),  # don't -> dont
        ("**", ""),  # **FastAPI**s -> FastAPIs
    ]
    return py_slugify(text, replacements=markup_cleanups)
def get_en_config() -> Dict[str, Any]: def get_en_config() -> Dict[str, Any]:
@ -426,5 +460,74 @@ def generate_docs_src_versions_for_file(file_path: Path) -> None:
version_file.write_text(content_format, encoding="utf-8") version_file.write_text(content_format, encoding="utf-8")
def _lines_outside_code_blocks(lines: list[str]) -> list[bool]:
    """Return, for each line, whether it lies outside a fenced code block.

    Fence lines themselves are handled as follows: an opening fence counts
    as inside the block, a closing fence counts as outside. A block opened
    with four backticks is only closed by a four-backtick fence, so
    three-backtick fences nested inside it are ignored.
    """
    flags = []
    closing_pattern = None  # pattern that ends the currently open fence, if any
    for line in lines:
        if closing_pattern is None:
            # Check the longer fence first: a four-backtick line also matches
            # the three-backtick pattern.
            if code_block4_pattern.match(line):
                closing_pattern = code_block4_pattern
            elif code_block3_pattern.match(line):
                closing_pattern = code_block3_pattern
        elif closing_pattern.match(line):
            closing_pattern = None
        flags.append(closing_pattern is None)
    return flags


@app.command()
def add_permalinks_page(path: Path, update_existing: bool = False):
    """
    Add or update header permalinks in a specific page of the En docs.

    Headers outside fenced code blocks that have no ``{ #permalink }``
    attribute get one generated from the visible header text. With
    ``update_existing`` the permalinks of all headers are regenerated.
    Pages in the non-translated sections are skipped.

    Raises ``RuntimeError`` if ``path`` is not inside the En docs directory.
    """
    docs_root = en_docs_path / "docs"
    if not path.is_relative_to(docs_root):
        raise RuntimeError(f"Path must be inside {docs_root}")
    rel_path = path.relative_to(docs_root)

    # Skip excluded sections
    if str(rel_path).startswith(non_translated_sections):
        return

    with path.open("r", encoding="utf-8") as f:
        lines = f.readlines()
    outside_code = _lines_outside_code_blocks(lines)

    permalinks = set()
    if not update_existing:
        # Existing permalinks are kept as they are, so reserve them up front:
        # a generated slug must not duplicate an anchor that already exists
        # anywhere on the page (before or after the header being processed).
        for line, outside in zip(lines, outside_code):
            match = header_pattern.match(line) if outside else None
            if match and match.group(3):
                # The captured group looks like "#slug " (leading "#" and
                # possibly trailing whitespace or further attributes).
                anchor = match.group(3).strip()[1:].split()
                if anchor:
                    permalinks.add(anchor[0])

    visible_text_extractor = VisibleTextExtractor()
    updated_lines = []
    for line, outside in zip(lines, outside_code):
        # Process headers only outside code blocks
        match = header_pattern.match(line) if outside else None
        if match:
            hashes, title, permalink = match.groups()
            if (not permalink) or update_existing:
                slug = slugify(visible_text_extractor.extract_visible_text(title))
                # A title without any sluggable character would produce the
                # invalid attribute "{ # }"; leave such headers untouched.
                if slug:
                    # If the slug is already used, append a number to make it unique
                    original_slug = slug
                    count = 1
                    while slug in permalinks:
                        slug = f"{original_slug}_{count}"
                        count += 1
                    permalinks.add(slug)
                    line = f"{hashes} {title} {{ #{slug} }}\n"
        updated_lines.append(line)

    # Only touch the file when a header actually changed.
    if updated_lines != lines:
        with path.open("w", encoding="utf-8") as f:
            f.writelines(updated_lines)
@app.command()
def add_permalinks(update_existing: bool = False) -> None:
    """
    Add or update header permalinks in all pages of En docs.

    Every Markdown file below the En docs content directory is passed to
    ``add_permalinks_page`` with the same ``update_existing`` setting.
    """
    # Search only the directory that add_permalinks_page accepts; a Markdown
    # file elsewhere under the En docs folder would make it raise and abort
    # the whole run. Sorting keeps the processing order deterministic.
    docs_root = en_docs_path / "docs"
    for md_file in sorted(docs_root.rglob("*.md")):
        add_permalinks_page(md_file, update_existing=update_existing)
if __name__ == "__main__": if __name__ == "__main__":
app() app()