🛠️ Add `add-permalinks` and `add-permalinks-page` to `scripts/docs.py` (#14033)

Co-authored-by: Sebastián Ramírez <tiangolo@gmail.com>
This commit is contained in:
Motov Yurii 2025-11-21 13:49:11 +01:00 committed by GitHub
parent 456008a52b
commit 32b375c5e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 107 additions and 3 deletions

View File

@ -18,3 +18,4 @@ griffe-warnings-deprecated==1.1.0
black==25.1.0
mkdocs-macros-plugin==1.4.1
markdown-include-variants==0.0.5
python-slugify==8.0.4

View File

@ -4,6 +4,7 @@ import os
import re
import shutil
import subprocess
from html.parser import HTMLParser
from http.server import HTTPServer, SimpleHTTPRequestHandler
from multiprocessing import Pool
from pathlib import Path
@ -14,6 +15,7 @@ import typer
import yaml
from jinja2 import Template
from ruff.__main__ import find_ruff_bin
from slugify import slugify as py_slugify
logging.basicConfig(level=logging.INFO)
@ -25,8 +27,8 @@ missing_translation_snippet = """
{!../../docs/missing-translation.md!}
"""
non_translated_sections = [
"reference/",
non_translated_sections = (
f"reference{os.sep}",
"release-notes.md",
"fastapi-people.md",
"external-links.md",
@ -34,7 +36,7 @@ non_translated_sections = [
"management-tasks.md",
"management.md",
"contributing.md",
]
)
docs_path = Path("docs")
en_docs_path = Path("docs/en")
@ -42,7 +44,39 @@ en_config_path: Path = en_docs_path / mkdocs_name
site_path = Path("site").absolute()
build_site_path = Path("site_build").absolute()
header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$")
header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$")
code_block3_pattern = re.compile(r"^\s*```")
code_block4_pattern = re.compile(r"^\s*````")
class VisibleTextExtractor(HTMLParser):
"""Extract visible text from a string with HTML tags."""
def __init__(self):
super().__init__()
self.text_parts = []
def handle_data(self, data):
self.text_parts.append(data)
def extract_visible_text(self, html: str) -> str:
self.reset()
self.text_parts = []
self.feed(html)
return "".join(self.text_parts).strip()
def slugify(text: str) -> str:
return py_slugify(
text,
replacements=[
("`", ""), # `dict`s -> dicts
("'s", "s"), # it's -> its
("'t", "t"), # don't -> dont
("**", ""), # **FastAPI**s -> FastAPIs
],
)
def get_en_config() -> Dict[str, Any]:
@ -426,5 +460,74 @@ def generate_docs_src_versions_for_file(file_path: Path) -> None:
version_file.write_text(content_format, encoding="utf-8")
@app.command()
def add_permalinks_page(path: Path, update_existing: bool = False):
"""
Add or update header permalinks in specific page of En docs.
"""
if not path.is_relative_to(en_docs_path / "docs"):
raise RuntimeError(f"Path must be inside {en_docs_path}")
rel_path = path.relative_to(en_docs_path / "docs")
# Skip excluded sections
if str(rel_path).startswith(non_translated_sections):
return
visible_text_extractor = VisibleTextExtractor()
updated_lines = []
in_code_block3 = False
in_code_block4 = False
permalinks = set()
with path.open("r", encoding="utf-8") as f:
lines = f.readlines()
for line in lines:
# Handle codeblocks start and end
if not (in_code_block3 or in_code_block4):
if code_block4_pattern.match(line):
in_code_block4 = True
elif code_block3_pattern.match(line):
in_code_block3 = True
else:
if in_code_block4 and code_block4_pattern.match(line):
in_code_block4 = False
elif in_code_block3 and code_block3_pattern.match(line):
in_code_block3 = False
# Process Headers only outside codeblocks
if not (in_code_block3 or in_code_block4):
match = header_pattern.match(line)
if match:
hashes, title, _permalink = match.groups()
if (not _permalink) or update_existing:
slug = slugify(visible_text_extractor.extract_visible_text(title))
if slug in permalinks:
# If the slug is already used, append a number to make it unique
count = 1
original_slug = slug
while slug in permalinks:
slug = f"{original_slug}_{count}"
count += 1
permalinks.add(slug)
line = f"{hashes} {title} {{ #{slug} }}\n"
updated_lines.append(line)
with path.open("w", encoding="utf-8") as f:
f.writelines(updated_lines)
@app.command()
def add_permalinks(update_existing: bool = False) -> None:
"""
Add or update header permalinks in all pages of En docs.
"""
for md_file in en_docs_path.rglob("*.md"):
add_permalinks_page(md_file, update_existing=update_existing)
if __name__ == "__main__":
app()