mirror of https://github.com/tiangolo/fastapi.git
Handle code blocks, fix some bugs, add `fix-all` command
This commit is contained in:
parent
0339277673
commit
beff498743
|
|
@ -20,10 +20,15 @@ MARKDOWN_LINK_RE = re.compile(
|
||||||
)
|
)
|
||||||
|
|
||||||
HTML_LINK_RE = re.compile(r"<a\s+[^>]*>.*?</a>")
|
HTML_LINK_RE = re.compile(r"<a\s+[^>]*>.*?</a>")
|
||||||
HTML_LINK_TEXT = re.compile(r"<a\b([^>]*)>(.*?)</a>")
|
HTML_LINK_TEXT_RE = re.compile(r"<a\b([^>]*)>(.*?)</a>")
|
||||||
HTML_LINK_OPEN_TAG_RE = re.compile(r"<a\b([^>]*)>")
|
HTML_LINK_OPEN_TAG_RE = re.compile(r"<a\b([^>]*)>")
|
||||||
HTML_ATTR_RE = re.compile(r'(\w+)\s*=\s*([\'"])(.*?)\2')
|
HTML_ATTR_RE = re.compile(r'(\w+)\s*=\s*([\'"])(.*?)\2')
|
||||||
|
|
||||||
|
CODE_BLOCK_LANG_RE = re.compile(r"^```([\w-]*)", re.MULTILINE)
|
||||||
|
|
||||||
|
SLASHES_COMMENT_RE = re.compile(r"^(?P<code>.*?)(?P<comment>\s*// .*)?$")
|
||||||
|
HASH_COMMENT_RE = re.compile(r"^(?P<code>.*?)(?P<comment>\s*# .*)?$")
|
||||||
|
|
||||||
|
|
||||||
class CodeIncludeInfo(TypedDict):
|
class CodeIncludeInfo(TypedDict):
|
||||||
line_no: int
|
line_no: int
|
||||||
|
|
@ -57,6 +62,12 @@ class HtmlLinkInfo(TypedDict):
|
||||||
text: str
|
text: str
|
||||||
|
|
||||||
|
|
||||||
|
class MultilineCodeBlockInfo(TypedDict):
    """A fenced (``` or ````) code block found in a markdown document."""

    # Language tag taken from the opening fence (e.g. "python"); "" when absent.
    lang: str
    # 1-based line number of the opening fence within the source document.
    start_line_no: int
    # Every line of the block, opening and closing fence lines included.
    content: list[str]
|
||||||
|
|
||||||
|
|
||||||
# Code includes
|
# Code includes
|
||||||
# -----------------------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
@ -82,10 +93,11 @@ def replace_code_includes_with_placeholders(text: list[str]) -> list[str]:
|
||||||
Replace code includes with placeholders.
|
Replace code includes with placeholders.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
modified_text = text.copy()
|
||||||
includes = extract_code_includes(text)
|
includes = extract_code_includes(text)
|
||||||
for include in includes:
|
for include in includes:
|
||||||
text[include["line_no"] - 1] = CODE_INCLUDE_PLACEHOLDER
|
modified_text[include["line_no"] - 1] = CODE_INCLUDE_PLACEHOLDER
|
||||||
return text
|
return modified_text
|
||||||
|
|
||||||
|
|
||||||
def replace_placeholders_with_code_includes(
|
def replace_placeholders_with_code_includes(
|
||||||
|
|
@ -274,7 +286,7 @@ def _construct_markdown_link(
|
||||||
link = f"[{text}]({url})"
|
link = f"[{text}]({url})"
|
||||||
|
|
||||||
if attributes:
|
if attributes:
|
||||||
link += f" {{{attributes}}}"
|
link += f"{{{attributes}}}"
|
||||||
|
|
||||||
return link
|
return link
|
||||||
|
|
||||||
|
|
@ -345,7 +357,7 @@ def extract_html_links(lines: list[str]) -> list[HtmlLinkInfo]:
|
||||||
for html_link in HTML_LINK_RE.finditer(line):
|
for html_link in HTML_LINK_RE.finditer(line):
|
||||||
link_str = html_link.group(0)
|
link_str = html_link.group(0)
|
||||||
|
|
||||||
link_text_match = HTML_LINK_TEXT.match(link_str)
|
link_text_match = HTML_LINK_TEXT_RE.match(link_str)
|
||||||
assert link_text_match is not None
|
assert link_text_match is not None
|
||||||
link_text = link_text_match.group(2)
|
link_text = link_text_match.group(2)
|
||||||
assert isinstance(link_text, str)
|
assert isinstance(link_text, str)
|
||||||
|
|
@ -442,3 +454,188 @@ def replace_html_links(
|
||||||
)
|
)
|
||||||
|
|
||||||
return modified_text
|
return modified_text
|
||||||
|
|
||||||
|
|
||||||
|
# Multiline code blocks
|
||||||
|
# -----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def get_code_block_lang(line: str) -> str:
    """Return the language tag of a fence line like "```python", or "" if none."""
    lang_match = CODE_BLOCK_LANG_RE.match(line)
    return lang_match.group(1) if lang_match else ""
|
||||||
|
|
||||||
|
|
||||||
|
def extract_multiline_code_blocks(text: list[str]) -> list[MultilineCodeBlockInfo]:
    """
    Find all fenced code blocks (``` or ````) in *text* (one string per line).

    3-backtick and 4-backtick fences are tracked separately, so a ````-fenced
    block may contain ``` lines as content. Each closed block is returned as a
    MultilineCodeBlockInfo with its fence lines included in ``content``.

    NOTE(review): a block still open at the end of the input is silently
    dropped — confirm that is intended for malformed documents.
    """
    blocks: list[MultilineCodeBlockInfo] = []

    # State machine: at most one of these flags is True while inside a block.
    in_code_block3 = False
    in_code_block4 = False
    current_block_lang = ""
    current_block_start_line = -1  # 1-based line number of the opening fence
    current_block_lines: list[str] = []

    for line_no, line in enumerate(text, start=1):
        # Fences may be indented; detection works on the stripped line.
        stripped = line.lstrip()

        # --- Detect opening fence ---
        if not (in_code_block3 or in_code_block4):
            if stripped.startswith("```"):
                current_block_start_line = line_no
                # Count of leading backticks decides the fence kind (3 vs 4+).
                count = len(stripped) - len(stripped.lstrip("`"))
                if count == 3:
                    in_code_block3 = True
                    current_block_lang = get_code_block_lang(stripped)
                    current_block_lines = [line]
                    continue
                elif count >= 4:
                    in_code_block4 = True
                    current_block_lang = get_code_block_lang(stripped)
                    current_block_lines = [line]
                    continue

        # --- Detect closing fence ---
        elif in_code_block3:
            if stripped.startswith("```"):
                count = len(stripped) - len(stripped.lstrip("`"))
                # Only an exact 3-backtick fence closes a 3-backtick block;
                # anything else (e.g. 4+ backticks) is kept as content below.
                if count == 3:
                    current_block_lines.append(line)
                    blocks.append(
                        MultilineCodeBlockInfo(
                            lang=current_block_lang,
                            start_line_no=current_block_start_line,
                            content=current_block_lines,
                        )
                    )
                    # Reset the state machine for the next block.
                    in_code_block3 = False
                    current_block_lang = ""
                    current_block_start_line = -1
                    current_block_lines = []
                    continue
            current_block_lines.append(line)

        elif in_code_block4:
            if stripped.startswith("````"):
                count = len(stripped) - len(stripped.lstrip("`"))
                # Any fence of 4+ backticks closes a 4-backtick block.
                if count >= 4:
                    current_block_lines.append(line)
                    blocks.append(
                        MultilineCodeBlockInfo(
                            lang=current_block_lang,
                            start_line_no=current_block_start_line,
                            content=current_block_lines,
                        )
                    )
                    # Reset the state machine for the next block.
                    in_code_block4 = False
                    current_block_lang = ""
                    current_block_start_line = -1
                    current_block_lines = []
                    continue
            current_block_lines.append(line)

    return blocks
|
||||||
|
|
||||||
|
|
||||||
|
def _split_hash_comment(line: str) -> tuple[str, str | None]:
    """Split *line* into (code, trailing "# " comment) — comment is None if absent."""
    hash_match = HASH_COMMENT_RE.match(line)
    if hash_match is None:
        return line.rstrip(), None
    return hash_match.group("code").rstrip(), hash_match.group("comment")
|
||||||
|
|
||||||
|
|
||||||
|
def _split_slashes_comment(line: str) -> tuple[str, str | None]:
    """Split *line* into (code, trailing "// " comment) — comment is None if absent."""
    slashes_match = SLASHES_COMMENT_RE.match(line)
    if slashes_match is None:
        return line, None
    return slashes_match.group("code").rstrip(), slashes_match.group("comment")
|
||||||
|
|
||||||
|
|
||||||
|
def replace_multiline_code_block(
    block_a: MultilineCodeBlockInfo, block_b: MultilineCodeBlockInfo
) -> list[str]:
    """
    Replace multiline code block a with block b leaving comments intact.

    For each line, the code is taken from ``block_b`` while the trailing
    comment — when both lines carry one — is kept from ``block_a``, so that
    e.g. translated comments survive a refresh of the code itself.

    Syntax of comments depends on the language of the code block: ``# `` for
    Python/shell/config-style languages, ``// `` for console/JSON examples.
    Languages without a known comment syntax pass ``block_b`` through
    unchanged; mermaid blocks are returned as ``block_a`` untouched.

    Raises ValueError if the blocks are not compatible (different languages
    or different number of lines).
    """
    if block_a["lang"] != block_b["lang"]:
        raise ValueError("Code blocks have different languages")
    if len(block_a["content"]) != len(block_b["content"]):
        raise ValueError("Code blocks have different number of lines")

    block_language = block_a["lang"].lower()
    if block_language in {"mermaid"}:
        return block_a["content"].copy()  # We don't handle mermaid code blocks for now

    # Pick the comment splitter for this language (None-like fallback below).
    hash_comment_langs = {
        "python",
        "py",
        "sh",
        "bash",
        "dockerfile",
        "requirements",
        "gitignore",
        "toml",
        "yaml",
        "yml",
    }
    slashes_comment_langs = {"console", "json"}
    if block_language in hash_comment_langs:
        split_comment = _split_hash_comment
    elif block_language in slashes_comment_langs:
        split_comment = _split_slashes_comment
    else:
        # Unknown comment syntax: keep block_b's lines as-is.
        return block_b["content"].copy()

    code_block: list[str] = []
    for line_a, line_b in zip(block_a["content"], block_b["content"]):
        _line_a_code, line_a_comment = split_comment(line_a)
        _line_b_code, line_b_comment = split_comment(line_b)
        res_line = line_b
        # Only swap comments when BOTH sides have one: the previous code
        # called str.replace() with line_a_comment=None and crashed with a
        # TypeError whenever block_b had a comment that block_a lacked.
        # Keeping block_b's own comment in that case loses no information.
        if line_b_comment and line_a_comment:
            res_line = res_line.replace(line_b_comment, line_a_comment, 1)
        code_block.append(res_line)

    return code_block
|
||||||
|
|
||||||
|
|
||||||
|
def replace_multiline_code_blocks_in_text(
    text: list[str],
    code_blocks: list[MultilineCodeBlockInfo],
    original_code_blocks: list[MultilineCodeBlockInfo],
) -> list[str]:
    """
    Update each code block in `text` with the corresponding code block from
    `original_code_blocks` with comments taken from `code_blocks`.

    Returns a new list of lines; `text` itself is not mutated.

    Raises ValueError if the number, language, or shape of code blocks do not match.
    """
    if len(code_blocks) != len(original_code_blocks):
        raise ValueError(
            "Number of code blocks does not match the number of original code blocks"
        )

    modified_text = text.copy()
    for block, original_block in zip(code_blocks, original_code_blocks):
        updated_content = replace_multiline_code_block(block, original_block)

        # Overwrite the block's lines in place; start_line_no is 1-based.
        start_line_index = block["start_line_no"] - 1
        for offset, updated_line in enumerate(updated_content):
            modified_text[start_line_index + offset] = updated_line

    # Fixed the return annotation: this function returns the modified text
    # (a list of document lines), not a list of MultilineCodeBlockInfo.
    return modified_text
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
import difflib
|
||||||
|
import os
|
||||||
|
from collections.abc import Iterable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
|
@ -8,13 +11,27 @@ from scripts.doc_parsing_utils import (
|
||||||
extract_header_permalinks,
|
extract_header_permalinks,
|
||||||
extract_html_links,
|
extract_html_links,
|
||||||
extract_markdown_links,
|
extract_markdown_links,
|
||||||
|
extract_multiline_code_blocks,
|
||||||
replace_code_includes_with_placeholders,
|
replace_code_includes_with_placeholders,
|
||||||
replace_header_permalinks,
|
replace_header_permalinks,
|
||||||
replace_html_links,
|
replace_html_links,
|
||||||
replace_markdown_links,
|
replace_markdown_links,
|
||||||
|
replace_multiline_code_blocks_in_text,
|
||||||
replace_placeholders_with_code_includes,
|
replace_placeholders_with_code_includes,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
non_translated_sections = (
|
||||||
|
f"reference{os.sep}",
|
||||||
|
"release-notes.md",
|
||||||
|
"fastapi-people.md",
|
||||||
|
"external-links.md",
|
||||||
|
"newsletter.md",
|
||||||
|
"management-tasks.md",
|
||||||
|
"management.md",
|
||||||
|
"contributing.md",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
cli = typer.Typer()
|
cli = typer.Typer()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -23,6 +40,53 @@ def callback():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def iter_all_lang_paths(lang_path_root: Path) -> Iterable[Path]:
    """
    Iterate on the markdown files to translate in order of priority.

    Yields the top-level ``*.md`` files first, then the priority directories
    (learn, tutorial, advanced, about, how-to) recursively, then every other
    markdown file under ``lang_path_root`` not already yielded.
    """
    first_dirs = [
        lang_path_root / "learn",
        lang_path_root / "tutorial",
        lang_path_root / "advanced",
        lang_path_root / "about",
        lang_path_root / "how-to",
    ]
    first_parent = lang_path_root
    yield from first_parent.glob("*.md")
    for dir_path in first_dirs:
        yield from dir_path.rglob("*.md")
    for path in lang_path_root.rglob("*.md"):
        # Use Path.is_relative_to() rather than string-prefix matching:
        # str(path).startswith(...) would also (wrongly) skip sibling
        # directories whose names merely begin with a priority directory's
        # name, e.g. "learning/" being treated as inside "learn/".
        if any(path.is_relative_to(dir_path) for dir_path in first_dirs):
            continue
        if path.parent == first_parent:
            continue
        yield path
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_paths(lang: str):
    """Return the relative doc paths to process for *lang*, in priority order,
    skipping the non-translated sections."""
    lang_docs_root = Path("docs") / lang / "docs"
    relative_paths = (
        str(path.relative_to(lang_docs_root))
        for path in iter_all_lang_paths(lang_docs_root)
    )
    return [
        relpath
        for relpath in relative_paths
        if not relpath.startswith(non_translated_sections)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
def fix_all(ctx: typer.Context, language: str):
    # Run fix_pages over every translatable page for the given language,
    # reporting incompatible pages instead of aborting the whole run.
    # (Comments, not a docstring, so the typer --help text stays unchanged.)
    for page in get_all_paths(language):
        doc_path = Path("docs") / language / "docs" / page
        try:
            fix_pages(doc_paths=[doc_path])
        except ValueError as e:
            print(f"Error processing {doc_path}: {e}")
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
def fix_pages(
|
def fix_pages(
|
||||||
doc_paths: Annotated[
|
doc_paths: Annotated[
|
||||||
|
|
@ -49,6 +113,11 @@ def fix_pages(
|
||||||
)
|
)
|
||||||
if fixed_doc_lines != doc_lines:
|
if fixed_doc_lines != doc_lines:
|
||||||
print(f"Fixing code includes in: {path}")
|
print(f"Fixing code includes in: {path}")
|
||||||
|
diff = difflib.unified_diff(
|
||||||
|
doc_lines, fixed_doc_lines, fromfile="translation", tofile="fixed"
|
||||||
|
)
|
||||||
|
print("\n".join(diff))
|
||||||
|
|
||||||
doc_lines = fixed_doc_lines
|
doc_lines = fixed_doc_lines
|
||||||
|
|
||||||
# Fix permalinks
|
# Fix permalinks
|
||||||
|
|
@ -75,7 +144,14 @@ def fix_pages(
|
||||||
doc_lines = fixed_doc_lines
|
doc_lines = fixed_doc_lines
|
||||||
|
|
||||||
# Fix multiline code blocks
|
# Fix multiline code blocks
|
||||||
# TODO: Implement
|
en_code_blocks = extract_multiline_code_blocks(en_doc_lines)
|
||||||
|
doc_code_blocks = extract_multiline_code_blocks(doc_lines)
|
||||||
|
fixed_doc_lines = replace_multiline_code_blocks_in_text(
|
||||||
|
doc_lines, doc_code_blocks, en_code_blocks
|
||||||
|
)
|
||||||
|
if fixed_doc_lines != doc_lines:
|
||||||
|
print(f"Fixing multiline code blocks in: {path}")
|
||||||
|
doc_lines = fixed_doc_lines
|
||||||
|
|
||||||
# Write back the fixed document
|
# Write back the fixed document
|
||||||
doc_lines.append("") # Ensure file ends with a newline
|
doc_lines.append("") # Ensure file ends with a newline
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue