mirror of https://github.com/tiangolo/fastapi.git
734 lines
24 KiB
Python
734 lines
24 KiB
Python
import re
|
|
from typing import TypedDict, Union
|
|
|
|
# A whole line of the form `{* <target> <rest> *}`.
# Group 1 is the first run of non-space characters, group 2 is everything after it.
CODE_INCLUDE_RE = re.compile(r"^\{\*\s*(\S+)\s*(.*)\*\}$")
# Stand-in text that temporarily takes the place of a code include line.
CODE_INCLUDE_PLACEHOLDER = "<CODE_INCLUDE>"

# Markdown header line: 1-6 hashes, a space, the title and an optional `{#...}` suffix.
# Group 3 is the whole suffix including the whitespace in front of `{`;
# it is None when the header has no such suffix.
HEADER_WITH_PERMALINK_RE = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})?\s*$")
# Same header shape, but group 3 captures only the `#...` part inside the braces.
HEADER_LINE_RE = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$")

# URL prefix that receives a language code in `_add_lang_code_to_url`.
TIANGOLO_COM = "https://fastapi.tiangolo.com"
# Paths directly under TIANGOLO_COM that are left without a language code.
ASSETS_URL_PREFIXES = ("/img/", "/css/", "/js/")

MARKDOWN_LINK_RE = re.compile(
    r"(?<!\\)(?<!\!)"  # not an image ![...] and not escaped \[...]
    r"\[(?P<text>.*?)\]"  # link text (non-greedy)
    r"\("
    r"(?P<url>[^)\s]+)"  # url (no spaces and `)`)
    r'(?:\s+["\'](?P<title>.*?)["\'])?'  # optional title in "" or ''
    r"\)"
    r"(?:\s*\{(?P<attrs>[^}]*)\})?"  # optional attributes in {}
)

# A complete `<a ...>...</a>` element (at least one whitespace character after `<a`).
HTML_LINK_RE = re.compile(r"<a\s+[^>]*>.*?</a>")
# Group 1: raw contents of the opening tag after `<a`; group 2: text between the tags.
HTML_LINK_TEXT_RE = re.compile(r"<a\b([^>]*)>(.*?)</a>")
# Group 1: raw contents of the opening tag after `<a`.
HTML_LINK_OPEN_TAG_RE = re.compile(r"<a\b([^>]*)>")
# One `name=<quote>value<quote>` attribute; groups are (name, quote character, value).
HTML_ATTR_RE = re.compile(r'(\w+)\s*=\s*([\'"])(.*?)\2')

# Opening code fence; group 1 is the run of word characters / hyphens that
# directly follows the backticks (empty when there is none).
CODE_BLOCK_LANG_RE = re.compile(r"^`{3,4}([\w-]*)", re.MULTILINE)

# Splits a line into `code` and an optional trailing `// ...` comment.
# The comment must start the line or be preceded by a space, and `//` must be
# followed by a space.
SLASHES_COMMENT_RE = re.compile(
    r"^(?P<code>.*?)(?P<comment>(?:(?<= )// .*)|(?:^// .*))?$"
)

# Same split for `# ...` comments.
HASH_COMMENT_RE = re.compile(r"^(?P<code>.*?)(?P<comment>(?:(?<= )# .*)|(?:^# .*))?$")
|
|
|
|
|
|
class CodeIncludeInfo(TypedDict):
    """A line of a document that matched `CODE_INCLUDE_RE`."""

    # Line number of the include within the document (1-based).
    line_no: int
    # Full text of the include line.
    line: str
|
|
|
|
|
|
class HeaderPermalinkInfo(TypedDict):
    """A markdown header line split into the groups of `HEADER_WITH_PERMALINK_RE`."""

    # Line number of the header within the document (1-based).
    line_no: int
    # The leading run of `#` characters, i.e. the header level (e.g. "###").
    hashes: str
    # Header text without the permalink suffix.
    title: str
    # The `{#...}` suffix including the whitespace in front of it (e.g. " {#intro}").
    # The regex group is optional, so this is None for headers without a permalink.
    permalink: Union[str, None]
|
|
|
|
|
|
class MarkdownLinkInfo(TypedDict):
    """One match of `MARKDOWN_LINK_RE` within a document line."""

    # Line number the link was found on (1-based).
    line_no: int
    # Link target (the `url` group).
    url: str
    # Text between the square brackets.
    text: str
    # Quoted title inside the parentheses, or None when the link has none.
    title: Union[str, None]
    # Contents of a trailing `{...}` attribute list, or None when there is none.
    attributes: Union[str, None]
    # The complete matched link text (match group 0).
    full_match: str
|
|
|
|
|
|
class HTMLLinkAttribute(TypedDict):
    """One `name=<quote>value<quote>` attribute matched by `HTML_ATTR_RE`."""

    # Attribute name (e.g. "href").
    name: str
    # The quote character that surrounds the value (`'` or `"`).
    quote: str
    # Attribute value without the surrounding quotes.
    value: str
|
|
|
|
|
|
class HtmlLinkInfo(TypedDict):
    """One `<a ...>...</a>` element matched by `HTML_LINK_RE` within a document line."""

    # Line number the link was found on (1-based).
    line_no: int
    # The complete matched element, from `<a` through `</a>`.
    full_tag: str
    # Attributes parsed from the opening tag, in the order they appear.
    attributes: list[HTMLLinkAttribute]
    # Text between the opening and the closing tag.
    text: str
|
|
|
|
|
|
class MultilineCodeBlockInfo(TypedDict):
    """A fenced code block collected by `extract_multiline_code_blocks`."""

    # Language taken from the opening fence ("" when `get_code_block_lang` finds none).
    lang: str
    # Line number of the opening fence (1-based).
    start_line_no: int
    # All lines of the block, including the opening and the closing fence lines.
    content: list[str]
|
|
|
|
|
|
# Code includes
|
|
# --------------------------------------------------------------------------------------
|
|
|
|
|
|
def extract_code_includes(lines: list[str]) -> list[CodeIncludeInfo]:
    """
    Collect every line that is a code include.

    A line counts as a code include when `CODE_INCLUDE_RE` matches it.

    Return list of CodeIncludeInfo, where each dict contains:
    - `line_no` - line number (1-based)
    - `line` - text of the line
    """

    return [
        CodeIncludeInfo(line_no=position, line=content)
        for position, content in enumerate(lines, start=1)
        if CODE_INCLUDE_RE.match(content)
    ]
|
|
|
|
|
|
def replace_code_includes_with_placeholders(text: list[str]) -> list[str]:
    """
    Return a copy of `text` in which every code include line is swapped for
    `CODE_INCLUDE_PLACEHOLDER`. The input list is left untouched.
    """

    result = text.copy()
    for found in extract_code_includes(text):
        index = found["line_no"] - 1  # `line_no` is 1-based
        result[index] = CODE_INCLUDE_PLACEHOLDER
    return result
|
|
|
|
|
|
def replace_placeholders_with_code_includes(
    text: list[str], original_includes: list[CodeIncludeInfo]
) -> list[str]:
    """
    Put the code includes of the original (English) document back in place of
    the placeholders found in `text`.

    A line is a placeholder when it equals `CODE_INCLUDE_PLACEHOLDER` after
    stripping surrounding whitespace. Placeholders are paired with
    `original_includes` by position, and the whole line is replaced by the
    original include line.

    Raise ValueError if the number of placeholders does not match the number of
    original includes.
    """

    placeholder_indexes = []
    for index, content in enumerate(text):
        if content.strip() == CODE_INCLUDE_PLACEHOLDER:
            placeholder_indexes.append(index)

    found = len(placeholder_indexes)
    expected = len(original_includes)
    if found != expected:
        raise ValueError(
            "Number of code include placeholders does not match the number of code includes "
            "in the original document "
            f"({found} vs {expected})"
        )

    result = text.copy()
    for index, include in zip(placeholder_indexes, original_includes):
        result[index] = include["line"]

    return result
|
|
|
|
|
|
# Header permalinks
|
|
# --------------------------------------------------------------------------------------
|
|
|
|
|
|
def extract_header_permalinks(lines: list[str]) -> list[HeaderPermalinkInfo]:
    """
    Collect the markdown headers found outside of fenced code blocks.

    A fence is a line that starts (at column 0) with three or more backticks.
    A block opened with exactly three backticks is closed only by a line that
    starts with exactly three; a block opened with four or more is closed by a
    line that starts with four or more.

    Return list of HeaderPermalinkInfo, where each dict contains:
    - `line_no` - line number (1-based)
    - `hashes` - string of hashes representing header level (e.g., "###")
    - `title` - header text without the permalink
    - `permalink` - permalink suffix including the whitespace before it
      (e.g., " {#permalink}"), or None when the header has none
    """

    found: list[HeaderPermalinkInfo] = []
    # 0: outside a code block, 3: inside a ``` block, 4: inside a ````(+) block
    fence = 0

    for line_no, line in enumerate(lines, start=1):
        ticks = len(line) - len(line.lstrip("`"))

        if fence == 0:
            if ticks >= 3:
                fence = 3 if ticks == 3 else 4
                continue

            header = HEADER_WITH_PERMALINK_RE.match(line)
            if header:
                hashes, title, permalink = header.groups()
                found.append(
                    HeaderPermalinkInfo(
                        hashes=hashes, line_no=line_no, permalink=permalink, title=title
                    )
                )
        elif fence == 3:
            if ticks == 3:
                fence = 0
        elif ticks >= 4:
            fence = 0

    return found
|
|
|
|
|
|
def remove_header_permalinks(lines: list[str]) -> list[str]:
    """
    Return a copy of `lines` with the permalink suffix dropped from headers.

    Every line that matches `HEADER_WITH_PERMALINK_RE` is rebuilt as
    "<hashes> <title>"; all other lines are passed through unchanged.
    """

    def without_permalink(line: str) -> str:
        header = HEADER_WITH_PERMALINK_RE.match(line)
        if header is None:
            return line
        return f"{header.group(1)} {header.group(2)}"

    return [without_permalink(line) for line in lines]
|
|
|
|
|
|
def replace_header_permalinks(
    text: list[str],
    header_permalinks: list[HeaderPermalinkInfo],
    original_header_permalinks: list[HeaderPermalinkInfo],
) -> list[str]:
    """
    Replace permalinks in the given text with the permalinks from the original document.

    Headers are paired by position. Each header line of `text` is rebuilt from
    its own hashes and title plus the permalink of the matching original
    header. `text` itself is not modified; a new list is returned.

    Fail (ValueError) if the number or level of headers does not match the original.
    """

    modified_text: list[str] = text.copy()

    if len(header_permalinks) != len(original_header_permalinks):
        raise ValueError(
            "Number of headers with permalinks does not match the number in the "
            "original document "
            f"({len(header_permalinks)} vs {len(original_header_permalinks)})"
        )

    paired_headers = zip(header_permalinks, original_header_permalinks)
    for header_no, (header_info, original_header_info) in enumerate(
        paired_headers, start=1
    ):
        if header_info["hashes"] != original_header_info["hashes"]:
            raise ValueError(
                "Header levels do not match between document and original document"
                f" (found {header_info['hashes']}, expected {original_header_info['hashes']})"
                f" for header №{header_no} in line {header_info['line_no']}"
            )

        hashes = header_info["hashes"]
        title = header_info["title"]
        # The permalink is None for headers without a `{#...}` suffix; without
        # the fallback the literal text "None" would be glued onto the title.
        permalink = original_header_info["permalink"] or ""
        modified_text[header_info["line_no"] - 1] = f"{hashes} {title}{permalink}"

    return modified_text
|
|
|
|
|
|
# Markdown links
|
|
# --------------------------------------------------------------------------------------
|
|
|
|
|
|
def extract_markdown_links(lines: list[str]) -> list[MarkdownLinkInfo]:
    """
    Collect every markdown link (`MARKDOWN_LINK_RE` match) in the given lines,
    in document order.

    Return list of MarkdownLinkInfo, where each dict contains:
    - `line_no` - line number (1-based)
    - `url` - link URL
    - `text` - link text
    - `title` - link title (None if the link has none)
    - `attributes` - contents of a trailing `{...}` block (None if absent)
    - `full_match` - the complete matched link text
    """

    found: list[MarkdownLinkInfo] = []
    for line_no, line in enumerate(lines, start=1):
        found.extend(
            MarkdownLinkInfo(
                line_no=line_no,
                url=link.group("url"),
                text=link.group("text"),
                title=link.group("title"),
                attributes=link.group("attrs"),
                full_match=link.group(0),
            )
            for link in MARKDOWN_LINK_RE.finditer(line)
        )
    return found
|
|
|
|
|
|
def _add_lang_code_to_url(url: str, lang_code: str) -> str:
    """
    Insert `lang_code` as the first path segment of URLs under `TIANGOLO_COM`.

    URLs that do not start with `TIANGOLO_COM`, and URLs whose path starts with
    one of `ASSETS_URL_PREFIXES`, are returned unchanged.
    """
    if not url.startswith(TIANGOLO_COM):
        return url

    rel_url = url[len(TIANGOLO_COM) :]
    if rel_url.startswith(ASSETS_URL_PREFIXES):
        return url

    # Rebuild from the already-split remainder so that only the leading site
    # prefix is rewritten; a blanket `str.replace` would also rewrite any later
    # occurrence of the site URL (e.g. one embedded in a query string).
    return f"{TIANGOLO_COM}/{lang_code}{rel_url}"
|
|
|
|
|
|
def _construct_markdown_link(
    url: str,
    text: str,
    title: Union[str, None],
    attributes: Union[str, None],
    lang_code: str,
) -> str:
    """
    Build the markdown source of a link.

    The URL is first passed through `_add_lang_code_to_url`. A non-empty
    `title` is added in double quotes after the URL, and non-empty
    `attributes` are appended in curly braces.
    """
    localized_url = _add_lang_code_to_url(url, lang_code)

    target = f'{localized_url} "{title}"' if title else localized_url
    suffix = f"{{{attributes}}}" if attributes else ""

    return f"[{text}]({target}){suffix}"
|
|
|
|
|
|
def replace_markdown_links(
    text: list[str],
    links: list[MarkdownLinkInfo],
    original_links: list[MarkdownLinkInfo],
    lang_code: str,
) -> list[str]:
    """
    Replace markdown links in the given text with the original links.

    Links are paired by position. Each rebuilt link keeps the text and title of
    the link in `text`, and takes the URL (adjusted for `lang_code`) and the
    attributes from the matching original link. `text` itself is not modified;
    a new list is returned.

    Fail (ValueError) if the number of links does not match the original.
    """

    if len(links) != len(original_links):
        raise ValueError(
            "Number of markdown links does not match the number in the "
            "original document "
            f"({len(links)} vs {len(original_links)})"
        )

    modified_text = text.copy()
    # For every line, the offset just past the most recent replacement. Searching
    # from there keeps a later link from re-matching an earlier, textually
    # identical link (or the replacement that was just written for it).
    search_from: dict[int, int] = {}

    for link_info, original_link_info in zip(links, original_links):
        replacement_link = _construct_markdown_link(
            url=original_link_info["url"],
            text=link_info["text"],
            title=link_info["title"],
            attributes=original_link_info["attributes"],
            lang_code=lang_code,
        )

        line_index = link_info["line_no"] - 1
        line = modified_text[line_index]
        target = link_info["full_match"]

        position = line.find(target, search_from.get(line_index, 0))
        if position == -1:
            # Links that were not supplied in left-to-right order: fall back to
            # the first occurrence on the line.
            position = line.find(target)
        if position == -1:
            # The link text is not on this line; leave the line as it is.
            continue

        modified_text[line_index] = (
            line[:position] + replacement_link + line[position + len(target) :]
        )
        search_from[line_index] = position + len(replacement_link)

    return modified_text
|
|
|
|
|
|
# HTML links
|
|
# --------------------------------------------------------------------------------------
|
|
|
|
|
|
def extract_html_links(lines: list[str]) -> list[HtmlLinkInfo]:
    """
    Collect every HTML link (`HTML_LINK_RE` match) in the given lines, in
    document order.

    Return list of HtmlLinkInfo, where each dict contains:
    - `line_no` - line number (1-based)
    - `full_tag` - full HTML link tag
    - `attributes` - list of HTMLLinkAttribute (name, quote, value)
    - `text` - link text
    """

    found = []
    for line_no, line in enumerate(lines, start=1):
        for tag in HTML_LINK_RE.finditer(line):
            full_tag = tag.group(0)

            # Text between the opening and the closing tag
            text_match = HTML_LINK_TEXT_RE.match(full_tag)
            assert text_match is not None
            inner_text = text_match.group(2)

            # Raw attribute source of the opening tag
            open_tag_match = HTML_LINK_OPEN_TAG_RE.match(full_tag)
            assert open_tag_match is not None
            raw_attributes = open_tag_match.group(1)

            parsed_attributes: list[HTMLLinkAttribute] = [
                HTMLLinkAttribute(name=name, quote=quote, value=value)
                for name, quote, value in HTML_ATTR_RE.findall(raw_attributes)
            ]

            found.append(
                HtmlLinkInfo(
                    line_no=line_no,
                    full_tag=full_tag,
                    attributes=parsed_attributes,
                    text=inner_text,
                )
            )
    return found
|
|
|
|
|
|
def _construct_html_link(
    link_text: str,
    attributes: list[HTMLLinkAttribute],
    lang_code: str,
) -> str:
    """
    Build the source of an `<a>` element from its attributes and text.

    Attributes are written in the given order as `name=<quote>value<quote>`,
    separated by single spaces. The value of every `href` attribute is passed
    through `_add_lang_code_to_url` first.
    """

    rendered_attributes = []
    for attribute in attributes:
        value = attribute["value"]
        if attribute["name"] == "href":
            value = _add_lang_code_to_url(value, lang_code)
        quote = attribute["quote"]
        rendered_attributes.append(f"{attribute['name']}={quote}{value}{quote}")

    attrs_str = " ".join(rendered_attributes)
    return f"<a {attrs_str}>{link_text}</a>"
|
|
|
|
|
|
def replace_html_links(
    text: list[str],
    links: list[HtmlLinkInfo],
    original_links: list[HtmlLinkInfo],
    lang_code: str,
) -> list[str]:
    """
    Replace HTML links in the given text with the links from the original document.

    Links are paired by position. Each rebuilt link keeps the text of the link
    in `text` and takes all attributes from the matching original link.
    Adjust URLs for the given language code. `text` itself is not modified;
    a new list is returned.

    Fail (ValueError) if the number of links does not match the original.
    """

    if len(links) != len(original_links):
        raise ValueError(
            "Number of HTML links does not match the number in the "
            "original document "
            f"({len(links)} vs {len(original_links)})"
        )

    modified_text = text.copy()
    # For every line, the offset just past the most recent replacement. Searching
    # from there keeps a later link from re-matching an earlier, textually
    # identical tag (or the replacement that was just written for it).
    search_from: dict[int, int] = {}

    for link, original_link_info in zip(links, original_links):
        replacement_link = _construct_html_link(
            link_text=link["text"],
            attributes=original_link_info["attributes"],
            lang_code=lang_code,
        )

        line_index = link["line_no"] - 1
        line = modified_text[line_index]
        target = link["full_tag"]

        position = line.find(target, search_from.get(line_index, 0))
        if position == -1:
            # Links that were not supplied in left-to-right order: fall back to
            # the first occurrence on the line.
            position = line.find(target)
        if position == -1:
            # The tag is not on this line; leave the line as it is.
            continue

        modified_text[line_index] = (
            line[:position] + replacement_link + line[position + len(target) :]
        )
        search_from[line_index] = position + len(replacement_link)

    return modified_text
|
|
|
|
|
|
# Multiline code blocks
|
|
# --------------------------------------------------------------------------------------
|
|
|
|
|
|
def get_code_block_lang(line: str) -> str:
    """
    Return the language that follows the backticks of a code fence line.

    The result is "" when `CODE_BLOCK_LANG_RE` does not match the line or the
    fence carries no language.
    """
    fence = CODE_BLOCK_LANG_RE.match(line)
    return fence.group(1) if fence else ""
|
|
|
|
|
|
def extract_multiline_code_blocks(text: list[str]) -> list[MultilineCodeBlockInfo]:
    """
    Collect the fenced code blocks of a document.

    Fences are recognised after stripping leading whitespace. A block opened
    with exactly three backticks is closed only by a line with exactly three
    leading backticks; a block opened with four or more is closed by a line
    with four or more. A block that is still open at the end of `text` is not
    returned.

    Each returned block holds the language of the opening fence, the 1-based
    line number of the opening fence and all of its lines (fences included,
    with their original indentation).
    """
    blocks: list[MultilineCodeBlockInfo] = []

    # 0: outside a code block, 3: inside a ``` block, 4: inside a ````(+) block
    fence = 0
    lang = ""
    start_line_no = -1
    collected: list[str] = []

    for line_no, line in enumerate(text, start=1):
        stripped = line.lstrip()
        ticks = len(stripped) - len(stripped.lstrip("`"))

        if fence == 0:
            # --- Detect opening fence ---
            if ticks >= 3:
                fence = 3 if ticks == 3 else 4
                start_line_no = line_no
                lang = get_code_block_lang(stripped)
                collected = [line]
            continue

        # Inside a block every line is kept, the closing fence included
        collected.append(line)

        # --- Detect closing fence ---
        is_closing_fence = ticks == 3 if fence == 3 else ticks >= 4
        if is_closing_fence:
            blocks.append(
                MultilineCodeBlockInfo(
                    lang=lang,
                    start_line_no=start_line_no,
                    content=collected,
                )
            )
            fence = 0
            lang = ""
            start_line_no = -1
            collected = []

    return blocks
|
|
|
|
|
|
def _split_hash_comment(line: str) -> tuple[str, Union[str, None]]:
    """
    Split a line into its code part and a trailing `# ...` comment.

    Return `(code, comment)`: `code` has trailing whitespace removed, and
    `comment` is None when `HASH_COMMENT_RE` finds no comment.
    """
    parts = HASH_COMMENT_RE.match(line)
    if parts is None:
        return line.rstrip(), None
    return parts.group("code").rstrip(), parts.group("comment")
|
|
|
|
|
|
def _split_slashes_comment(line: str) -> tuple[str, Union[str, None]]:
    """
    Split a line into its code part and a trailing `// ...` comment.

    Return `(code, comment)`: `code` has trailing whitespace removed, and
    `comment` is None when `SLASHES_COMMENT_RE` finds no comment.
    """
    match = SLASHES_COMMENT_RE.match(line)
    if match:
        return match.group("code").rstrip(), match.group("comment")
    # Strip here as well, so the code part is normalised the same way as on the
    # matched path above and as in `_split_hash_comment`.
    return line.rstrip(), None
|
|
|
|
|
|
def _swap_trailing_comment(line: str, old_comment: str, new_comment: str) -> str:
    """Return `line` with its trailing `old_comment` replaced by `new_comment`."""
    # The comment sits at the end of the line, so the *last* occurrence is the
    # one to replace; the same text may also appear earlier inside the code.
    head, found, tail = line.rpartition(old_comment)
    if not found:
        return line
    return f"{head}{new_comment}{tail}"


def replace_multiline_code_block(
    block_a: MultilineCodeBlockInfo, block_b: MultilineCodeBlockInfo
) -> list[str]:
    """
    Replace multiline code block `a` with block `b` leaving comments intact.

    The result consists of the lines of `b`. Where a line of `b` and the line
    of `a` at the same position both end in a comment, the comment of `b` is
    replaced by the comment of `a`. Lines of `b` whose counterpart in `a` has
    no comment are kept unchanged.

    Syntax of comments depends on the language of the code block. Blocks in a
    language with no known comment syntax are taken from `b` as they are, and
    mermaid blocks are returned as a copy of `a` (they are not handled).

    Raises ValueError if the blocks are not compatible (different languages or different number of lines).
    """

    start_line = block_a["start_line_no"]
    end_line_no = start_line + len(block_a["content"]) - 1

    if block_a["lang"] != block_b["lang"]:
        raise ValueError(
            f"Code block (lines {start_line}-{end_line_no}) "
            "has different language than the original block "
            f"('{block_a['lang']}' vs '{block_b['lang']}')"
        )
    if len(block_a["content"]) != len(block_b["content"]):
        raise ValueError(
            f"Code block (lines {start_line}-{end_line_no}) "
            "has different number of lines than the original block "
            f"({len(block_a['content'])} vs {len(block_b['content'])})"
        )

    block_language = block_a["lang"].lower()
    if block_language in {"mermaid"}:
        # Only the content matters here: the language is already known to be
        # equal, and a different position in the file needs no manual check.
        if block_a["content"] != block_b["content"]:
            print(
                f"Skipping mermaid code block replacement (lines {start_line}-{end_line_no}). "
                "This should be checked manually."
            )
        return block_a["content"].copy()  # We don't handle mermaid code blocks for now

    # Pick the comment splitter based on language
    if block_language in {
        "python",
        "py",
        "sh",
        "bash",
        "dockerfile",
        "requirements",
        "gitignore",
        "toml",
        "yaml",
        "yml",
        "hash-style-comments",
    }:
        split_comment = _split_hash_comment
    elif block_language in {"console", "json", "slash-style-comments"}:
        split_comment = _split_slashes_comment
    else:
        # No known comment syntax: take the lines of `b` as they are
        return list(block_b["content"])

    code_block: list[str] = []
    for line_a, line_b in zip(block_a["content"], block_b["content"]):
        _line_a_code, line_a_comment = split_comment(line_a)
        _line_b_code, line_b_comment = split_comment(line_b)

        if line_a_comment and line_b_comment:
            code_block.append(
                _swap_trailing_comment(line_b, line_b_comment, line_a_comment)
            )
        else:
            # Without a comment on both sides there is nothing to carry over.
            # In particular `line_a_comment` may be None, which must not be
            # passed on as replacement text.
            code_block.append(line_b)

    return code_block
|
|
|
|
|
|
def replace_multiline_code_blocks_in_text(
    text: list[str],
    code_blocks: list[MultilineCodeBlockInfo],
    original_code_blocks: list[MultilineCodeBlockInfo],
) -> list[str]:
    """
    Return a copy of `text` in which every code block is overwritten with the
    result of `replace_multiline_code_block` for that block and the block at
    the same position in `original_code_blocks`.

    The merged lines are written starting at the `start_line_no` of the block
    from `code_blocks`.

    Raises ValueError if the number, language, or shape of code blocks do not match.
    """

    found = len(code_blocks)
    expected = len(original_code_blocks)
    if found != expected:
        raise ValueError(
            "Number of code blocks does not match the number in the original document "
            f"({found} vs {expected})"
        )

    result = text.copy()
    for translated_block, english_block in zip(code_blocks, original_code_blocks):
        merged_lines = replace_multiline_code_block(translated_block, english_block)

        first_index = translated_block["start_line_no"] - 1  # 1-based -> 0-based
        for index, merged_line in enumerate(merged_lines, start=first_index):
            result[index] = merged_line

    return result
|
|
|
|
|
|
# All checks
|
|
# --------------------------------------------------------------------------------------
|
|
|
|
|
|
def check_translation(
    doc_lines: list[str],
    en_doc_lines: list[str],
    lang_code: str,
    auto_fix: bool,
    path: str,
) -> list[str]:
    """
    Check a translated document against the English original and optionally fix it.

    Five aspects are processed in order: code includes, header permalinks,
    markdown links, HTML links and multiline code blocks. For each one the
    corrected version of the document is computed from the English original.
    When `auto_fix` is true and the corrected version differs, a message
    naming `path` is printed and the corrected lines are used for the
    following steps.

    Return the resulting lines; with `auto_fix` false this is `doc_lines` as
    passed in. The replace functions used here raise ValueError when the
    document and the original do not line up (e.g. a different number of links).
    """

    def adopt(current: list[str], candidate: list[str], what: str) -> list[str]:
        # A candidate is only taken when fixing is enabled and it changes something
        if auto_fix and (candidate != current):
            print(f"Fixing {what} in: {path}")
            return candidate
        return current

    # Code includes
    en_code_includes = extract_code_includes(en_doc_lines)
    with_placeholders = replace_code_includes_with_placeholders(doc_lines)
    candidate = replace_placeholders_with_code_includes(
        with_placeholders, en_code_includes
    )
    doc_lines = adopt(doc_lines, candidate, "code includes")

    # Header permalinks
    en_permalinks = extract_header_permalinks(en_doc_lines)
    doc_permalinks = extract_header_permalinks(doc_lines)
    candidate = replace_header_permalinks(doc_lines, doc_permalinks, en_permalinks)
    doc_lines = adopt(doc_lines, candidate, "header permalinks")

    # Markdown links
    en_markdown_links = extract_markdown_links(en_doc_lines)
    doc_markdown_links = extract_markdown_links(doc_lines)
    candidate = replace_markdown_links(
        doc_lines, doc_markdown_links, en_markdown_links, lang_code
    )
    doc_lines = adopt(doc_lines, candidate, "markdown links")

    # HTML links
    en_html_links = extract_html_links(en_doc_lines)
    doc_html_links = extract_html_links(doc_lines)
    candidate = replace_html_links(doc_lines, doc_html_links, en_html_links, lang_code)
    doc_lines = adopt(doc_lines, candidate, "HTML links")

    # Multiline code blocks
    en_code_blocks = extract_multiline_code_blocks(en_doc_lines)
    doc_code_blocks = extract_multiline_code_blocks(doc_lines)
    candidate = replace_multiline_code_blocks_in_text(
        doc_lines, doc_code_blocks, en_code_blocks
    )
    doc_lines = adopt(doc_lines, candidate, "multiline code blocks")

    return doc_lines
|