Refactor `replace_html_links`, improve error message

This commit is contained in:
Yurii Motov 2026-01-06 05:57:26 +01:00
parent 5b812d4754
commit badefaba9f
2 changed files with 16 additions and 9 deletions

View File

@@ -269,7 +269,7 @@ def extract_markdown_links(lines: list[str]) -> list[tuple[str, int]]:
text=m.group("text"), text=m.group("text"),
title=m.group("title"), title=m.group("title"),
attributes=m.group("attrs"), attributes=m.group("attrs"),
full_match=m.group(0) full_match=m.group(0),
) )
) )
return links return links
@@ -337,7 +337,9 @@ def replace_markdown_links(
) )
line_no = link_info["line_no"] - 1 line_no = link_info["line_no"] - 1
modified_line = modified_text[line_no] modified_line = modified_text[line_no]
modified_line = modified_line.replace(link_info["full_match"], replacement_link, 1) modified_line = modified_line.replace(
link_info["full_match"], replacement_link, 1
)
modified_text[line_no] = modified_line modified_text[line_no] = modified_line
return modified_text return modified_text
@@ -430,7 +432,10 @@ def _construct_html_link(
def replace_html_links( def replace_html_links(
text: list[str], original_links: list[HtmlLinkInfo], lang_code: str text: list[str],
links: list[HtmlLinkInfo],
original_links: list[HtmlLinkInfo],
lang_code: str,
) -> list[str]: ) -> list[str]:
""" """
Replace HTML links in the given text with the links from the original document. Replace HTML links in the given text with the links from the original document.
@@ -439,13 +444,12 @@ def replace_html_links(
Fail if the number of links does not match the original. Fail if the number of links does not match the original.
""" """
links = extract_html_links(text) if len(links) != len(original_links):
if len(links) > len(original_links):
raise ValueError( raise ValueError(
"Number of HTML links exceeds number of HTML links in the original document" "Number of HTML links does not match the number of HTML links in the "
"original document "
f"({len(links)} vs {len(original_links)})"
) )
elif len(links) < len(original_links):
raise ValueError("Number of HTML links is less than in the original document")
modified_text = text.copy() modified_text = text.copy()
for link_index, link in enumerate(links): for link_index, link in enumerate(links):

View File

@@ -131,7 +131,10 @@ def process_one_page(path: Path) -> bool:
# Fix HTML links # Fix HTML links
en_html_links = extract_html_links(en_doc_lines) en_html_links = extract_html_links(en_doc_lines)
fixed_doc_lines = replace_html_links(doc_lines, en_html_links, lang_code) doc_html_links = extract_html_links(doc_lines)
fixed_doc_lines = replace_html_links(
doc_lines, doc_html_links, en_html_links, lang_code
)
if fixed_doc_lines != doc_lines: if fixed_doc_lines != doc_lines:
print(f"Fixing HTML links in: {path}") print(f"Fixing HTML links in: {path}")
doc_lines = fixed_doc_lines doc_lines = fixed_doc_lines