Fix header permalinks replacement

This commit is contained in:
Yurii Motov 2026-01-05 11:16:32 +01:00
parent beff498743
commit e7fb2453ea
2 changed files with 28 additions and 28 deletions

View File

@@ -38,6 +38,7 @@ class CodeIncludeInfo(TypedDict):
class HeaderPermalinkInfo(TypedDict):
line_no: int
hashes: str
title: str
permalink: str
@@ -160,10 +161,10 @@ def extract_header_permalinks(lines: list[str]) -> list[HeaderPermalinkInfo]:
header_match = HEADER_WITH_PERMALINK_RE.match(line)
if header_match:
hashes, _title, permalink = header_match.groups()
hashes, title, permalink = header_match.groups()
headers.append(
HeaderPermalinkInfo(
hashes=hashes, line_no=line_no, permalink=permalink
hashes=hashes, line_no=line_no, permalink=permalink, title=title
)
)
@@ -202,41 +203,38 @@ def remove_header_permalinks(lines: list[str]) -> list[str]:
def replace_header_permalinks(
text: list[str], original_permalinks: list[HeaderPermalinkInfo]
text: list[str],
header_permalinks: list[HeaderPermalinkInfo],
original_header_permalinks: list[HeaderPermalinkInfo],
) -> list[str]:
"""
Replace permalinks in the given text with the permalinks from the original document.
Fail if the number or order of headers does not match the original.
Fail if the number or level of headers does not match the original.
"""
modified_text: list[str] = []
permalink_index = 0
for line in text:
header_match = HEADER_LINE_RE.match(line)
if header_match:
if permalink_index >= len(original_permalinks):
raise ValueError(
"Number of headers exceeds number of headers in the original document"
)
hashes, title, _permalink = header_match.groups()
original_permalink_info = original_permalinks[permalink_index]
if original_permalink_info["hashes"] != hashes:
raise ValueError(
"Header levels do not match between document and original document"
)
modified_text: list[str] = text.copy()
modified_line = f"{hashes} {title}{original_permalink_info['permalink']}"
modified_text.append(modified_line)
permalink_index += 1
else:
modified_text.append(line)
if permalink_index < len(original_permalinks):
if len(header_permalinks) != len(original_header_permalinks):
raise ValueError(
"Number of headers is less than number of headers in the original document"
"Number of headers with permalinks does not match the number in the original document"
)
for header_info, original_header_info in zip(
header_permalinks, original_header_permalinks
):
if header_info["hashes"] != original_header_info["hashes"]:
raise ValueError(
"Header levels do not match between document and original document"
f" (found {header_info['hashes']}, expected {original_header_info['hashes']})"
f" for header №{header_info['line_no']}"
)
line_no = header_info["line_no"] - 1
hashes = header_info["hashes"]
title = header_info["title"]
permalink = original_header_info["permalink"]
modified_text[line_no] = f"{hashes} {title}{permalink}"
return modified_text

View File

@@ -122,7 +122,9 @@ def fix_pages(
# Fix permalinks
en_permalinks = extract_header_permalinks(en_doc_lines)
fixed_doc_lines = replace_header_permalinks(doc_lines, en_permalinks)
doc_permalinks = extract_header_permalinks(doc_lines)
fixed_doc_lines = replace_header_permalinks(doc_lines, doc_permalinks, en_permalinks)
if fixed_doc_lines != doc_lines:
print(f"Fixing header permalinks in: {path}")
doc_lines = fixed_doc_lines