From e7fb2453ea9c839c4dd4895957893bf488b553a5 Mon Sep 17 00:00:00 2001 From: Yurii Motov Date: Mon, 5 Jan 2026 11:16:32 +0100 Subject: [PATCH] Fix header permalinks replacement --- scripts/doc_parsing_utils.py | 52 +++++++++++++++++------------------- scripts/translation_fixer.py | 4 ++- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/doc_parsing_utils.py b/scripts/doc_parsing_utils.py index ee6f00738f..3d5bb1b834 100644 --- a/scripts/doc_parsing_utils.py +++ b/scripts/doc_parsing_utils.py @@ -38,6 +38,7 @@ class CodeIncludeInfo(TypedDict): class HeaderPermalinkInfo(TypedDict): line_no: int hashes: str + title: str permalink: str @@ -160,10 +161,10 @@ def extract_header_permalinks(lines: list[str]) -> list[HeaderPermalinkInfo]: header_match = HEADER_WITH_PERMALINK_RE.match(line) if header_match: - hashes, _title, permalink = header_match.groups() + hashes, title, permalink = header_match.groups() headers.append( HeaderPermalinkInfo( - hashes=hashes, line_no=line_no, permalink=permalink + hashes=hashes, line_no=line_no, permalink=permalink, title=title ) ) @@ -202,41 +203,38 @@ def remove_header_permalinks(lines: list[str]) -> list[str]: def replace_header_permalinks( - text: list[str], original_permalinks: list[HeaderPermalinkInfo] + text: list[str], + header_permalinks: list[HeaderPermalinkInfo], + original_header_permalinks: list[HeaderPermalinkInfo], ) -> list[str]: """ Replace permalinks in the given text with the permalinks from the original document. - Fail if the number or order of headers does not match the original. + Fail if the number or level of headers does not match the original. """ - modified_text: list[str] = [] - permalink_index = 0 - for line in text: - header_match = HEADER_LINE_RE.match(line) - if header_match: - if permalink_index >= len(original_permalinks): - raise ValueError( - "Number of headers exceeds number of headers in the original document" - ) - hashes, title, _permalink = header_match.groups() - original_permalink_info = original_permalinks[permalink_index] - if original_permalink_info["hashes"] != hashes: - raise ValueError( - "Header levels do not match between document and original document" - ) + modified_text: list[str] = text.copy() - modified_line = f"{hashes} {title}{original_permalink_info['permalink']}" - modified_text.append(modified_line) - permalink_index += 1 - else: - modified_text.append(line) - - if permalink_index < len(original_permalinks): + if len(header_permalinks) != len(original_header_permalinks): raise ValueError( - "Number of headers is less than number of headers in the original document" + "Number of headers with permalinks does not match the number in the original document" ) + for header_info, original_header_info in zip( + header_permalinks, original_header_permalinks + ): + if header_info["hashes"] != original_header_info["hashes"]: + raise ValueError( + "Header levels do not match between document and original document" + f" (found {header_info['hashes']}, expected {original_header_info['hashes']})" + f" for header №{header_info['line_no']}" + ) + line_no = header_info["line_no"] - 1 + hashes = header_info["hashes"] + title = header_info["title"] + permalink = original_header_info["permalink"] + modified_text[line_no] = f"{hashes} {title}{permalink}" + return modified_text diff --git a/scripts/translation_fixer.py b/scripts/translation_fixer.py index ed9c2969aa..cec5081527 100644 --- a/scripts/translation_fixer.py +++ b/scripts/translation_fixer.py @@ -122,7 +122,9 @@ def fix_pages( # Fix permalinks en_permalinks = extract_header_permalinks(en_doc_lines) - fixed_doc_lines = replace_header_permalinks(doc_lines, en_permalinks) + doc_permalinks = extract_header_permalinks(doc_lines) + + fixed_doc_lines = replace_header_permalinks(doc_lines, doc_permalinks, en_permalinks) if fixed_doc_lines != doc_lines: print(f"Fixing header permalinks in: {path}") doc_lines = fixed_doc_lines