From 237b959d0dab47a4f8ac0903b0700f6359fa5527 Mon Sep 17 00:00:00 2001 From: Jan Boon Date: Tue, 10 Feb 2026 01:20:22 +0000 Subject: [PATCH] common : additional path character mappings for windows --- common/common.cpp | 15 ++++++++++++--- tests/test-fs-validate-filename.cpp | 15 +++++++++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 50cff38fe8..e890ca3993 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -738,7 +738,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) { // Check for forbidden codepoints: // - Control characters - // - Unicode equivalents of illegal characters + // - Unicode equivalents of path traversal characters // - UTF-16 surrogate pairs // - UTF-8 replacement character // - Byte order mark (BOM) @@ -749,8 +749,17 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) { || c == 0x7F // Control characters (DEL) || (c >= 0x80 && c <= 0x9F) // Control characters (C1) || c == 0xFF0E // Fullwidth Full Stop (period equivalent) - || c == 0x2215 // Division Slash (forward slash equivalent) - || c == 0x2216 // Set Minus (backslash equivalent) + || c == 0xFF0F // Fullwidth Solidus (forward slash equivalent, CP 874, 1250-1258) + || c == 0xFF3C // Fullwidth Reverse Solidus (backslash equivalent, CP 874, 1250-1258) + || c == 0xFF1A // Fullwidth Colon (colon equivalent, CP 874, 1250-1258) + || c == 0x2215 // Division Slash (forward slash equivalent, CP 1250, 1252, 1254) + || c == 0x2216 // Set Minus (backslash equivalent, CP 1250, 1252, 1254) + || c == 0x2044 // Fraction Slash (forward slash equivalent, CP 1250, 1252, 1254) + || c == 0x2236 // Ratio (colon equivalent, CP 1250, 1252, 1254) + || c == 0x0589 // Armenian Full Stop (colon equivalent, CP 1250, 1252, 1254) + || c == 0x00A5 // Yen Sign (backslash equivalent, CP 932 Japanese) + || c == 0x20A9 // Won Sign (backslash equivalent, CP 949, 1361 Korean) + || c == 0x00B4 // Acute Accent (forward slash equivalent, CP 1253 Greek) || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs || c == 0xFFFD // Replacement Character (UTF-8) || c == 0xFEFF // Byte Order Mark (BOM) diff --git a/tests/test-fs-validate-filename.cpp b/tests/test-fs-validate-filename.cpp index 947e70f40e..af7118c2e0 100644 --- a/tests/test-fs-validate-filename.cpp +++ b/tests/test-fs-validate-filename.cpp @@ -64,11 +64,22 @@ int main(void) { // --- Unicode special codepoints --- test("fullwidth period U+FF0E", false, "foo\xef\xbc\x8e""bar"); - test("division slash U+2215", false, "foo\xe2\x88\x95""bar"); - test("set minus U+2216", false, "foo\xe2\x88\x96""bar"); test("replacement char U+FFFD", false, "foo\xef\xbf\xbd""bar"); test("BOM U+FEFF", false, "foo\xef\xbb\xbf""bar"); + // --- Windows bestfit characters (map to path traversal chars under WideCharToMultiByte) --- + test("fullwidth solidus U+FF0F", false, "foo\xef\xbc\x8f""bar"); // / on CP 874, 1250-1258 + test("fullwidth rev solidus U+FF3C",false, "foo\xef\xbc\xbc""bar"); // \ on CP 874, 1250-1258 + test("fullwidth colon U+FF1A", false, "foo\xef\xbc\x9a""bar"); // : on CP 874, 1250-1258 + test("division slash U+2215", false, "foo\xe2\x88\x95""bar"); // / on CP 1250, 1252, 1254 + test("set minus U+2216", false, "foo\xe2\x88\x96""bar"); // \ on CP 1250, 1252, 1254 + test("fraction slash U+2044", false, "foo\xe2\x81\x84""bar"); // / on CP 1250, 1252, 1254 + test("ratio U+2236", false, "foo\xe2\x88\xb6""bar"); // : on CP 1250, 1252, 1254 + test("armenian full stop U+0589", false, "foo\xd6\x89""bar"); // : on CP 1250, 1252, 1254 + test("yen sign U+00A5", false, "foo\xc2\xa5""bar"); // \ on CP 932 (Japanese) + test("won sign U+20A9", false, "foo\xe2\x82\xa9""bar"); // \ on CP 949 (Korean) + test("acute accent U+00B4", false, "foo\xc2\xb4""bar"); // / on CP 1253 (Greek) + // --- Invalid UTF-8 --- test("invalid continuation", false, std::string("foo\x80""bar")); test("truncated sequence", false, std::string("foo\xc3"));