common : additional path character mappings for windows

This commit is contained in:
Jan Boon 2026-02-10 01:20:22 +00:00
parent 730e236934
commit 237b959d0d
2 changed files with 25 additions and 5 deletions

View File

@ -738,7 +738,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
// Check for forbidden codepoints:
// - Control characters
// - Unicode equivalents of illegal characters
// - Unicode equivalents of path traversal characters
// - UTF-16 surrogate code points (U+D800..U+DFFF, paired or not)
// - Unicode replacement character (U+FFFD)
// - Byte order mark (BOM)
@ -749,8 +749,17 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|| c == 0x7F // Control characters (DEL)
|| (c >= 0x80 && c <= 0x9F) // Control characters (C1)
|| c == 0xFF0E // Fullwidth Full Stop (period equivalent)
|| c == 0x2215 // Division Slash (forward slash equivalent)
|| c == 0x2216 // Set Minus (backslash equivalent)
|| c == 0xFF0F // Fullwidth Solidus (forward slash equivalent, CP 874, 1250-1258)
|| c == 0xFF3C // Fullwidth Reverse Solidus (backslash equivalent, CP 874, 1250-1258)
|| c == 0xFF1A // Fullwidth Colon (colon equivalent, CP 874, 1250-1258)
|| c == 0x2215 // Division Slash (forward slash equivalent, CP 1250, 1252, 1254)
|| c == 0x2216 // Set Minus (backslash equivalent, CP 1250, 1252, 1254)
|| c == 0x2044 // Fraction Slash (forward slash equivalent, CP 1250, 1252, 1254)
|| c == 0x2236 // Ratio (colon equivalent, CP 1250, 1252, 1254)
|| c == 0x0589 // Armenian Full Stop (colon equivalent, CP 1250, 1252, 1254)
|| c == 0x00A5 // Yen Sign (backslash equivalent, CP 932 Japanese)
|| c == 0x20A9 // Won Sign (backslash equivalent, CP 949, 1361 Korean)
|| c == 0x00B4 // Acute Accent (forward slash equivalent, CP 1253 Greek)
|| (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|| c == 0xFFFD // Replacement Character (UTF-8)
|| c == 0xFEFF // Byte Order Mark (BOM)

View File

@ -64,11 +64,22 @@ int main(void) {
// --- Unicode special codepoints ---
test("fullwidth period U+FF0E", false, "foo\xef\xbc\x8e""bar");
test("division slash U+2215", false, "foo\xe2\x88\x95""bar");
test("set minus U+2216", false, "foo\xe2\x88\x96""bar");
test("replacement char U+FFFD", false, "foo\xef\xbf\xbd""bar");
test("BOM U+FEFF", false, "foo\xef\xbb\xbf""bar");
// --- Windows best-fit characters (mapped to path separator/traversal chars by WideCharToMultiByte) ---
test("fullwidth solidus U+FF0F", false, "foo\xef\xbc\x8f""bar"); // / on CP 874, 1250-1258
test("fullwidth rev solidus U+FF3C",false, "foo\xef\xbc\xbc""bar"); // \ on CP 874, 1250-1258
test("fullwidth colon U+FF1A", false, "foo\xef\xbc\x9a""bar"); // : on CP 874, 1250-1258
test("division slash U+2215", false, "foo\xe2\x88\x95""bar"); // / on CP 1250, 1252, 1254
test("set minus U+2216", false, "foo\xe2\x88\x96""bar"); // \ on CP 1250, 1252, 1254
test("fraction slash U+2044", false, "foo\xe2\x81\x84""bar"); // / on CP 1250, 1252, 1254
test("ratio U+2236", false, "foo\xe2\x88\xb6""bar"); // : on CP 1250, 1252, 1254
test("armenian full stop U+0589", false, "foo\xd6\x89""bar"); // : on CP 1250, 1252, 1254
test("yen sign U+00A5", false, "foo\xc2\xa5""bar"); // \ on CP 932 (Japanese)
test("won sign U+20A9", false, "foo\xe2\x82\xa9""bar"); // \ on CP 949 (Korean)
test("acute accent U+00B4", false, "foo\xc2\xb4""bar"); // / on CP 1253 (Greek)
// --- Invalid UTF-8 ---
test("invalid continuation", false, std::string("foo\x80""bar"));
test("truncated sequence", false, std::string("foo\xc3"));