From 730e23693467b481aec6b09c94c2ee60bef98624 Mon Sep 17 00:00:00 2001 From: Jan Boon Date: Tue, 10 Feb 2026 00:52:40 +0000 Subject: [PATCH] common : fix filename validation for subfolders --- common/common.cpp | 27 +++--- tests/CMakeLists.txt | 1 + tests/test-fs-validate-filename.cpp | 122 ++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 10 deletions(-) create mode 100644 tests/test-fs-validate-filename.cpp diff --git a/common/common.cpp b/common/common.cpp index 3aa396127c..50cff38fe8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -692,8 +692,7 @@ bool string_parse_kv_override(const char * data, std::vector | + char32_t prev = 0; for (char32_t c : filename_utf32) { if (c <= 0x1F // Control characters (C0) || c == 0x7F // Control characters (DEL) @@ -758,25 +758,32 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) { || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') { return false; } - if (!allow_subdirs && (c == '/' || c == '\\')) { + if (allow_subdirs) { + if ((prev == '.' || prev == ' ') && (c == '/' || c == '\\')) { + // Reject any trailing dot or whitespace, these are stripped on Windows + // This also matches path elements that equal '..' or '.' + return false; + } + if ((prev == '/' || prev == '\\') && (c == ' ')) { + // Reject any leading whitespace, these are stripped on Windows + return false; + } + } else if (c == '/' || c == '\\') { // Subdirectories not allowed, reject path separators return false; } + prev = c; } // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename // Unicode and other whitespace is not affected, only 0x20 space + // This also matches path elements that equal '..' or '.' if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') { return false; } - // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead) - if (filename.find("..") != std::string::npos) { - return false; - } - - // Reject "." - if (filename == ".") { + // Reject any leading path separators + if (filename.front() == '/' || filename.front() == '\\') { return false; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 350bffc315..878c3b8283 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -227,6 +227,7 @@ llama_build_and_test(test-thread-safety.cpp ARGS -m "${MODEL_DEST}" -ngl 99 -p " set_tests_properties(test-thread-safety PROPERTIES FIXTURES_REQUIRED test-download-model) llama_build_and_test(test-arg-parser.cpp) +llama_build_and_test(test-fs-validate-filename.cpp) if (NOT LLAMA_SANITIZE_ADDRESS AND NOT GGML_SCHED_NO_REALLOC) # TODO: repair known memory leaks diff --git a/tests/test-fs-validate-filename.cpp b/tests/test-fs-validate-filename.cpp new file mode 100644 index 0000000000..947e70f40e --- /dev/null +++ b/tests/test-fs-validate-filename.cpp @@ -0,0 +1,122 @@ +#include "common.h" + +#include +#include + +#undef NDEBUG +#include + +static int n_tests = 0; +static int n_failed = 0; + +static void test(const char * desc, bool expected, const std::string & filename, bool allow_subdirs = false) { + bool result = fs_validate_filename(filename, allow_subdirs); + n_tests++; + if (result != expected) { + n_failed++; + printf(" FAIL: %s (got %s, expected %s)\n", desc, + result ? "true" : "false", expected ? "true" : "false"); + } +} + +int main(void) { + // --- Basic valid filenames --- + test("simple ascii", true, "hello.txt"); + test("no extension", true, "readme"); + test("multiple dots", true, "archive.tar.gz"); + test("leading dot (hidden)", true, ".gitignore"); + test("unicode filename", true, "\xc3\xa9\xc3\xa0\xc3\xbc.txt"); // éàü.txt + test("max length (255 bytes)", true, std::string(255, 'a')); + + // --- Basic invalid filenames --- + test("empty string", false, ""); + test("over 255 bytes", false, std::string(256, 'a')); + test("just a dot", false, "."); + test("double dot", false, ".."); + test("leading space", false, " foo"); + test("trailing space", false, "foo "); + test("trailing dot", false, "foo."); + + // --- Double dots --- + test("contains double dot", true, "foo..bar"); + test("leading double dot", true, "..foo"); + test("trailing double dot", false, "foo.."); // trailing dot + + // --- Control characters --- + test("null byte", false, std::string("foo\x00bar", 7)); + test("newline", false, "foo\nbar"); + test("tab", false, "foo\tbar"); + test("C0 control (0x01)", false, "foo\x01""bar"); + test("DEL (0x7F)", false, std::string("foo\x7f""bar")); + test("C1 control (0x80)", false, "foo\xc2\x80""bar"); // U+0080 + test("C1 control (0x9F)", false, "foo\xc2\x9f""bar"); // U+009F + + // --- Illegal characters --- + test("colon", false, "foo:bar"); + test("asterisk", false, "foo*bar"); + test("question mark", false, "foo?bar"); + test("double quote", false, "foo\"bar"); + test("less than", false, "foobar"); + test("pipe", false, "foo|bar"); + test("forward slash", false, "foo/bar"); + test("backslash", false, "foo\\bar"); + + // --- Unicode special codepoints --- + test("fullwidth period U+FF0E", false, "foo\xef\xbc\x8e""bar"); + test("division slash U+2215", false, "foo\xe2\x88\x95""bar"); + test("set minus U+2216", false, "foo\xe2\x88\x96""bar"); + test("replacement char U+FFFD", false, "foo\xef\xbf\xbd""bar"); + test("BOM U+FEFF", false, "foo\xef\xbb\xbf""bar"); + + // --- Invalid UTF-8 --- + test("invalid continuation", false, std::string("foo\x80""bar")); + test("truncated sequence", false, std::string("foo\xc3")); + test("overlong slash (2-byte)", false, std::string("foo\xc0\xaf""bar", 6)); // U+002F as 2-byte + test("overlong dot (2-byte)", false, std::string("foo\xc0\xae""bar", 6)); // U+002E as 2-byte + test("overlong 'a' (2-byte)", false, std::string("foo\xc1\xa1""bar", 6)); // U+0061 as 2-byte + test("overlong 'A' (2-byte)", false, std::string("foo\xc1\x81""bar", 6)); // U+0041 as 2-byte + test("overlong null (2-byte)", false, std::string("foo\xc0\x80""bar", 6)); // U+0000 as 2-byte + + // --- Paths without allow_subdirs --- + test("forward slash blocked", false, "foo/bar"); + test("backslash blocked", false, "foo\\bar"); + + // --- Paths with allow_subdirs=true --- + test("simple subdir", true, "foo/bar", true); + test("backslash subdir", true, "foo\\bar", true); + test("deep path", true, "a/b/c/d.txt", true); + test("trailing slash", true, "foo/bar/", true); + test("colon in path", false, "foo/b:r/baz", true); + test("control char in path", false, "foo/b\nar/baz", true); + + // --- Leading separators --- + test("leading slash", false, "/foo/bar", true); + test("leading backslash", false, "\\foo\\bar", true); + + // --- Dotdot in paths --- + test("leading dotdot in path", false, "../bar", true); + test("dotdot in path", false, "foo/../bar", true); + test("dotdot component leading", true, "foo/..bar/baz", true); + test("dotdot component middle", true, "foo/ba..r/baz", true); + test("dotdot component trailing", false, "foo/bar../baz", true); // trailing dot + + // --- Per-component checks --- + test("leading space in component", false, "foo/ bar/baz", true); + test("trailing space in component", false, "foo/bar /baz", true); + test("trailing dot in component", false, "foo/bar./baz", true); + test("dot component in path", false, "foo/./bar", true); + test("leading space after slash", false, "foo/ bar", true); + test("trailing space before slash", false, "bar /baz", true); + test("trailing dot before slash", false, "bar./baz", true); + + if (n_failed) { + printf("\n%d/%d tests failed\n", n_failed, n_tests); + fflush(stdout); + assert(false); + } + + printf("OK\n"); + + return 0; +}