diff --git a/BUILD.bazel b/BUILD.bazel index a5f01e7..ad37b4c 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -138,7 +138,7 @@ cc_library( deps = [ ":basics", "//compression:fields", - "//compression:sfp", + "//compression:shared", "@highway//:hwy", # base.h ], ) @@ -159,10 +159,11 @@ cc_test( deps = [ ":basics", ":common", + ":mat", ":weights", "@googletest//:gtest_main", # buildcleaner: keep "//compression:compress", - "@highway//:hwy", + "@highway//:hwy", # aligned_allocator.h ], ) @@ -176,7 +177,7 @@ cc_library( ":common", ":threading_context", "//compression:fields", - "//compression:sfp", + "//compression:shared", "@highway//:hwy", "@highway//:profiler", ], @@ -348,7 +349,7 @@ cc_library( ":mat", "//compression:blob_store", "//compression:compress", - "//compression:io", + "//compression:io", # Path "@highway//:hwy", "@highway//:profiler", "@highway//:stats", @@ -362,8 +363,8 @@ cc_library( hdrs = ["gemma/tokenizer.h"], deps = [ ":common", - "//compression:io", - "//compression:sfp", + "//compression:io", # Path + "//compression:shared", "@highway//:hwy", "@highway//:profiler", "@com_google_sentencepiece//:sentencepiece_processor", @@ -405,16 +406,17 @@ cc_library( ":allocator", ":basics", ":common", - ":ops", - ":mat", - ":tokenizer", ":kv_cache", - ":weights", + ":mat", + ":ops", + ":tokenizer", ":threading", ":threading_context", + ":weights", # Placeholder for internal dep, do not remove., + "//compression:blob_store", "//compression:io", - "//compression:sfp", + "//compression:shared", "//paligemma:image", "@highway//:hwy", "@highway//:nanobenchmark", # timer @@ -445,7 +447,7 @@ cc_library( ":gemma_lib", ":ops", "//compression:io", - "//compression:sfp", + "//compression:shared", "@highway//:hwy", ], ) @@ -517,7 +519,7 @@ cc_binary( ":gemma_lib", ":ops", ":threading_context", - "//compression:sfp", + "//compression:shared", "//paligemma:image", "@highway//:hwy", "@highway//:profiler", @@ -706,6 +708,7 @@ cc_library( ":mat", ":weights", "//compression:compress", + "//compression:shared", "@highway//:hwy", "@highway//:thread_pool", ], @@ -731,7 +734,7 @@ cc_test( ":threading", ":weights", "@googletest//:gtest_main", # buildcleaner: keep - "//compression:sfp", + "//compression:shared", "@highway//:thread_pool", ], ) diff --git a/compression/BUILD.bazel b/compression/BUILD.bazel index e5102fe..e58b61c 100644 --- a/compression/BUILD.bazel +++ b/compression/BUILD.bazel @@ -40,6 +40,7 @@ cc_library( "//conditions:default": [], }), deps = [ + "//:allocator", "@highway//:hwy", ] + FILE_DEPS, ) @@ -69,6 +70,7 @@ cc_library( hdrs = ["blob_store.h"], deps = [ ":io", + "//:threading_context", "@highway//:hwy", "@highway//:thread_pool", ], @@ -81,7 +83,7 @@ cc_test( ":blob_store", ":io", "@googletest//:gtest_main", # buildcleaner: keep - "@highway//:hwy", + "//:threading_context", "@highway//:hwy_test_util", "@highway//:thread_pool", ], @@ -115,21 +117,30 @@ cc_test( ) cc_library( - name = "sfp", + name = "shared", hdrs = ["shared.h"], - textual_hdrs = ["sfp-inl.h"], deps = [ "//:basics", "@highway//:hwy", ], ) +cc_library( + name = "sfp", + textual_hdrs = ["sfp-inl.h"], + deps = [ + ":shared", + "//:basics", + "@highway//:hwy", + ], +) + cc_library( name = "nuq", - hdrs = ["shared.h"], textual_hdrs = ["nuq-inl.h"], deps = [ ":sfp", + ":shared", "//:basics", "@highway//:hwy", "@highway//hwy/contrib/sort:vqsort", @@ -144,6 +155,7 @@ cc_library( deps = [ ":compress", ":distortion", + "//:mat", "@highway//:hwy", "@highway//:hwy_test_util", "@highway//:thread_pool", @@ -254,6 +266,16 @@ cc_library( ], ) +cc_library( + name = "io_win", + srcs = ["io_win.cc"], + deps = [ + ":io", + "//:allocator", + "@highway//:hwy", + ], +) + cc_binary( name = "blob_compare", srcs = ["blob_compare.cc"], diff --git a/compression/fields.cc b/compression/fields.cc index 092597f..fb7b0b4 100644 --- a/compression/fields.cc +++ b/compression/fields.cc @@ -87,7 +87,7 @@ class PrintVisitor : public VisitorBase { } void operator()(uint64_t& value) override { - fprintf(stderr, "%sU64 %zu\n", indent_.c_str(), value); + fprintf(stderr, "%sU64 %zu\n", indent_.c_str(), static_cast(value)); } void operator()(float& value) override { diff --git a/compression/io.cc b/compression/io.cc index 84e3603..28df7e2 100644 --- a/compression/io.cc +++ b/compression/io.cc @@ -36,12 +36,16 @@ #include #include #include // SEEK_END - unistd isn't enough for IDE. +#include +// Old OSX may require sys/types.h before sys/mman.h. +#include // mmap #include // O_RDONLY #include // read, write, close #include #include "compression/io.h" +#include "util/allocator.h" #include "hwy/base.h" // HWY_ASSERT namespace gcpp { @@ -93,6 +97,28 @@ class FilePosix : public File { } return pos == size; // success if managed to write desired size } + + MapPtr Map() override { + const size_t mapping_size = FileSize(); + // No `MAP_POPULATE` because we do not want to wait for I/O, and + // `MAP_NONBLOCK` is not guaranteed. `MAP_HUGETLB` fails. `MAP_SHARED` is + // more efficient than `MAP_PRIVATE`; the main difference is that the former + // will eventually see subsequent changes to the file. + const int flags = MAP_SHARED; + void* mapping = + mmap(nullptr, mapping_size, PROT_READ, flags, fd_, /*offset=*/0); + if (mapping == MAP_FAILED) return MapPtr(); + +#ifdef MADV_WILLNEED // Missing on some OSX. + // (Maybe) initiate readahead. + madvise(mapping, mapping_size, MADV_WILLNEED); +#endif + + return MapPtr(static_cast(mapping), + DeleterFunc2([mapping_size](void* ptr) { + HWY_ASSERT(munmap(ptr, mapping_size) == 0); + })); + } }; // FilePosix HWY_MAYBE_UNUSED extern std::unique_ptr OpenFileGoogle( diff --git a/compression/io.h b/compression/io.h index 1d47143..7e1a18c 100644 --- a/compression/io.h +++ b/compression/io.h @@ -23,6 +23,7 @@ #include #include // std::move +#include "util/allocator.h" #include "hwy/base.h" namespace gcpp { @@ -32,6 +33,8 @@ namespace gcpp { // prefer to define Exists inline because there are multiple io*.cc files. struct Path; +using MapPtr = AlignedPtr2; + // Abstract base class enables multiple I/O backends in the same binary. class File { public: @@ -50,6 +53,12 @@ class File { // Returns true if all the requested bytes were written. virtual bool Write(const void* from, uint64_t size, uint64_t offset) = 0; + + // Maps the entire file into read-only memory or returns nullptr on failure. + // We do not support offsets because Windows requires them to be a multiple of + // the allocation granularity, which is 64 KiB. Some implementations may fail + // if the file is zero-sized and return a nullptr. + virtual MapPtr Map() = 0; }; // Returns nullptr on failure. `mode` is either "r" or "w+". This is not just @@ -87,6 +96,7 @@ struct Path { std::string path; }; +// Aborts on error. static inline HWY_MAYBE_UNUSED std::string ReadFileToString(const Path& path) { std::unique_ptr file = OpenFileOrNull(path, "r"); if (!file) { diff --git a/compression/io_win.cc b/compression/io_win.cc index 1cb1673..1f5e959 100644 --- a/compression/io_win.cc +++ b/compression/io_win.cc @@ -22,6 +22,7 @@ #include #include "compression/io.h" +#include "util/allocator.h" #include "hwy/base.h" // HWY_ASSERT #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN @@ -96,6 +97,22 @@ class FileWin : public File { } return true; // wrote everything => success } + + MapPtr Map() override { + if (hFile_ == INVALID_HANDLE_VALUE) return MapPtr(); + + // Size=0 means the entire file. + HANDLE hMapping = + CreateFileMappingA(hFile_, nullptr, PAGE_READONLY, 0, 0, nullptr); + // Offset zero and size=0 means the entire file. + void* ptr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0); + if (!ptr) return MapPtr(); + return MapPtr(static_cast(ptr), + DeleterFunc2([hMapping](void* ptr) { + HWY_ASSERT(UnmapViewOfFile(ptr)); + HWY_ASSERT(CloseHandle(hMapping)); + })); + } }; // FileWin std::unique_ptr OpenFileOrNull(const Path& filename, const char* mode) { diff --git a/compression/python/BUILD.bazel b/compression/python/BUILD.bazel index b2b376b..5594af0 100644 --- a/compression/python/BUILD.bazel +++ b/compression/python/BUILD.bazel @@ -32,7 +32,7 @@ pybind_extension( deps = [ ":compression_clif_aux", "@abseil-cpp//absl/types:span", - "//compression:sfp", + "//compression:shared", ], ) diff --git a/paligemma/BUILD.bazel b/paligemma/BUILD.bazel index 8f61ce2..069fd6b 100644 --- a/paligemma/BUILD.bazel +++ b/paligemma/BUILD.bazel @@ -43,7 +43,7 @@ cc_test( "//:benchmark_helper", "//:common", "//:gemma_lib", - "//compression:sfp", + "//compression:shared", "@highway//:hwy", "@highway//:hwy_test_util", ], diff --git a/python/BUILD.bazel b/python/BUILD.bazel index 1298473..2a7220a 100644 --- a/python/BUILD.bazel +++ b/python/BUILD.bazel @@ -13,7 +13,7 @@ pybind_extension( srcs = ["configs.cc"], deps = [ "//:common", - "//compression:sfp", + "//compression:shared", ], ) @@ -25,7 +25,7 @@ pybind_extension( "//:benchmark_helper", "//:gemma_args", "//:gemma_lib", - "//compression:sfp", + "//compression:shared", "@highway//:hwy", ], ) diff --git a/util/args.h b/util/args.h index 96ac0b9..eff046a 100644 --- a/util/args.h +++ b/util/args.h @@ -181,6 +181,10 @@ class ArgsBase { void ForEach(Visitor& visitor) { static_cast(this)->ForEach(visitor); } + template + void ForEach(Visitor& visitor) const { + const_cast(this)->ForEach(visitor); + } public: // WARNING: cannot call from ctor because the derived ctor has not yet run. @@ -189,12 +193,12 @@ class ArgsBase { ForEach(visitor); } - void Help() { + void Help() const { HelpVisitor visitor; ForEach(visitor); } - void Print(int verbosity = 0) { + void Print(int verbosity = 0) const { PrintVisitor visitor(verbosity); ForEach(visitor); } diff --git a/util/mat.h b/util/mat.h index 3d7057c..cbe37a3 100644 --- a/util/mat.h +++ b/util/mat.h @@ -112,6 +112,7 @@ class MatPtr : public IFields { type_ = type; element_bytes_ = static_cast(hwy::DivCeil(TypeBits(type), 8)); num_elements_ = static_cast(ComputeNumElements(type, Extents())); + HWY_DASSERT(0 != element_bytes_ && element_bytes_ <= 16); } bool IsEmpty() const { return rows_ == 0 || cols_ == 0; }