// Copyright 2024 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Safe to be first, does not include POSIX headers. #include "hwy/detect_compiler_arch.h" // Request POSIX 2008, including `pread()` and `posix_fadvise()`. This also // implies `_POSIX_C_SOURCE`. #if !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 700 #undef _XOPEN_SOURCE #define _XOPEN_SOURCE 700 // SUSv4 #endif // Make `off_t` 64-bit even on 32-bit systems. Works for Android >= r15c. #undef _FILE_OFFSET_BITS #define _FILE_OFFSET_BITS 64 #include #include #include #include "io/io.h" #include "hwy/base.h" // HWY_ASSERT #if (HWY_OS_LINUX || HWY_OS_FREEBSD) && \ (!defined(__ANDROID_API__) || __ANDROID_API__ >= 24) #define GEMMA_IO_PREADV 1 #else #define GEMMA_IO_PREADV 0 #endif #if (HWY_OS_LINUX || HWY_OS_FREEBSD) && \ (!defined(__ANDROID_API__) || __ANDROID_API__ >= 21) #define GEMMA_IO_FADVISE 1 #else #define GEMMA_IO_FADVISE 0 #endif // FilePosix should only be compiled on non-Windows. It is easier to // check this in source code because we support multiple build systems. Note // that IOBatch at the end of this TU is still compiled on all platforms. #if !HWY_OS_WIN #if GEMMA_IO_PREADV // Replacement for the _BSD_SOURCE specified by preadv documentation. #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE #endif #include #include // preadv #endif // GEMMA_IO_PREADV #include // open #include // IOV_MAX #include #include // SEEK_END - unistd isn't enough for IDE. #include // Old OSX may require sys/types.h before sys/mman.h. #include // mmap #include // O_RDONLY #include // read, write, close // Placeholder for internal header, do not modify. #include "util/allocator.h" namespace gcpp { class FilePosix : public File { int fd_ = 0; public: explicit FilePosix(int fd) : fd_(fd) { HWY_ASSERT(fd > 0); } ~FilePosix() override { if (fd_ != 0) { HWY_ASSERT(close(fd_) != -1); } } // pwrite is thread-safe and allows arbitrary offsets. bool IsAppendOnly() const override { return false; } uint64_t FileSize() const override { static_assert(sizeof(off_t) == 8, "64-bit off_t required"); const off_t size = lseek(fd_, 0, SEEK_END); if (size < 0) { return 0; } return static_cast(size); } bool Read(uint64_t offset, uint64_t size, void* to) const override { uint8_t* bytes = reinterpret_cast(to); uint64_t pos = 0; for (;;) { // pread seems to be faster than lseek + read when parallelized. const auto bytes_read = pread(fd_, bytes + pos, size - pos, offset + pos); if (bytes_read <= 0) { HWY_WARN( "Read failure at pos %zu within size %zu with offset %zu and " "errno %d\n", static_cast(pos), static_cast(size), static_cast(offset), errno); break; } pos += bytes_read; HWY_ASSERT(pos <= size); if (pos == size) break; } return pos == size; // success if managed to read desired size } bool Write(const void* from, uint64_t size, uint64_t offset) override { const uint8_t* bytes = reinterpret_cast(from); uint64_t pos = 0; for (;;) { const auto bytes_written = pwrite(fd_, bytes + pos, size - pos, offset + pos); if (bytes_written <= 0) { HWY_WARN( "Write failure at pos %zu within size %zu with offset %zu and " "errno %d\n", static_cast(pos), static_cast(size), static_cast(offset), errno); break; } pos += bytes_written; HWY_ASSERT(pos <= size); if (pos == size) break; } return pos == size; // success if managed to write desired size } MapPtr Map() override { const size_t mapping_size = FileSize(); // No `MAP_POPULATE` because we do not want to wait for I/O, and // `MAP_NONBLOCK` is not guaranteed. `MAP_HUGETLB` fails. `MAP_SHARED` is // more efficient than `MAP_PRIVATE`; the main difference is that the former // will eventually see subsequent changes to the file. const int flags = MAP_SHARED; void* mapping = mmap(nullptr, mapping_size, PROT_READ, flags, fd_, /*offset=*/0); if (mapping == MAP_FAILED) return MapPtr(); #ifdef MADV_WILLNEED // Missing on some OSX. // (Maybe) initiate readahead. madvise(mapping, mapping_size, MADV_WILLNEED); #endif return MapPtr(static_cast(mapping), DeleterFunc([mapping_size](void* ptr) { HWY_ASSERT(munmap(ptr, mapping_size) == 0); })); } int Handle() const override { return fd_; } }; // FilePosix HWY_MAYBE_UNUSED extern std::unique_ptr OpenFileGoogle( const Path& filename, const char* mode); std::unique_ptr OpenFileOrNull(const Path& filename, const char* mode) { std::unique_ptr file; // OpenFileGoogle omitted if (file) return file; const bool is_read = mode[0] != 'w'; const int flags = is_read ? O_RDONLY : O_CREAT | O_RDWR | O_TRUNC; const int fd = open(filename.path.c_str(), flags, 0644); if (fd < 0) return file; #if GEMMA_IO_FADVISE if (is_read) { // Doubles the readahead window, which seems slightly faster when cached. (void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); } #endif // GEMMA_IO_FADVISE return std::make_unique(fd); } } // namespace gcpp #endif // !HWY_OS_WIN namespace gcpp { std::unique_ptr OpenFileOrAbort(const Path& filename, const char* mode) { std::unique_ptr file = OpenFileOrNull(filename, mode); if (!file) { HWY_ABORT("Failed to open %s, errno %d", filename.path.c_str(), errno); } return file; } std::string ReadFileToString(const Path& path) { std::unique_ptr file = OpenFileOrAbort(path, "r"); const size_t size = file->FileSize(); if (size == 0) { HWY_ABORT("Empty file %s", path.path.c_str()); } std::string content(size, ' '); if (!file->Read(0, size, content.data())) { HWY_ABORT("Failed to read %s", path.path.c_str()); } return content; } #ifdef IOV_MAX constexpr size_t kMaxSpans = IOV_MAX; #else constexpr size_t kMaxSpans = 1024; // Linux limit #endif IOBatch::IOBatch(uint64_t offset, size_t key_idx) : offset_(offset), key_idx_(key_idx) { spans_.reserve(kMaxSpans); } // Returns true if the batch was full; if so, call again on the new batch. bool IOBatch::Add(void* mem, size_t bytes) { if (spans_.size() >= kMaxSpans) return false; if (total_bytes_ + bytes > 0x7FFFF000) return false; // Linux limit spans_.push_back({.mem = mem, .bytes = bytes}); total_bytes_ += bytes; return true; } int InternalInit() { // currently unused, except for init list ordering in GemmaEnv. return 0; } uint64_t IOBatch::Read(const File& file) const { HWY_ASSERT(!spans_.empty()); #if GEMMA_IO_PREADV if (file.Handle() != -1) { ssize_t bytes_read; for (;;) { bytes_read = preadv(file.Handle(), reinterpret_cast(spans_.data()), static_cast(spans_.size()), offset_); if (bytes_read >= 0) break; if (errno == EINTR) continue; // signal: retry HWY_WARN("preadv(%d) for %4zu spans from offset %12zu failed, errno %d.", file.Handle(), spans_.size(), offset_, errno); return 0; } return static_cast(bytes_read); } #endif // GEMMA_IO_PREADV // preadv disabled or no handle: use normal reads (higher kernel overhead). uint64_t total = 0; uint64_t offset = offset_; for (const IOSpan& span : spans_) { if (!file.Read(offset, span.bytes, span.mem)) return 0; total += span.bytes; offset += span.bytes; } return total; } } // namespace gcpp