From 287a33017b32600bfc0e81feeb0ad6e81e0dd484 Mon Sep 17 00:00:00 2001 From: Julius Tischbein Date: Sun, 18 Jan 2026 17:35:57 +0100 Subject: [PATCH] llama : Extend fallback, fix fileno for dio file, exclude case that mmap uses dio file (#18887) --- src/llama-mmap.cpp | 6 +++++- src/llama-model-loader.cpp | 18 ++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp index fe0847fe1a..0261e4c72c 100644 --- a/src/llama-mmap.cpp +++ b/src/llama-mmap.cpp @@ -265,7 +265,8 @@ struct llama_file::impl { continue; // Interrupted by signal, retry } // Fallback to std::fread in case the DMA controller cannot access the buffer - if (errno == EFAULT) { + if (errno == EFAULT || errno == EINVAL) { + LLAMA_LOG_WARN("%s: Falling back to buffered IO due to %s\n", __func__, strerror(errno)); auto curr_off = tell(); close(fd); fd = -1; @@ -384,6 +385,9 @@ int llama_file::file_id() const { #ifdef _WIN32 return _fileno(pimpl->fp); #else + if (pimpl->fd != -1) { + return pimpl->fd; + } #if defined(fileno) return fileno(pimpl->fp); #else diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp index 300a322c51..383b8dc761 100644 --- a/src/llama-model-loader.cpp +++ b/src/llama-model-loader.cpp @@ -539,12 +539,18 @@ llama_model_loader::llama_model_loader( files.emplace_back(new llama_file(fname.c_str(), "rb", use_direct_io)); contexts.emplace_back(ctx); - use_direct_io = use_direct_io && files.back()->has_direct_io(); - - // Disable mmap in case Direct I/O is enabled and available - if (use_direct_io && use_mmap) { - use_mmap = false; - LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__); + if (use_mmap && use_direct_io) { + if (files.back()->has_direct_io()) { + // Disable mmap, as DirectIO is available + use_mmap = false; + LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__); + } else { + // Disable DirectIO and reopen file using std::fopen for mmap + use_direct_io = false; + files.pop_back(); + files.emplace_back(new llama_file(fname.c_str(), "rb", false)); + LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__); + } } // Save tensors data offset of the main file.