From 26c04d4b31f265589e81605eca7accae7e7d22bc Mon Sep 17 00:00:00 2001
From: Siddhesh2377 <siddheshsonar2377@gmail.com>
Date: Sat, 14 Mar 2026 21:40:01 +0530
Subject: [PATCH] llama : use FILE pointer consistently, address review
 feedback

---
 ggml/src/ggml-impl.h       |  1 -
 ggml/src/gguf.cpp          | 15 ++++++---------
 include/llama.h            |  4 +++-
 src/llama-mmap.cpp         | 31 ++++++++++++-------------------
 src/llama-mmap.h           |  2 +-
 src/llama-model-loader.cpp |  2 +-
 tests/test-gguf.cpp        |  4 ++--
 7 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index e3714b38a6..ba0730ead2 100644
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -718,6 +718,5 @@ inline bool ggml_check_edges(const struct ggml_cgraph *                cgraph,
 
 // expose GGUF internals for test code
 GGML_API size_t gguf_type_size(enum gguf_type type);
-GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
 GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
 #endif // __cplusplus
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index bf28dabb06..49afeacae3 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -394,7 +394,11 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
     return true;
 }
 
-struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
+struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
+    if (!file) {
+        return nullptr;
+    }
+
     const struct gguf_reader gr(file);
     struct gguf_context * ctx = new gguf_context;
 
@@ -848,18 +852,11 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         return nullptr;
     }
 
-    struct gguf_context * result = gguf_init_from_file_impl(file, params);
+    struct gguf_context * result = gguf_init_from_file_ptr(file, params);
     fclose(file);
     return result;
 }
 
-struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
-    if (!file) {
-        return nullptr;
-    }
-    return gguf_init_from_file_impl(file, params);
-}
-
 void gguf_free(struct gguf_context * ctx) {
     if (ctx == nullptr) {
         return;
diff --git a/include/llama.h b/include/llama.h
index df2ab4ab4b..342666a625 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -465,7 +465,9 @@ extern "C" {
               struct llama_model_params   params);
 
     // Load a model from an open FILE pointer
-    LLAMA_API struct llama_model * llama_model_load_from_file_ptr(FILE * file, struct llama_model_params params);
+    LLAMA_API struct llama_model * llama_model_load_from_file_ptr(
+                                   FILE * file, // borrowed: the loader's llama_file wrapper never fclose()s it
+              struct llama_model_params   params);
 
     // Load a model from multiple splits (support custom naming scheme)
     // The paths must be in the correct order
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
index 706d72f1e0..ccc29c1302 100644
--- a/src/llama-mmap.cpp
+++ b/src/llama-mmap.cpp
@@ -86,8 +86,12 @@ struct llama_file::impl {
         seek(0, SEEK_SET);
     }
 
-    impl(int /*fd_src*/) {
-        throw std::runtime_error("fd-based loading is not supported on Windows");
+    impl(FILE * file) : owns_fp(false) { // wrap a caller-supplied stream; owns_fp(false) keeps ~impl from closing it
+        fp = file;
+        fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp)); // cache the Win32 handle behind the stream's descriptor
+        seek(0, SEEK_END); // move to the end so the following tell() yields the file size
+        size = tell();
+        seek(0, SEEK_SET); // rewind; note this repositions the stream the caller passed in
     }
 
     size_t tell() const {
@@ -163,7 +167,7 @@ struct llama_file::impl {
     }
 
     ~impl() {
-        if (fp) {
+        if (fp && owns_fp) { // close only streams this object opened; a borrowed FILE * stays open
             std::fclose(fp);
         }
     }
@@ -213,20 +217,8 @@ struct llama_file::impl {
         seek(0, SEEK_SET);
     }
 
-    impl(int fd_src) : fname("(fd:" + std::to_string(fd_src) + ")") {
-        init_from_fd(fd_src);
-    }
-
-    void init_from_fd(int fd_src) {
-        const int fd_duped = dup(fd_src);
-        if (fd_duped < 0) {
-            throw std::runtime_error(format("failed to dup fd %d: %s", fd_src, strerror(errno)));
-        }
-        fp = fdopen(fd_duped, "rb");
-        if (!fp) {
-            close(fd_duped);
-            throw std::runtime_error(format("failed to fdopen fd %d: %s", fd_src, strerror(errno)));
-        }
+    impl(FILE * file) : fname("(file*)"), owns_fp(false) {
+        fp = file;
         seek(0, SEEK_END);
         size = tell();
         seek(0, SEEK_SET);
@@ -376,7 +368,7 @@ struct llama_file::impl {
     ~impl() {
         if (fd != -1) {
             close(fd);
-        } else {
+        } else if (owns_fp) { // skip fclose() for a FILE * supplied through impl(FILE *)
             std::fclose(fp);
         }
     }
@@ -392,12 +384,13 @@ struct llama_file::impl {
 
     FILE * fp{};
     size_t size{};
+    bool owns_fp = true; // false when fp was handed in via impl(FILE *); the destructor then leaves it open
 };
 
 llama_file::llama_file(const char * fname, const char * mode, const bool use_direct_io) :
     pimpl(std::make_unique<impl>(fname, mode, use_direct_io)) {}
 
-llama_file::llama_file(int fd) : pimpl(std::make_unique<impl>(fd)) {}
+llama_file::llama_file(FILE * file) : pimpl(std::make_unique<impl>(file)) {} // non-owning: impl(FILE *) sets owns_fp = false
 
 llama_file::~llama_file() = default;
 
diff --git a/src/llama-mmap.h b/src/llama-mmap.h
index 2d1eac91a3..32fab23119 100644
--- a/src/llama-mmap.h
+++ b/src/llama-mmap.h
@@ -15,7 +15,7 @@ using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;
 
 struct llama_file {
     llama_file(const char * fname, const char * mode, bool use_direct_io = false);
-    llama_file(int fd);
+    explicit llama_file(FILE * file); // borrows file: the object never fclose()s it; explicit blocks silent FILE * -> llama_file conversion
     ~llama_file();
 
     size_t tell() const;
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 6af0ee1fe5..8046df0194 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -674,7 +674,7 @@ llama_model_loader::llama_model_loader(
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
-        files.emplace_back(new llama_file(fileno(file)));
+        files.emplace_back(new llama_file(file));
         contexts.emplace_back(ctx);
 
         // Save tensors data offset info of the main file.
diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp
index 8ebd16ba82..78ca95dcbd 100644
--- a/tests/test-gguf.cpp
+++ b/tests/test-gguf.cpp
@@ -742,7 +742,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
             /*ctx      =*/ hft >= offset_has_data ? &ctx : nullptr,
         };
 
-        struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, gguf_params);
+        struct gguf_context * gguf_ctx = gguf_init_from_file_ptr(file, gguf_params);
 
         if (expect_context_not_null(hft)) {
             printf("%s:   - context_not_null: ", __func__);
@@ -1137,7 +1137,7 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
         /*no_alloc =*/ false,
         /*ctx      =*/ only_meta ? nullptr : &ctx_1,
     };
-    struct gguf_context * gguf_ctx_1 = gguf_init_from_file_impl(file, gguf_params);
+    struct gguf_context * gguf_ctx_1 = gguf_init_from_file_ptr(file, gguf_params);
 
     printf("%s: same_version: ", __func__);
     if (gguf_get_version(gguf_ctx_0) == gguf_get_version(gguf_ctx_1)) {