diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h index bd12997372..9d8e321ba0 100644 --- a/ggml/include/gguf.h +++ b/ggml/include/gguf.h @@ -78,7 +78,7 @@ extern "C" { GGML_API struct gguf_context * gguf_init_empty(void); GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); - GGML_API struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params); + GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params); //GGML_API struct gguf_context * gguf_init_from_buffer(..); GGML_API void gguf_free(struct gguf_context * ctx); diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index 8eea785404..bf28dabb06 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -15,10 +15,6 @@ #include <string> #include <vector> -#ifndef _WIN32 -#include <unistd.h> -#endif - #define GGUF_MAX_STRING_LENGTH (1024*1024*1024) #define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024) @@ -857,33 +853,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p return result; } -#ifndef _WIN32 -struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) { - const int fd_duped = dup(fd); - if (fd_duped < 0) { - GGML_LOG_ERROR("%s: failed to dup fd %d: %s\n", __func__, fd, strerror(errno)); - return nullptr; - } - - FILE * file = fdopen(fd_duped, "rb"); +struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) { if (!file) { - close(fd_duped); - GGML_LOG_ERROR("%s: failed to fdopen fd %d: %s\n", __func__, fd, strerror(errno)); return nullptr; } - - struct gguf_context * result = gguf_init_from_file_impl(file, params); - fclose(file); - return result; + return gguf_init_from_file_impl(file, params); } -#else -struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) { - GGML_LOG_ERROR("%s: fd-based loading is not supported on Windows\n", __func__); - GGML_UNUSED(fd); - GGML_UNUSED(params); - return nullptr; -} -#endif void 
gguf_free(struct gguf_context * ctx) { if (ctx == nullptr) { diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp index 5ea00d3fa7..706d72f1e0 100644 --- a/src/llama-mmap.cpp +++ b/src/llama-mmap.cpp @@ -220,12 +220,12 @@ struct llama_file::impl { void init_from_fd(int fd_src) { const int fd_duped = dup(fd_src); if (fd_duped < 0) { - throw std::runtime_error(format("llama_file: failed to dup fd %d: %s", fd_src, strerror(errno))); + throw std::runtime_error(format("failed to dup fd %d: %s", fd_src, strerror(errno))); } fp = fdopen(fd_duped, "rb"); if (!fp) { close(fd_duped); - throw std::runtime_error(format("llama_file: failed to fdopen fd %d: %s", fd_src, strerror(errno))); + throw std::runtime_error(format("failed to fdopen fd %d: %s", fd_src, strerror(errno))); } seek(0, SEEK_END); size = tell(); diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp index c0e1e754e7..358b505f18 100644 --- a/src/llama-model-loader.cpp +++ b/src/llama-model-loader.cpp @@ -13,6 +13,10 @@ #include <cstring> #include <future> +#ifndef _WIN32 +#include <unistd.h> +#endif // _WIN32 + static const size_t kiB = 1024; static const size_t MiB = 1024*kiB; static const size_t GiB = 1024*MiB; @@ -659,13 +663,25 @@ llama_model_loader::llama_model_loader( LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1); } } else if (fd >= 0) { + const int fd_duped = dup(fd); + if (fd_duped < 0) { + throw std::runtime_error(format("%s: failed to dup fd %d: %s", __func__, fd, strerror(errno))); + } + + FILE * f = fdopen(fd_duped, "rb"); + if (!f) { + close(fd_duped); + throw std::runtime_error(format("%s: failed to fdopen fd %d: %s", __func__, fd, strerror(errno))); + } + struct ggml_context * ctx = NULL; struct gguf_init_params params = { /*.no_alloc = */ true, /*.ctx = */ &ctx, }; - metadata_ptr.reset(gguf_init_from_fd(fd, params)); + metadata_ptr.reset(gguf_init_from_file_ptr(f, params)); + fclose(f); metadata = metadata_ptr.get(); if (metadata == nullptr) { throw 
std::runtime_error(format("%s: failed to load model from fd %d", __func__, fd)); @@ -674,8 +690,8 @@ llama_model_loader::llama_model_loader( get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false); llm_kv = LLM_KV(llm_arch_from_string(arch_name)); - contexts.emplace_back(ctx); files.emplace_back(new llama_file(fd)); + contexts.emplace_back(ctx); // Save tensors data offset info of the main file. for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) { diff --git a/src/llama.cpp b/src/llama.cpp index daf3c3bd8d..c40d5c9d51 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1083,7 +1083,7 @@ struct llama_model * llama_model_load_from_fd(int fd, struct llama_model_params std::string path_model; std::vector<std::string> splits = {}; return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, fd, params); -#endif +#endif // _WIN32 } void llama_model_save_to_file(const struct llama_model * model, const char * path_model) { diff --git a/tests/test-model-load-fd.cpp b/tests/test-model-load-fd.cpp index d5102942d0..dd982ba907 100644 --- a/tests/test-model-load-fd.cpp +++ b/tests/test-model-load-fd.cpp @@ -44,4 +44,4 @@ int main(int argc, char ** argv) { return n_vocab > 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -#endif +#endif // _WIN32