llama : use FILE pointer consistently, address review feedback

This commit is contained in:
Siddhesh2377 2026-03-14 21:40:01 +05:30
parent 626823b2d9
commit 26c04d4b31
No known key found for this signature in database
7 changed files with 25 additions and 34 deletions

View File

@@ -718,6 +718,5 @@ inline bool ggml_check_edges(const struct ggml_cgraph * cgraph,
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
#endif // __cplusplus

View File

@@ -394,7 +394,11 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
return true;
}
struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
if (!file) {
return nullptr;
}
const struct gguf_reader gr(file);
struct gguf_context * ctx = new gguf_context;
@@ -848,18 +852,11 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
return nullptr;
}
struct gguf_context * result = gguf_init_from_file_impl(file, params);
struct gguf_context * result = gguf_init_from_file_ptr(file, params);
fclose(file);
return result;
}
struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
if (!file) {
return nullptr;
}
return gguf_init_from_file_impl(file, params);
}
void gguf_free(struct gguf_context * ctx) {
if (ctx == nullptr) {
return;

View File

@@ -465,7 +465,9 @@ extern "C" {
struct llama_model_params params);
// Load a model from an open FILE pointer
LLAMA_API struct llama_model * llama_model_load_from_file_ptr(FILE * file, struct llama_model_params params);
LLAMA_API struct llama_model * llama_model_load_from_file_ptr(
FILE * file,
struct llama_model_params params);
// Load a model from multiple splits (support custom naming scheme)
// The paths must be in the correct order

View File

@@ -86,8 +86,12 @@ struct llama_file::impl {
seek(0, SEEK_SET);
}
impl(int /*fd_src*/) {
throw std::runtime_error("fd-based loading is not supported on Windows");
impl(FILE * file) : owns_fp(false) {
fp = file;
fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp));
seek(0, SEEK_END);
size = tell();
seek(0, SEEK_SET);
}
size_t tell() const {
@@ -163,7 +167,7 @@ struct llama_file::impl {
}
~impl() {
if (fp) {
if (fp && owns_fp) {
std::fclose(fp);
}
}
@@ -213,20 +217,8 @@ struct llama_file::impl {
seek(0, SEEK_SET);
}
impl(int fd_src) : fname("(fd:" + std::to_string(fd_src) + ")") {
init_from_fd(fd_src);
}
void init_from_fd(int fd_src) {
const int fd_duped = dup(fd_src);
if (fd_duped < 0) {
throw std::runtime_error(format("failed to dup fd %d: %s", fd_src, strerror(errno)));
}
fp = fdopen(fd_duped, "rb");
if (!fp) {
close(fd_duped);
throw std::runtime_error(format("failed to fdopen fd %d: %s", fd_src, strerror(errno)));
}
impl(FILE * file) : fname("(file*)"), owns_fp(false) {
fp = file;
seek(0, SEEK_END);
size = tell();
seek(0, SEEK_SET);
@@ -376,7 +368,7 @@ struct llama_file::impl {
~impl() {
if (fd != -1) {
close(fd);
} else {
} else if (owns_fp) {
std::fclose(fp);
}
}
@@ -392,12 +384,13 @@ struct llama_file::impl {
FILE * fp{};
size_t size{};
bool owns_fp = true;
};
llama_file::llama_file(const char * fname, const char * mode, const bool use_direct_io) :
pimpl(std::make_unique<impl>(fname, mode, use_direct_io)) {}
llama_file::llama_file(int fd) : pimpl(std::make_unique<impl>(fd)) {}
llama_file::llama_file(FILE * file) : pimpl(std::make_unique<impl>(file)) {}
llama_file::~llama_file() = default;

View File

@@ -15,7 +15,7 @@ using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;
struct llama_file {
llama_file(const char * fname, const char * mode, bool use_direct_io = false);
llama_file(int fd);
llama_file(FILE * file);
~llama_file();
size_t tell() const;

View File

@@ -674,7 +674,7 @@ llama_model_loader::llama_model_loader(
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
llm_kv = LLM_KV(llm_arch_from_string(arch_name));
files.emplace_back(new llama_file(fileno(file)));
files.emplace_back(new llama_file(file));
contexts.emplace_back(ctx);
// Save tensors data offset info of the main file.

View File

@@ -742,7 +742,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
/*ctx =*/ hft >= offset_has_data ? &ctx : nullptr,
};
struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, gguf_params);
struct gguf_context * gguf_ctx = gguf_init_from_file_ptr(file, gguf_params);
if (expect_context_not_null(hft)) {
printf("%s: - context_not_null: ", __func__);
@@ -1137,7 +1137,7 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
/*no_alloc =*/ false,
/*ctx =*/ only_meta ? nullptr : &ctx_1,
};
struct gguf_context * gguf_ctx_1 = gguf_init_from_file_impl(file, gguf_params);
struct gguf_context * gguf_ctx_1 = gguf_init_from_file_ptr(file, gguf_params);
printf("%s: same_version: ", __func__);
if (gguf_get_version(gguf_ctx_0) == gguf_get_version(gguf_ctx_1)) {