diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h index bd12997372..9d8e321ba0 100644 --- a/ggml/include/gguf.h +++ b/ggml/include/gguf.h @@ -78,7 +78,7 @@ extern "C" { GGML_API struct gguf_context * gguf_init_empty(void); GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); - GGML_API struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params); + GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params); //GGML_API struct gguf_context * gguf_init_from_buffer(..); GGML_API void gguf_free(struct gguf_context * ctx); diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index 8eea785404..bf28dabb06 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -15,10 +15,6 @@ #include <string> #include <vector> -#ifndef _WIN32 -#include <unistd.h> -#endif - #define GGUF_MAX_STRING_LENGTH (1024*1024*1024) #define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024) @@ -857,33 +853,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p return result; } -#ifndef _WIN32 -struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) { - const int fd_duped = dup(fd); - if (fd_duped < 0) { - GGML_LOG_ERROR("%s: failed to dup fd %d: %s\n", __func__, fd, strerror(errno)); - return nullptr; - } - - FILE * file = fdopen(fd_duped, "rb"); +struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) { if (!file) { - close(fd_duped); - GGML_LOG_ERROR("%s: failed to fdopen fd %d: %s\n", __func__, fd, strerror(errno)); return nullptr; } - - struct gguf_context * result = gguf_init_from_file_impl(file, params); - fclose(file); - return result; + return gguf_init_from_file_impl(file, params); } -#else -struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) { - GGML_LOG_ERROR("%s: fd-based loading is not supported on Windows\n", __func__); - GGML_UNUSED(fd); - GGML_UNUSED(params); - return nullptr; -} -#endif void 
gguf_free(struct gguf_context * ctx) { if (ctx == nullptr) { diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp index 5ea00d3fa7..706d72f1e0 100644 --- a/src/llama-mmap.cpp +++ b/src/llama-mmap.cpp @@ -220,12 +220,12 @@ struct llama_file::impl { void init_from_fd(int fd_src) { const int fd_duped = dup(fd_src); if (fd_duped < 0) { - throw std::runtime_error(format("llama_file: failed to dup fd %d: %s", fd_src, strerror(errno))); + throw std::runtime_error(format("failed to dup fd %d: %s", fd_src, strerror(errno))); } fp = fdopen(fd_duped, "rb"); if (!fp) { close(fd_duped); - throw std::runtime_error(format("llama_file: failed to fdopen fd %d: %s", fd_src, strerror(errno))); + throw std::runtime_error(format("failed to fdopen fd %d: %s", fd_src, strerror(errno))); } seek(0, SEEK_END); size = tell(); diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp index c0e1e754e7..358b505f18 100644 --- a/src/llama-model-loader.cpp +++ b/src/llama-model-loader.cpp @@ -13,6 +13,10 @@ #include <cstring> #include <future> +#ifndef _WIN32 +#include <unistd.h> +#endif // _WIN32 + static const size_t kiB = 1024; static const size_t MiB = 1024*kiB; static const size_t GiB = 1024*MiB; @@ -659,13 +663,25 @@ llama_model_loader::llama_model_loader( LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1); } } else if (fd >= 0) { + const int fd_duped = dup(fd); + if (fd_duped < 0) { + throw std::runtime_error(format("%s: failed to dup fd %d: %s", __func__, fd, strerror(errno))); + } + + FILE * f = fdopen(fd_duped, "rb"); + if (!f) { + close(fd_duped); + throw std::runtime_error(format("%s: failed to fdopen fd %d: %s", __func__, fd, strerror(errno))); + } + struct ggml_context * ctx = NULL; struct gguf_init_params params = { /*.no_alloc = */ true, /*.ctx = */ &ctx, }; - metadata_ptr.reset(gguf_init_from_fd(fd, params)); + metadata_ptr.reset(gguf_init_from_file_ptr(f, params)); + fclose(f); metadata = metadata_ptr.get(); if (metadata == nullptr) { throw 
std::runtime_error(format("%s: failed to load model from fd %d", __func__, fd)); @@ -674,8 +690,8 @@ llama_model_loader::llama_model_loader( get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false); llm_kv = LLM_KV(llm_arch_from_string(arch_name)); - contexts.emplace_back(ctx); files.emplace_back(new llama_file(fd)); + contexts.emplace_back(ctx); // Save tensors data offset info of the main file. for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) { diff --git a/src/llama.cpp b/src/llama.cpp index daf3c3bd8d..c40d5c9d51 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1083,7 +1083,7 @@ struct llama_model * llama_model_load_from_fd(int fd, struct llama_model_params std::string path_model; std::vector<std::string> splits = {}; return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, fd, params); -#endif +#endif // _WIN32 } void llama_model_save_to_file(const struct llama_model * model, const char * path_model) { diff --git a/tests/test-model-load-fd.cpp b/tests/test-model-load-fd.cpp index d5102942d0..dd982ba907 100644 --- a/tests/test-model-load-fd.cpp +++ b/tests/test-model-load-fd.cpp @@ -44,4 +44,4 @@ int main(int argc, char ** argv) { return n_vocab > 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -#endif +#endif // _WIN32