llama : address review feedback for fd-based model loading

This commit is contained in:
Siddhesh2377 2026-03-14 00:44:33 +05:30
parent 158239a2b1
commit a4cfaf07c4
No known key found for this signature in database
6 changed files with 25 additions and 34 deletions

View File

@@ -78,7 +78,7 @@ extern "C" {
GGML_API struct gguf_context * gguf_init_empty(void); GGML_API struct gguf_context * gguf_init_empty(void);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
GGML_API struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params); GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..); //GGML_API struct gguf_context * gguf_init_from_buffer(..);
GGML_API void gguf_free(struct gguf_context * ctx); GGML_API void gguf_free(struct gguf_context * ctx);

View File

@@ -15,10 +15,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#ifndef _WIN32
#include <unistd.h>
#endif
#define GGUF_MAX_STRING_LENGTH (1024*1024*1024) #define GGUF_MAX_STRING_LENGTH (1024*1024*1024)
#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024) #define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024)
@@ -857,33 +853,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
return result; return result;
} }
#ifndef _WIN32 struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) {
const int fd_duped = dup(fd);
if (fd_duped < 0) {
GGML_LOG_ERROR("%s: failed to dup fd %d: %s\n", __func__, fd, strerror(errno));
return nullptr;
}
FILE * file = fdopen(fd_duped, "rb");
if (!file) { if (!file) {
close(fd_duped);
GGML_LOG_ERROR("%s: failed to fdopen fd %d: %s\n", __func__, fd, strerror(errno));
return nullptr; return nullptr;
} }
return gguf_init_from_file_impl(file, params);
struct gguf_context * result = gguf_init_from_file_impl(file, params);
fclose(file);
return result;
} }
#else
struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) {
GGML_LOG_ERROR("%s: fd-based loading is not supported on Windows\n", __func__);
GGML_UNUSED(fd);
GGML_UNUSED(params);
return nullptr;
}
#endif
void gguf_free(struct gguf_context * ctx) { void gguf_free(struct gguf_context * ctx) {
if (ctx == nullptr) { if (ctx == nullptr) {

View File

@@ -220,12 +220,12 @@ struct llama_file::impl {
void init_from_fd(int fd_src) { void init_from_fd(int fd_src) {
const int fd_duped = dup(fd_src); const int fd_duped = dup(fd_src);
if (fd_duped < 0) { if (fd_duped < 0) {
throw std::runtime_error(format("llama_file: failed to dup fd %d: %s", fd_src, strerror(errno))); throw std::runtime_error(format("failed to dup fd %d: %s", fd_src, strerror(errno)));
} }
fp = fdopen(fd_duped, "rb"); fp = fdopen(fd_duped, "rb");
if (!fp) { if (!fp) {
close(fd_duped); close(fd_duped);
throw std::runtime_error(format("llama_file: failed to fdopen fd %d: %s", fd_src, strerror(errno))); throw std::runtime_error(format("failed to fdopen fd %d: %s", fd_src, strerror(errno)));
} }
seek(0, SEEK_END); seek(0, SEEK_END);
size = tell(); size = tell();

View File

@@ -13,6 +13,10 @@
#include <future> #include <future>
#include <regex> #include <regex>
#ifndef _WIN32
#include <unistd.h>
#endif // _WIN32
static const size_t kiB = 1024; static const size_t kiB = 1024;
static const size_t MiB = 1024*kiB; static const size_t MiB = 1024*kiB;
static const size_t GiB = 1024*MiB; static const size_t GiB = 1024*MiB;
@@ -659,13 +663,25 @@ llama_model_loader::llama_model_loader(
LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1); LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1);
} }
} else if (fd >= 0) { } else if (fd >= 0) {
const int fd_duped = dup(fd);
if (fd_duped < 0) {
throw std::runtime_error(format("%s: failed to dup fd %d: %s", __func__, fd, strerror(errno)));
}
FILE * f = fdopen(fd_duped, "rb");
if (!f) {
close(fd_duped);
throw std::runtime_error(format("%s: failed to fdopen fd %d: %s", __func__, fd, strerror(errno)));
}
struct ggml_context * ctx = NULL; struct ggml_context * ctx = NULL;
struct gguf_init_params params = { struct gguf_init_params params = {
/*.no_alloc = */ true, /*.no_alloc = */ true,
/*.ctx = */ &ctx, /*.ctx = */ &ctx,
}; };
metadata_ptr.reset(gguf_init_from_fd(fd, params)); metadata_ptr.reset(gguf_init_from_file_ptr(f, params));
fclose(f);
metadata = metadata_ptr.get(); metadata = metadata_ptr.get();
if (metadata == nullptr) { if (metadata == nullptr) {
throw std::runtime_error(format("%s: failed to load model from fd %d", __func__, fd)); throw std::runtime_error(format("%s: failed to load model from fd %d", __func__, fd));
@@ -674,8 +690,8 @@ llama_model_loader::llama_model_loader(
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false); get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
llm_kv = LLM_KV(llm_arch_from_string(arch_name)); llm_kv = LLM_KV(llm_arch_from_string(arch_name));
contexts.emplace_back(ctx);
files.emplace_back(new llama_file(fd)); files.emplace_back(new llama_file(fd));
contexts.emplace_back(ctx);
// Save tensors data offset info of the main file. // Save tensors data offset info of the main file.
for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) { for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {

View File

@@ -1083,7 +1083,7 @@ struct llama_model * llama_model_load_from_fd(int fd, struct llama_model_params
std::string path_model; std::string path_model;
std::vector<std::string> splits = {}; std::vector<std::string> splits = {};
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, fd, params); return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, fd, params);
#endif #endif // _WIN32
} }
void llama_model_save_to_file(const struct llama_model * model, const char * path_model) { void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {

View File

@@ -44,4 +44,4 @@ int main(int argc, char ** argv) {
return n_vocab > 0 ? EXIT_SUCCESS : EXIT_FAILURE; return n_vocab > 0 ? EXIT_SUCCESS : EXIT_FAILURE;
} }
#endif #endif // _WIN32