llama : use FILE pointer instead of fd in public API

This commit is contained in:
Siddhesh2377 2026-03-14 01:20:50 +05:30
parent a4cfaf07c4
commit 626823b2d9
No known key found for this signature in database
6 changed files with 35 additions and 48 deletions

View File

@ -464,9 +464,8 @@ extern "C" {
const char * path_model,
struct llama_model_params params);
// Load a model from a POSIX file descriptor
// Not supported on Windows
LLAMA_API struct llama_model * llama_model_load_from_fd(int fd, struct llama_model_params params);
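// Load a model from an open FILE pointer
// The caller keeps ownership of the FILE pointer and is responsible for calling fclose() on it after loading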
LLAMA_API struct llama_model * llama_model_load_from_file_ptr(FILE * file, struct llama_model_params params);
// Load a model from multiple splits (supports a custom naming scheme)
// The paths must be in the correct order

View File

@ -13,10 +13,6 @@
#include <future>
#include <regex>
#ifndef _WIN32
#include <unistd.h>
#endif // _WIN32
static const size_t kiB = 1024;
static const size_t MiB = 1024*kiB;
static const size_t GiB = 1024*MiB;
@ -514,7 +510,7 @@ llama_model_loader::llama_model_loader(
void * set_tensor_data_ud,
const std::string & fname,
std::vector<std::string> & splits,
int fd,
FILE * file,
bool use_mmap,
bool use_direct_io,
bool check_tensors,
@ -662,35 +658,23 @@ llama_model_loader::llama_model_loader(
LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1);
}
} else if (fd >= 0) {
const int fd_duped = dup(fd);
if (fd_duped < 0) {
throw std::runtime_error(format("%s: failed to dup fd %d: %s", __func__, fd, strerror(errno)));
}
FILE * f = fdopen(fd_duped, "rb");
if (!f) {
close(fd_duped);
throw std::runtime_error(format("%s: failed to fdopen fd %d: %s", __func__, fd, strerror(errno)));
}
} else if (file) {
struct ggml_context * ctx = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &ctx,
};
metadata_ptr.reset(gguf_init_from_file_ptr(f, params));
fclose(f);
metadata_ptr.reset(gguf_init_from_file_ptr(file, params));
metadata = metadata_ptr.get();
if (metadata == nullptr) {
throw std::runtime_error(format("%s: failed to load model from fd %d", __func__, fd));
throw std::runtime_error(format("%s: failed to load model from file pointer", __func__));
}
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
llm_kv = LLM_KV(llm_arch_from_string(arch_name));
files.emplace_back(new llama_file(fd));
files.emplace_back(new llama_file(fileno(file)));
contexts.emplace_back(ctx);
// Save tensors data offset info of the main file.
@ -715,7 +699,7 @@ llama_model_loader::llama_model_loader(
fver = (enum llama_fver) gguf_get_version(metadata);
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
__func__, n_kv, n_tensors, fname.empty() ? "(fd)" : fname.c_str(), llama_file_version_name(fver));
__func__, n_kv, n_tensors, fname.empty() ? "(file*)" : fname.c_str(), llama_file_version_name(fver));
// determine file type based on the number of tensors for each quantization and print meta data
// TODO: make optional

View File

@ -125,7 +125,7 @@ struct llama_model_loader {
void * set_tensor_data_ud,
const std::string & fname,
std::vector<std::string> & splits, // optional, only need if the split does not follow naming scheme
int fd,
FILE * file,
bool use_mmap,
bool use_direct_io,
bool check_tensors,

View File

@ -859,7 +859,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
std::vector<std::string> splits = {};
llama_model_loader ml(/*metadata*/ nullptr, /*set_tensor_data*/ nullptr, /*set_tensor_data_ud*/ nullptr,
fname_inp, splits, /*fd*/ -1, use_mmap, /*use_direct_io*/ false, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
fname_inp, splits, /*file*/ nullptr, use_mmap, /*use_direct_io*/ false, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
ml.init_mappings(false); // no prefetching
llama_model model(llama_model_default_params());

View File

@ -828,7 +828,7 @@ int64_t llama_time_us(void) {
// Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
static int llama_model_load(struct gguf_context * metadata, llama_model_set_tensor_data_t set_tensor_data, void * set_tensor_data_ud,
const std::string & fname, std::vector<std::string> & splits, int fd, llama_model & model, llama_model_params & params) {
const std::string & fname, std::vector<std::string> & splits, FILE * file, llama_model & model, llama_model_params & params) {
// loading time will be recalculated after the first eval, so
// we take page faults deferred by mmap() into consideration
model.t_load_us = 0;
@ -837,7 +837,7 @@ static int llama_model_load(struct gguf_context * metadata, llama_model_set_tens
model.t_start_us = tm.t_start_us;
try {
llama_model_loader ml(metadata, set_tensor_data, set_tensor_data_ud, fname, splits, fd, params.use_mmap, params.use_direct_io,
llama_model_loader ml(metadata, set_tensor_data, set_tensor_data_ud, fname, splits, file, params.use_mmap, params.use_direct_io,
params.check_tensors, params.no_alloc, params.kv_overrides, params.tensor_buft_overrides);
ml.print_info();
@ -889,9 +889,9 @@ static struct llama_model * llama_model_load_from_file_impl(
void * set_tensor_data_ud,
const std::string & path_model,
std::vector<std::string> & splits,
int fd,
FILE * file,
struct llama_model_params params) {
if (metadata == nullptr && path_model.empty() && fd < 0) {
if (metadata == nullptr && path_model.empty() && !file) {
LLAMA_LOG_ERROR("%s: no model source provided\n", __func__);
return nullptr;
}
@ -1015,7 +1015,7 @@ static struct llama_model * llama_model_load_from_file_impl(
props.memory_free/1024/1024);
}
const int status = llama_model_load(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, fd, *model, params);
const int status = llama_model_load(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, file, *model, params);
GGML_ASSERT(status <= 0);
if (status < 0) {
if (status == -1) {
@ -1041,7 +1041,7 @@ struct llama_model * llama_model_init_from_user(
std::vector<std::string> splits = {};
params.use_mmap = false;
params.use_extra_bufts = false;
return llama_model_load_from_file_impl(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, /*fd*/ -1, params);
return llama_model_load_from_file_impl(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, /*file*/ nullptr, params);
}
// deprecated
struct llama_model * llama_load_model_from_file(
@ -1054,7 +1054,7 @@ struct llama_model * llama_model_load_from_file(
const char * path_model,
struct llama_model_params params) {
std::vector<std::string> splits = {};
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, /*fd*/ -1, params);
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, /*file*/ nullptr, params);
}
struct llama_model * llama_model_load_from_splits(
@ -1070,20 +1070,17 @@ struct llama_model * llama_model_load_from_splits(
for (size_t i = 0; i < n_paths; ++i) {
splits.push_back(paths[i]);
}
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, splits.front(), splits, /*fd*/ -1, params);
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, splits.front(), splits, /*file*/ nullptr, params);
}
struct llama_model * llama_model_load_from_fd(int fd, struct llama_model_params params) {
#ifdef _WIN32
LLAMA_LOG_ERROR("%s: fd-based loading is not supported on Windows\n", __func__);
GGML_UNUSED(fd);
GGML_UNUSED(params);
return nullptr;
#else
struct llama_model * llama_model_load_from_file_ptr(FILE * file, struct llama_model_params params) {
if (!file) {
LLAMA_LOG_ERROR("%s: file is NULL\n", __func__);
return nullptr;
}
std::string path_model;
std::vector<std::string> splits = {};
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, fd, params);
#endif // _WIN32
return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, file, params);
}
void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {

View File

@ -24,20 +24,27 @@ int main(int argc, char ** argv) {
return EXIT_FAILURE;
}
FILE * f = fdopen(dup(fd), "rb");
close(fd);
if (!f) {
fprintf(stderr, "failed to fdopen\n");
return EXIT_FAILURE;
}
auto params = llama_model_default_params();
params.use_mmap = true;
params.vocab_only = true;
struct llama_model * model = llama_model_load_from_fd(fd, params);
close(fd);
struct llama_model * model = llama_model_load_from_file_ptr(f, params);
fclose(f);
if (model == nullptr) {
fprintf(stderr, "load from fd failed\n");
fprintf(stderr, "load from file pointer failed\n");
return EXIT_FAILURE;
}
const int n_vocab = llama_vocab_n_tokens(llama_model_get_vocab(model));
fprintf(stderr, "loaded %d tokens from fd\n", n_vocab);
fprintf(stderr, "loaded %d tokens via file pointer\n", n_vocab);
llama_model_free(model);
llama_backend_free();