llama : address review feedback for fd-based model loading
commit a4cfaf07c4
parent 158239a2b1
ggml/include/gguf.h
@@ -78,7 +78,7 @@ extern "C" {

     GGML_API struct gguf_context * gguf_init_empty(void);
     GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
-    GGML_API struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params);
+    GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params);
     //GGML_API struct gguf_context * gguf_init_from_buffer(..);

     GGML_API void gguf_free(struct gguf_context * ctx);
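For callers, the change means passing an open stream instead of a raw descriptor. A minimal caller-side sketch (the wrapper function is illustrative, not part of this commit; the caller keeps ownership of the FILE and closes it itself):

    #include <cstdio>
    #include "gguf.h"

    static struct gguf_context * load_gguf_from_stream(FILE * f) {
        struct gguf_init_params params = {
            /*.no_alloc = */ false,
            /*.ctx      = */ NULL,
        };
        // gguf_init_from_file_ptr() reads from f but does not fclose() it
        return gguf_init_from_file_ptr(f, params);
    }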
ggml/src/gguf.cpp
@@ -15,10 +15,6 @@
 #include <string>
 #include <vector>

-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
 #define GGUF_MAX_STRING_LENGTH (1024*1024*1024)
 #define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024)
@@ -857,33 +853,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params)
     return result;
 }

-#ifndef _WIN32
-struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) {
-    const int fd_duped = dup(fd);
-    if (fd_duped < 0) {
-        GGML_LOG_ERROR("%s: failed to dup fd %d: %s\n", __func__, fd, strerror(errno));
-        return nullptr;
-    }
-
-    FILE * file = fdopen(fd_duped, "rb");
+struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
     if (!file) {
-        close(fd_duped);
-        GGML_LOG_ERROR("%s: failed to fdopen fd %d: %s\n", __func__, fd, strerror(errno));
         return nullptr;
     }
-
-    struct gguf_context * result = gguf_init_from_file_impl(file, params);
-    fclose(file);
-    return result;
-}
-#else
-struct gguf_context * gguf_init_from_fd(int fd, struct gguf_init_params params) {
-    GGML_LOG_ERROR("%s: fd-based loading is not supported on Windows\n", __func__);
-    GGML_UNUSED(fd);
-    GGML_UNUSED(params);
-    return nullptr;
-}
-#endif
+    return gguf_init_from_file_impl(file, params);
 }

 void gguf_free(struct gguf_context * ctx) {
     if (ctx == nullptr) {
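With this revision the dup()/fdopen() handling and the Windows stub are gone from ggml entirely: gguf.cpp no longer needs <unistd.h>, and the library exposes only the portable FILE*-based entry point, mirroring gguf_init_from_file(). The descriptor juggling moves to the llama model loader below, which is the only caller that actually deals in fds.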
src/llama-mmap.cpp
@@ -220,12 +220,12 @@ struct llama_file::impl {
     void init_from_fd(int fd_src) {
         const int fd_duped = dup(fd_src);
         if (fd_duped < 0) {
-            throw std::runtime_error(format("llama_file: failed to dup fd %d: %s", fd_src, strerror(errno)));
+            throw std::runtime_error(format("failed to dup fd %d: %s", fd_src, strerror(errno)));
         }
         fp = fdopen(fd_duped, "rb");
         if (!fp) {
             close(fd_duped);
-            throw std::runtime_error(format("llama_file: failed to fdopen fd %d: %s", fd_src, strerror(errno)));
+            throw std::runtime_error(format("failed to fdopen fd %d: %s", fd_src, strerror(errno)));
         }
         seek(0, SEEK_END);
         size = tell();
src/llama-model-loader.cpp
@@ -13,6 +13,10 @@
 #include <future>
 #include <regex>

+#ifndef _WIN32
+#include <unistd.h>
+#endif // _WIN32
+
 static const size_t kiB = 1024;
 static const size_t MiB = 1024*kiB;
 static const size_t GiB = 1024*MiB;
@@ -659,13 +663,25 @@ llama_model_loader::llama_model_loader(
             LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1);
         }
     } else if (fd >= 0) {
+        const int fd_duped = dup(fd);
+        if (fd_duped < 0) {
+            throw std::runtime_error(format("%s: failed to dup fd %d: %s", __func__, fd, strerror(errno)));
+        }
+
+        FILE * f = fdopen(fd_duped, "rb");
+        if (!f) {
+            close(fd_duped);
+            throw std::runtime_error(format("%s: failed to fdopen fd %d: %s", __func__, fd, strerror(errno)));
+        }
+
         struct ggml_context * ctx = NULL;
         struct gguf_init_params params = {
             /*.no_alloc = */ true,
             /*.ctx      = */ &ctx,
         };

-        metadata_ptr.reset(gguf_init_from_fd(fd, params));
+        metadata_ptr.reset(gguf_init_from_file_ptr(f, params));
+        fclose(f);
         metadata = metadata_ptr.get();
         if (metadata == nullptr) {
             throw std::runtime_error(format("%s: failed to load model from fd %d", __func__, fd));
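The dup() is what keeps descriptor ownership with the caller: fclose() on a stream built from the duplicate releases only the duplicate, so the fd passed to the loader stays valid afterwards. A stand-alone POSIX illustration of the pattern (file name and error handling are placeholders):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int fd = open("model.gguf", O_RDONLY); // caller-owned descriptor
    FILE * f = fdopen(dup(fd), "rb");      // stream over a private duplicate
    // ... read the metadata through f ...
    fclose(f);                             // closes only the duplicate
    // fd is still open here and can be reused, e.g. by llama_file
    close(fd);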
@@ -674,8 +690,8 @@ llama_model_loader::llama_model_loader(
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));

-        contexts.emplace_back(ctx);
         files.emplace_back(new llama_file(fd));
+        contexts.emplace_back(ctx);

         // Save tensors data offset info of the main file.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
src/llama.cpp
@@ -1083,7 +1083,7 @@ struct llama_model * llama_model_load_from_fd(int fd, struct llama_model_params params)
     std::string path_model;
     std::vector<std::string> splits = {};
     return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, fd, params);
-#endif
+#endif // _WIN32
 }

 void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {
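Put together, a POSIX caller of the new API could look like the sketch below (illustrative only; error handling abbreviated):

    #include <fcntl.h>
    #include <unistd.h>
    #include "llama.h"

    int fd = open("model.gguf", O_RDONLY);
    if (fd < 0) { /* handle open() failure */ }

    struct llama_model_params mparams = llama_model_default_params();
    struct llama_model * model = llama_model_load_from_fd(fd, mparams);

    close(fd); // the loader works on dup()ed descriptors, so the caller may close its fd
    if (model == NULL) { /* handle load failure */ }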
@@ -44,4 +44,4 @@ int main(int argc, char ** argv) {

     return n_vocab > 0 ? EXIT_SUCCESS : EXIT_FAILURE;
 }
-#endif
+#endif // _WIN32