diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h
index 42763b86f9..02d5f221c0 100644
--- a/ggml/include/gguf.h
+++ b/ggml/include/gguf.h
@@ -77,8 +77,8 @@ extern "C" {
     };
 
     GGML_API struct gguf_context * gguf_init_empty(void);
-    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
     GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params);
+    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
     //GGML_API struct gguf_context * gguf_init_from_buffer(..);
 
     GGML_API void gguf_free(struct gguf_context * ctx);
@@ -190,8 +190,8 @@ extern "C" {
     //
 
     // write the entire context to a binary file
-    GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
     GGML_API bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta);
+    GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
 
     // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 407b514f71..ab3cc97486 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -1512,6 +1512,19 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
     gguf_write_out(ctx, gw, only_meta);
 }
 
+bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta) {
+    GGML_ASSERT(file);
+
+    try {
+        gguf_writer_file gw(file);
+        gguf_write_out(ctx, gw, only_meta);
+    } catch (const std::runtime_error& ex) {
+        GGML_LOG_ERROR("%s: failed to write GGUF data: %s\n", __func__, ex.what());
+        return false;
+    }
+    return true;
+}
+
 bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
     FILE * file = ggml_fopen(fname, "wb");
 
@@ -1529,19 +1542,6 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
     return success;
 }
 
-bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta) {
-    GGML_ASSERT(file);
-
-    try {
-        gguf_writer_file gw(file);
-        gguf_write_out(ctx, gw, only_meta);
-    } catch (const std::runtime_error& ex) {
-        GGML_LOG_ERROR("%s: failed to write GGUF data: %s\n", __func__, ex.what());
-        return false;
-    }
-    return true;
-}
-
 size_t gguf_get_meta_size(const struct gguf_context * ctx) {
     // only return size
     std::vector<int8_t> buf;
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index f6084fa84a..322a66bc05 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -2757,7 +2757,7 @@ std::string LLM_TN_IMPL::str() const {
     if (model_tensors.find(tensor) == model_tensors.end()) {
         const char * name = LLM_TENSOR_NAMES.at(tensor);
         if (suffix != nullptr || bid != -1 || xid != -1) {
-            LLAMA_LOG_ERROR("%s: cannot properly format tensor name %s with suffix=%s bid=%d xid=%d\n",
+            LLAMA_LOG_WARN("%s: cannot properly format tensor name %s with suffix=%s bid=%d xid=%d\n",
                 __func__, name, suffix, bid, xid);
         }
         return name;
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 6d63d58fa7..490e8f336b 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -7453,7 +7453,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
     // generic pass: load optional per-tensor/per-expert ".scale" tensors (e.g. NVFP4 scale2)
     // this avoids having to add scale loading to every architecture
-    if (arch != LLM_ARCH_T5) {
     for (int i = 0; i < n_layer; ++i) {
         auto & layer = layers[i];
 
@@ -7523,7 +7522,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             }
         }
     }
-    }
 
     ml.done_getting_tensors();