From 646f0a7d78e399dcf8884db22d0e54df3b39c7b2 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam
Date: Tue, 31 Mar 2026 14:39:43 +0200
Subject: [PATCH] use no_alloc instead of fixing f32 fallback

---
 common/common.cpp          | 1 +
 common/common.h            | 1 +
 src/llama-model-loader.cpp | 4 ----
 tests/export-graph-ops.cpp | 1 +
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index a9bd494191..a99862db5a 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1434,6 +1434,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
 
     mparams.progress_callback = params.load_progress_callback;
     mparams.progress_callback_user_data = params.load_progress_callback_user_data;
+    mparams.no_alloc = params.no_alloc;
 
     return mparams;
 }
diff --git a/common/common.h b/common/common.h
index 17dc3fb232..31a337daa6 100644
--- a/common/common.h
+++ b/common/common.h
@@ -679,6 +679,7 @@ struct common_params {
     // return false from callback to abort model loading or true to continue
     llama_progress_callback load_progress_callback = NULL;
     void * load_progress_callback_user_data = NULL;
+    bool no_alloc = false; // Don't allocate model buffers
 };
 
 // call once at the start of a program if it uses libcommon
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 9dbc108036..3d549cae5b 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -1215,10 +1215,6 @@ struct ggml_tensor * llama_model_loader::create_tensor(
     const int64_t tid = gguf_find_tensor(metadata, tn.str().c_str());
     if (tid != -1) {
         type = gguf_get_tensor_type(metadata, tid);
-    } else if (flags & TENSOR_NOT_REQUIRED) {
-        // If the tensor is not found and not required, return nullptr to allow
-        // the caller to fall back
-        return nullptr;
     }
 
     // for tensors that are not required some of the dimensions can be invalid:
diff --git a/tests/export-graph-ops.cpp b/tests/export-graph-ops.cpp
index 2d75a27960..f4f82b8664 100644
--- a/tests/export-graph-ops.cpp
+++ b/tests/export-graph-ops.cpp
@@ -167,6 +167,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = llama_model_default_params();
     model_params.devices = params.devices.data();
+    model_params.no_alloc = true;
 
     model.reset(llama_model_init_from_user(gguf_ctx.get(), set_tensor_data, nullptr, model_params));
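
Usage sketch (not part of the patch): the snippet below shows how a caller
could go through the plumbing this patch adds, setting the new no_alloc flag
to load a model's structure and metadata without allocating backend buffers
for the weights. It assumes llama_model_params already carries the no_alloc
field in this branch (the patch only forwards it from common_params) and uses
the public llama_model_load_from_file / llama_model_free API; the model path
is illustrative.

    #include "llama.h"

    int main() {
        // Illustrative use of the new flag: with no_alloc set, the loader
        // should only set up the tensor/graph structure, without allocating
        // backend buffers for the tensor data (assumption based on the flag's
        // comment "Don't allocate model buffers").
        llama_model_params mparams = llama_model_default_params();
        mparams.no_alloc = true;

        llama_model * model = llama_model_load_from_file("model.gguf", mparams);
        if (model == nullptr) {
            return 1;
        }

        // ... inspect the model structure here, as tests/export-graph-ops.cpp
        // does for its graph export ...

        llama_model_free(model);
        return 0;
    }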