use no_alloc instead of fixing f32 fallback

This commit is contained in:
Ruben Ortlam 2026-03-31 14:39:43 +02:00
parent d6fc8fe0c7
commit 646f0a7d78
4 changed files with 3 additions and 4 deletions

View File

@@ -1434,6 +1434,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
mparams.progress_callback = params.load_progress_callback;
mparams.progress_callback_user_data = params.load_progress_callback_user_data;
mparams.no_alloc = params.no_alloc;
return mparams;
}

View File

@@ -679,6 +679,7 @@ struct common_params {
// return false from callback to abort model loading or true to continue
llama_progress_callback load_progress_callback = NULL;
void * load_progress_callback_user_data = NULL;
bool no_alloc = false; // Don't allocate model buffers
};
// call once at the start of a program if it uses libcommon

View File

@@ -1215,10 +1215,6 @@ struct ggml_tensor * llama_model_loader::create_tensor(
const int64_t tid = gguf_find_tensor(metadata, tn.str().c_str());
if (tid != -1) {
type = gguf_get_tensor_type(metadata, tid);
} else if (flags & TENSOR_NOT_REQUIRED) {
// If the tensor is not found and not required, return nullptr to allow
// the caller to fall back
return nullptr;
}
// for tensors that are not required some of the dimensions can be invalid:

View File

@@ -167,6 +167,7 @@ int main(int argc, char ** argv) {
llama_model_params model_params = llama_model_default_params();
model_params.devices = params.devices.data();
model_params.no_alloc = true;
model.reset(llama_model_init_from_user(gguf_ctx.get(), set_tensor_data, nullptr, model_params));