use no_alloc instead of fixing f32 fallback

This commit is contained in:
Ruben Ortlam 2026-03-31 14:39:43 +02:00
parent d6fc8fe0c7
commit 646f0a7d78
4 changed files with 3 additions and 4 deletions

View File

@@ -1434,6 +1434,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
mparams.progress_callback = params.load_progress_callback;
mparams.progress_callback_user_data = params.load_progress_callback_user_data;
mparams.no_alloc = params.no_alloc;
return mparams;
}

View File

@@ -679,6 +679,7 @@ struct common_params {
// return false from callback to abort model loading or true to continue
llama_progress_callback load_progress_callback = NULL;
void * load_progress_callback_user_data = NULL;
bool no_alloc = false; // Don't allocate model buffers
};
// call once at the start of a program if it uses libcommon

View File

@@ -1215,10 +1215,6 @@ struct ggml_tensor * llama_model_loader::create_tensor(
const int64_t tid = gguf_find_tensor(metadata, tn.str().c_str());
if (tid != -1) {
type = gguf_get_tensor_type(metadata, tid);
} else if (flags & TENSOR_NOT_REQUIRED) {
// If the tensor is not found and not required, return nullptr to allow
// the caller to fall back
return nullptr;
}
// for tensors that are not required some of the dimensions can be invalid:

View File

@@ -167,6 +167,7 @@ int main(int argc, char ** argv) {
llama_model_params model_params = llama_model_default_params();
model_params.devices = params.devices.data();
model_params.no_alloc = true;
model.reset(llama_model_init_from_user(gguf_ctx.get(), set_tensor_data, nullptr, model_params));