diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 0897a64c21..8713653430 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -2357,7 +2357,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) { first_moved_to_buft = buft; } } - + ggml_context * ctx = ctx_for_buft(buft); // if duplicated, check if the original tensor was allocated in the same buffer type context and avoid creating a new one