diff --git a/src/llama-context.cpp b/src/llama-context.cpp index a09ac6d447..62d7898b5f 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -3050,5 +3050,7 @@ void llama_build_and_execute_mtp_graph(struct llama_context * ctx, if (logits_mtp) { ctx->set_logits_ith(logits_mtp, sched, last_tok_idx); } + + ggml_backend_sched_free(sched); }