From 471e026327cca9f6f58aeefe32129a6cb9390f4f Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Tue, 19 Aug 2025 23:10:56 -0400 Subject: [PATCH] fix VRAM leak: free backend scheduler after MTP graph execution --- src/llama-context.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llama-context.cpp b/src/llama-context.cpp index a09ac6d447..62d7898b5f 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -3050,5 +3050,7 @@ void llama_build_and_execute_mtp_graph(struct llama_context * ctx, if (logits_mtp) { ctx->set_logits_ith(logits_mtp, sched, last_tok_idx); } + + ggml_backend_sched_free(sched); }