From 5260bb79c08146d75a83a15697b33c8c43e7123a Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 12 Jan 2026 15:18:06 +0200
Subject: [PATCH] context : add comments

---
 src/llama-context.cpp | 2 +-
 src/llama-context.h   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 3f74b51bc6..d1b02ae71c 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1005,7 +1005,7 @@ void llama_context::set_warmup(bool value) {
 }
 
 bool llama_context::set_sampler(llama_seq_id seq_id, llama_sampler * sampler) {
-    LLAMA_LOG_ERROR("%s: seq_id = %d, sampler = %p\n", __func__, (int) seq_id, (void *) sampler);
+    LLAMA_LOG_DEBUG("%s: seq_id = %d, sampler = %p\n", __func__, (int) seq_id, (void *) sampler);
 
     const bool can_offload =
         sampler &&
diff --git a/src/llama-context.h b/src/llama-context.h
index d085d25779..960e4a0782 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -40,7 +40,15 @@ struct llama_context {
 
     ~llama_context();
 
+    // reserve a new backend scheduler
+    // recommended to call whenever the context changes in such a way that the compute graph is modified
+    // for example:
+    //   - changing loras
+    //   - changing samplers
+    //   - changing attention type
+    //   - etc.
     void reserve();
+
     void synchronize();
 
     const llama_model & get_model() const;
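
A minimal sketch of the call pattern described by the new comment on reserve(): after a graph-modifying change such as swapping a per-sequence sampler, a new backend scheduler should be reserved. Only set_sampler() and reserve() come from the headers touched above; the helper update_seq_sampler is hypothetical, and an in-tree caller with access to the internal llama-context.h header is assumed.

    // sketch only, not part of the patch; assumes internal access to llama_context
    #include "llama-context.h"

    void update_seq_sampler(llama_context & ctx, llama_seq_id seq_id, llama_sampler * smpl) {
        // changing samplers is one of the graph-modifying changes listed in
        // the new header comment ...
        ctx.set_sampler(seq_id, smpl);

        // ... so reserve a new backend scheduler afterwards
        ctx.reserve();
    }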