From 5260bb79c08146d75a83a15697b33c8c43e7123a Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 12 Jan 2026 15:18:06 +0200
Subject: [PATCH] context : add comments

---
 src/llama-context.cpp | 2 +-
 src/llama-context.h   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 3f74b51bc6..d1b02ae71c 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1005,7 +1005,7 @@ void llama_context::set_warmup(bool value) {
 }
 
 bool llama_context::set_sampler(llama_seq_id seq_id, llama_sampler * sampler) {
-    LLAMA_LOG_ERROR("%s: seq_id = %d, sampler = %p\n", __func__, (int) seq_id, (void *) sampler);
+    LLAMA_LOG_DEBUG("%s: seq_id = %d, sampler = %p\n", __func__, (int) seq_id, (void *) sampler);
 
     const bool can_offload =
         sampler &&
diff --git a/src/llama-context.h b/src/llama-context.h
index d085d25779..960e4a0782 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -40,7 +40,15 @@ struct llama_context {
 
     ~llama_context();
 
+    // reserve a new backend scheduler
+    // recommended to call whenever the context changes in such a way that the compute graph is modified
+    // for example:
+    //   - changing loras
+    //   - changing samplers
+    //   - changing attention type
+    //   - etc.
     void reserve();
+
     void synchronize();
 
     const llama_model & get_model() const;
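
A minimal sketch of the call pattern described by the new comment on reserve(): after a graph-modifying change such as swapping a per-sequence sampler, a new backend scheduler should be reserved. Only set_sampler() and reserve() come from the headers touched above; the helper update_seq_sampler is hypothetical, and an in-tree caller with access to the internal llama-context.h header is assumed.

    // sketch only, not part of the patch; assumes internal access to llama_context
    #include "llama-context.h"

    void update_seq_sampler(llama_context & ctx, llama_seq_id seq_id, llama_sampler * smpl) {
        // changing samplers is one of the graph-modifying changes listed in
        // the new header comment ...
        ctx.set_sampler(seq_id, smpl);

        // ... so reserve a new backend scheduler afterwards
        ctx.reserve();
    }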