diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 3f74b51bc6..d1b02ae71c 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1005,7 +1005,7 @@ void llama_context::set_warmup(bool value) {
 }
 
 bool llama_context::set_sampler(llama_seq_id seq_id, llama_sampler * sampler) {
-    LLAMA_LOG_DEBUG("%s: seq_id = %d, sampler = %p\n", __func__, (int) seq_id, (void *) sampler);
+    LLAMA_LOG_ERROR("%s: seq_id = %d, sampler = %p\n", __func__, (int) seq_id, (void *) sampler);
 
     const bool can_offload =
         sampler &&
diff --git a/src/llama-context.h b/src/llama-context.h
index d085d25779..960e4a0782 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -40,7 +40,15 @@ struct llama_context {
     ~llama_context();
 
+    // reserve a new backend scheduler
+    // recommended to call whenever the context changes in such a way that the compute graph is modified.
+    // for example:
+    //   - changing loras
+    //   - changing samplers
+    //   - changing attention type
+    //   - etc.
     void reserve();
+
     void synchronize();
 
     const llama_model & get_model() const;
 
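
For context, here is a minimal, self-contained sketch of the usage pattern the new header comment describes: any setter that changes the compute graph (loras, samplers, attention type, ...) is followed by a `reserve()` call that rebuilds the backend scheduler. The `context_sketch` struct and its members are hypothetical stand-ins for illustration only; they are not part of llama.cpp or of this patch.

```cpp
#include <cstdio>

// Hypothetical model of the reserve() pattern; not the real llama_context.
struct context_sketch {
    int  n_loras           = 0;     // stand-in for graph-affecting state
    bool sampler_offloaded = false; // stand-in for graph-affecting state

    // stand-in for llama_context::reserve(): rebuild the backend scheduler
    // so it matches the current shape of the compute graph
    void reserve() {
        std::printf("rebuilding backend scheduler (loras=%d, offload=%d)\n",
                    n_loras, (int) sampler_offloaded);
    }

    // setters that modify the compute graph call reserve() afterwards
    void set_lora_count(int n) {
        n_loras = n;
        reserve();
    }

    void set_sampler_offload(bool v) {
        sampler_offloaded = v;
        reserve();
    }
};

int main() {
    context_sketch ctx;
    ctx.set_lora_count(2);          // graph changed -> rebuild scheduler
    ctx.set_sampler_offload(true);  // graph changed -> rebuild scheduler
}
```

The sketch only illustrates the discipline stated in the comment: when the graph changes, rebuild via `reserve()` instead of continuing with a scheduler reserved for the previous graph.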