diff --git a/include/llama.h b/include/llama.h index 2b1fdcbded..a4ad4c2827 100644 --- a/include/llama.h +++ b/include/llama.h @@ -213,7 +213,7 @@ extern "C" { typedef bool (*llama_progress_callback)(float progress, void * user_data); // called after graph build but before memory allocation in llama_decode/llama_encode - // use ggml_backend_sched_set_tensor_backend() to reassign ops to a different backend + // use ggml_backend_sched_set_tensor_backend() to reassign graph nodes to a different backend // NOTE: not called when a previous graph is reused; assignments from the last invocation // persist. set LLAMA_GRAPH_REUSE_DISABLE=1 for per-decode control. typedef void (*llama_pre_alloc_callback)(ggml_backend_sched_t sched, struct ggml_cgraph * gf, void * user_data); diff --git a/tests/test-pre-alloc-callback.cpp b/tests/test-pre-alloc-callback.cpp index 1b19e91860..0385fa84aa 100644 --- a/tests/test-pre-alloc-callback.cpp +++ b/tests/test-pre-alloc-callback.cpp @@ -1,5 +1,4 @@ #include -#include #include "llama.h" #include "get-model.h" @@ -31,8 +30,13 @@ static void pre_alloc_cb(ggml_backend_sched_t sched, struct ggml_cgraph * gf, vo } } - ggml_backend_sched_set_tensor_backend(sched, node, target); - state->reassign_ok = (ggml_backend_sched_get_tensor_backend(sched, node) == target); + if (target != current) { + ggml_backend_sched_set_tensor_backend(sched, node, target); + state->reassign_ok = (ggml_backend_sched_get_tensor_backend(sched, node) == target); + } else { + // only one backend available — can't test reassignment, just verify the callback was called + state->reassign_ok = true; + } } int main(int argc, char ** argv) {