ggml: add graph_reused (#21764)

* ggml: add graph_reused

* use versioning instead of reuse flag

* increment version with atomic

* use top bits for split numbering

* add assert

* move counter to ggml.c

* set uid in split_graph only

* fix windows

* address further review comments

* get next_uid rather than doing bit manipulation

* rename + add comment about uid
This commit is contained in:
Aman Gupta 2026-04-16 17:21:28 +08:00 committed by GitHub
parent ae2d34899e
commit 3f7c29d318
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 35 additions and 0 deletions

View File

@ -1030,6 +1030,8 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
GGML_ABORT("%s: failed to initialize context\n", __func__);
}
graph->uid = ggml_graph_next_uid();
// pass 1: assign backends to ops with pre-allocated inputs
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
@ -1477,6 +1479,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
assert(graph_copy->size > graph_copy->n_leafs);
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
}
// set ids for all splits
for (int i = 0; i < sched->n_splits; ++i) {
sched->splits[i].graph.uid = ggml_graph_next_uid();
}
}
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {

View File

@ -1186,6 +1186,7 @@ struct ggml_cuda_graph {
std::vector<cudaGraphNode_t> nodes;
bool disable_due_to_gpu_arch = false;
bool warmup_complete = false;
uint64_t uid = 0;
struct node_properties {
ggml_tensor node;
void * node_src_data_ptrs[GGML_MAX_SRC];

View File

@ -3108,6 +3108,15 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
const void * graph_key = ggml_cuda_graph_get_key(cgraph);
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
// Fast path: when the incoming graph carries a non-zero uid equal to the one
// recorded on the cached CUDA graph, report that no update is required.
if (cgraph->uid != 0 &&
    cgraph->uid == graph->uid) {
    // uid is a uint64_t; %zu expects size_t, which is a different type on some
    // targets, so cast explicitly and print with %llu instead.
    GGML_LOG_DEBUG("CUDA Graph id %llu reused\n", (unsigned long long) cgraph->uid);
    GGML_ASSERT((int)graph->node_props.size() == cgraph->n_nodes);
    return false;
}
graph->uid = cgraph->uid;
// Check if the graph size has changed
if ((int)graph->node_props.size() != cgraph->n_nodes) {
res = true;

View File

@ -30,6 +30,8 @@ extern "C" {
void ggml_print_backtrace(void);
uint64_t ggml_graph_next_uid(void);
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
@ -338,6 +340,10 @@ struct ggml_cgraph {
struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order;
// an optional identifier that can be utilized to recognize same graphs if two non-zero values match
// a value of 0 means it is not set and should be ignored
uint64_t uid;
};
// returns a slice of cgraph with nodes [i0, i1)

View File

@ -53,6 +53,16 @@
#define UNUSED GGML_UNUSED
// Returns the next value of a function-local counter that starts at 1 and is
// advanced atomically by one on every call. Because the first id handed out
// is 1, the value 0 is left free to mean "uid not set" on a ggml_cgraph.
uint64_t ggml_graph_next_uid(void) {
#ifdef _MSC_VER
    static volatile long long next_uid = 1;
    // _InterlockedIncrement64 yields the value *after* the increment,
    // so step back by one to obtain the id claimed by this call.
    const uint64_t after_increment = (uint64_t) _InterlockedIncrement64(&next_uid);
    return after_increment - 1;
#else
    static uint64_t next_uid = 1;
    // fetch_add yields the value *before* the increment, i.e. the claimed id.
    // Relaxed ordering: only the counter itself is updated atomically.
    const uint64_t claimed = __atomic_fetch_add(&next_uid, 1, __ATOMIC_RELAXED);
    return claimed;
#endif
}
// Needed for ggml_fp32_to_bf16_row()
#if defined(__AVX512BF16__)
#if defined(_MSC_VER)
@ -7098,6 +7108,7 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
/*.use_counts =*/ use_counts_ptr,
/*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
/*.uid =*/ 0,
};
ggml_hash_set_reset(&cgraph->visited_hash_set);
@ -7125,6 +7136,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
/*.use_counts =*/ cgraph0->use_counts,
/*.visited_hash_set =*/ cgraph0->visited_hash_set,
/*.order =*/ cgraph0->order,
/*.uid =*/ 0
};
return cgraph;