ggml : extend the GGML_SCHED_NO_REALLOC debug logic of the scheduler (#17617)
This commit is contained in:
parent
6eea666912
commit
90c72a614a
|
|
@ -723,6 +723,12 @@ struct ggml_backend_sched {
|
||||||
bool op_offload;
|
bool op_offload;
|
||||||
|
|
||||||
int debug;
|
int debug;
|
||||||
|
|
||||||
|
// used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]
|
||||||
|
// ref: https://github.com/ggml-org/llama.cpp/pull/17617
|
||||||
|
int debug_realloc;
|
||||||
|
int debug_graph_size;
|
||||||
|
int debug_prev_graph_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
|
#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
|
||||||
|
|
@ -1289,6 +1295,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
|
||||||
}
|
}
|
||||||
|
|
||||||
int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
|
int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
|
||||||
|
|
||||||
|
// remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
|
||||||
|
sched->debug_prev_graph_size = sched->debug_graph_size;
|
||||||
|
sched->debug_graph_size = graph_size;
|
||||||
|
|
||||||
if (sched->graph.size < graph_size) {
|
if (sched->graph.size < graph_size) {
|
||||||
sched->graph.size = graph_size;
|
sched->graph.size = graph_size;
|
||||||
sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
|
sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
|
||||||
|
|
@ -1395,14 +1406,21 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
||||||
|
|
||||||
// allocate graph
|
// allocate graph
|
||||||
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
||||||
#ifdef GGML_SCHED_NO_REALLOC
|
|
||||||
GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (sched->debug_realloc > 0) {
|
||||||
|
// we are interested only in situations where the graph was reallocated even though its size remained the same [GGML_SCHED_DEBUG_REALLOC]
|
||||||
|
// example: https://github.com/ggml-org/llama.cpp/pull/17143
|
||||||
|
const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
|
||||||
|
|
||||||
|
if (unexpected || sched->debug_realloc > 1) {
|
||||||
|
GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
|
||||||
|
sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// the re-allocation may cause the split inputs to be moved to a different address
|
// the re-allocation may cause the split inputs to be moved to a different address
|
||||||
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
|
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
|
||||||
for (int i = 0; i < sched->n_backends; i++) {
|
for (int i = 0; i < sched->n_backends; i++) {
|
||||||
|
|
@ -1620,6 +1638,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
||||||
|
|
||||||
const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
|
const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
|
||||||
sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
|
sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
|
||||||
|
|
||||||
|
sched->debug_realloc = 0;
|
||||||
|
#ifdef GGML_SCHED_NO_REALLOC
|
||||||
|
sched->debug_realloc = 1;
|
||||||
|
#endif
|
||||||
|
const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
|
||||||
|
sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
|
||||||
|
|
||||||
sched->n_backends = n_backends;
|
sched->n_backends = n_backends;
|
||||||
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
||||||
|
|
||||||
|
|
@ -1636,6 +1662,9 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
||||||
sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
|
sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
|
||||||
sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
|
sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
|
||||||
|
|
||||||
|
sched->debug_graph_size = 0;
|
||||||
|
sched->debug_prev_graph_size = 0;
|
||||||
|
|
||||||
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
|
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
|
||||||
sched->context_buffer = (char *) malloc(sched->context_buffer_size);
|
sched->context_buffer = (char *) malloc(sched->context_buffer_size);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue