metal : adaptive CPU/GPU interleave based on number of nodes (#19369)

2026-02-05 19:07:22 +02:00 · 2026-02-05 19:07:22 +02:00 · 22cae83218
parent 449ec2ab07
commit 22cae83218
1 changed file with 1 addition and 1 deletion
--- a/ggml/src/ggml-metal/ggml-metal-context.m
+++ b/ggml/src/ggml-metal/ggml-metal-context.m
@@ -415,7 +415,7 @@ bool ggml_metal_cpy_tensor_async(ggml_metal_t ctx_src, ggml_metal_t ctx_dst, con

 enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph * gf) {
    // number of nodes encoded by the main thread (empirically determined)
-    const int n_main = 64;
+    const int n_main = MAX(64, 0.1*gf->n_nodes);

    // number of threads in addition to the main thread
    const int n_cb = ctx->n_cb;