metal : adaptive CPU/GPU interleave based on number of nodes (#19369)
This commit is contained in:
parent
449ec2ab07
commit
22cae83218
|
|
@ -415,7 +415,7 @@ bool ggml_metal_cpy_tensor_async(ggml_metal_t ctx_src, ggml_metal_t ctx_dst, con
|
|||
|
||||
enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph * gf) {
|
||||
// number of nodes encoded by the main thread: at least 64, or 10% of the graph's nodes when that is larger (empirically determined)
|
||||
const int n_main = 64;
|
||||
const int n_main = MAX(64, 0.1*gf->n_nodes);
|
||||
|
||||
// number of threads in addition to the main thread
|
||||
const int n_cb = ctx->n_cb;
|
||||
|
|
|
|||
Loading…
Reference in New Issue