metal : adaptive CPU/GPU interleave based on number of nodes (#19369)
This commit is contained in:
parent
449ec2ab07
commit
22cae83218
|
|
@@ -415,7 +415,7 @@ bool ggml_metal_cpy_tensor_async(ggml_metal_t ctx_src, ggml_metal_t ctx_dst, con
|
||||||
|
|
||||||
enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph * gf) {
|
enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph * gf) {
|
||||||
// number of nodes encoded by the main thread (empirically determined)
|
// number of nodes encoded by the main thread (empirically determined)
|
||||||
const int n_main = 64;
|
const int n_main = MAX(64, 0.1*gf->n_nodes);
|
||||||
|
|
||||||
// number of threads in addition to the main thread
|
// number of threads in addition to the main thread
|
||||||
const int n_cb = ctx->n_cb;
|
const int n_cb = ctx->n_cb;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue