ggml: add env var GGML_OP_OFFLOAD_MIN_BATCH

* makes the minimum batch size (min_batch_size) for triggering op offload configurable via the GGML_OP_OFFLOAD_MIN_BATCH environment variable, defaulting to the previously hardcoded value of 32
DocShotgun 2026-01-01 22:09:18 -08:00
parent e86f3c2221
commit 3c1bcf2606
5 changed files with 5 additions and 5 deletions
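
Each of the five diffs below applies the same one-line change to a backend's offload_op hook. A minimal standalone sketch of that pattern is shown here for reference; the env var name and the default of 32 come from the patch, while the surrounding demo program (main, printf) is illustrative only and not part of the change.

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // Same expression each backend now uses: take GGML_OP_OFFLOAD_MIN_BATCH
    // if it is set, otherwise keep the previously hardcoded threshold of 32.
    const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH")
        ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH"))
        : 32;

    // Note: atoi() returns 0 for non-numeric input, so an unparseable value
    // effectively disables the batch-size threshold.
    printf("op offload min batch size: %d\n", min_batch_size);
    return 0;
}

Exporting e.g. GGML_OP_OFFLOAD_MIN_BATCH=64 before launching raises the threshold, while leaving the variable unset keeps the previous behavior.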

@@ -2512,7 +2512,7 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
* false.
*/
static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
- const int min_batch_size = 32;
+ const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
GGML_UNUSED(dev);
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;

@@ -4660,7 +4660,7 @@ static int64_t get_op_batch_size(const ggml_tensor * op) {
}
static bool ggml_backend_cuda_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
- const int min_batch_size = 32;
+ const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
return get_op_batch_size(op) >= min_batch_size;

@@ -625,7 +625,7 @@ static int64_t get_op_batch_size(const ggml_tensor * op) {
}
static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
- const int min_batch_size = 32;
+ const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
return (op->op == GGML_OP_MUL_MAT ||
op->op == GGML_OP_MUL_MAT_ID) &&

@@ -4674,7 +4674,7 @@ static int64_t get_op_batch_size(const ggml_tensor * op) {
}
static bool ggml_backend_sycl_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
- const int min_batch_size = 32;
+ const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
return get_op_batch_size(op) >= min_batch_size;
GGML_UNUSED(dev);
}

@@ -14651,7 +14651,7 @@ static bool ggml_backend_vk_device_supports_buft(ggml_backend_dev_t dev, ggml_ba
}
static bool ggml_backend_vk_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
- const int min_batch_size = 32;
+ const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32;
return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
(op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);