From 3c1bcf26069be6c4683d0bf42794e85d5c520aab Mon Sep 17 00:00:00 2001 From: DocShotgun <126566557+DocShotgun@users.noreply.github.com> Date: Thu, 1 Jan 2026 22:09:18 -0800 Subject: [PATCH] ggml: add env var GGML_OP_OFFLOAD_MIN_BATCH * makes the min_batch_size for triggering op offload configurable via env var, defaulting to the prior hardcoded value of 32 --- ggml/src/ggml-cann/ggml-cann.cpp | 2 +- ggml/src/ggml-cuda/ggml-cuda.cu | 2 +- ggml/src/ggml-metal/ggml-metal.cpp | 2 +- ggml/src/ggml-sycl/ggml-sycl.cpp | 2 +- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index ef23ec78da..5b48e02606 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -2512,7 +2512,7 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) { * false. */ static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { - const int min_batch_size = 32; + const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32; GGML_UNUSED(dev); return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 84eccea3f7..1cb879ed5a 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4660,7 +4660,7 @@ static int64_t get_op_batch_size(const ggml_tensor * op) { } static bool ggml_backend_cuda_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { - const int min_batch_size = 32; + const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32; return get_op_batch_size(op) >= min_batch_size; diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp index 70bf6f3d98..60da5baf30 100644 --- a/ggml/src/ggml-metal/ggml-metal.cpp +++ b/ggml/src/ggml-metal/ggml-metal.cpp @@ -625,7 +625,7 @@ static int64_t get_op_batch_size(const ggml_tensor * op) { } static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { - const int min_batch_size = 32; + const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32; return (op->op == GGML_OP_MUL_MAT || op->op == GGML_OP_MUL_MAT_ID) && diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index e996d98be8..8259c76052 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4674,7 +4674,7 @@ static int64_t get_op_batch_size(const ggml_tensor * op) { } static bool ggml_backend_sycl_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { - const int min_batch_size = 32; + const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32; return get_op_batch_size(op) >= min_batch_size; GGML_UNUSED(dev); } diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 541e4a50b7..bd76b16cff 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -14651,7 +14651,7 @@ static bool ggml_backend_vk_device_supports_buft(ggml_backend_dev_t dev, ggml_ba } static bool ggml_backend_vk_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { - const int min_batch_size = 32; + const int min_batch_size = getenv("GGML_OP_OFFLOAD_MIN_BATCH") ? atoi(getenv("GGML_OP_OFFLOAD_MIN_BATCH")) : 32; return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) || (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);