Don't put kvcache on GPU in stateful mode

This commit is contained in:
Yu, Zijun 2026-01-24 17:16:06 +08:00
parent 1c0a47a485
commit c840210213
1 changed file with 1 addition and 1 deletion

View File

@@ -140,7 +140,7 @@ static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_bu
// Put kvcache on device memory for GPU (NPU memory is too small even for kvcache)
if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY && strncmp(tensor->name, "cache_", 6) == 0 && !ctx->is_remote &&
-ggml_openvino_get_device_name() == "GPU") {
+ggml_openvino_get_device_name() == "GPU" && !getenv("GGML_OPENVINO_STATEFUL_EXECUTION")) {
GGML_ASSERT(ctx->tensor_extras.empty());
auto device = ctx->device;
auto size = ctx->size;