Don't put kvcache on GPU in stateful mode
This commit is contained in:
parent
1c0a47a485
commit
c840210213
|
|
@ -140,7 +140,7 @@ static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_bu
|
|||
|
||||
// Put kvcache on device memory for GPU, unless GGML_OPENVINO_STATEFUL_EXECUTION is set (NPU memory is too small even for kvcache)
|
||||
if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY && strncmp(tensor->name, "cache_", 6) == 0 && !ctx->is_remote &&
|
||||
ggml_openvino_get_device_name() == "GPU") {
|
||||
ggml_openvino_get_device_name() == "GPU" && !getenv("GGML_OPENVINO_STATEFUL_EXECUTION")) {
|
||||
GGML_ASSERT(ctx->tensor_extras.empty());
|
||||
auto device = ctx->device;
|
||||
auto size = ctx->size;
|
||||
|
|
|
|||
Loading…
Reference in New Issue