diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 0ec815f07f..9b000f26d5 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -80,11 +80,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c bool is_static = device == "NPU" ? true : false; ov::AnyMap config; - if (device == "GPU") { - config = { - {"GPU_ENABLE_SDPA_OPTIMIZATION", "0"} - }; - } if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) { std::string filename = "cgraph.txt"; @@ -186,6 +181,13 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c ov::serialize(model, timestamped_filename); } + auto* disable_sdpa_optimization = getenv("GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION"); + if (disable_sdpa_optimization && std::string(disable_sdpa_optimization) != "0") { + config = { + {"GPU_ENABLE_SDPA_OPTIMIZATION", "0"} + }; + } + auto compiled_model = core.compile_model(model, device, config); compile_end_time = ggml_time_us(); infer_request_cache[cgraph] = std::make_shared(compiled_model.create_infer_request());