Add GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION environment variable

This commit is contained in:
cavusmustafa 2025-10-01 14:33:48 -07:00 committed by Mustafa Cavus
parent c112bc4e73
commit e7252920e1
1 changed file with 7 additions and 5 deletions

View File

@@ -80,11 +80,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
bool is_static = device == "NPU" ? true : false;
ov::AnyMap config;
if (device == "GPU") {
config = {
{"GPU_ENABLE_SDPA_OPTIMIZATION", "0"}
};
}
if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
std::string filename = "cgraph.txt";
@@ -186,6 +181,13 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
ov::serialize(model, timestamped_filename);
}
auto* disable_sdpa_optimization = getenv("GGML_OPENVINO_DISABLE_SDPA_OPTIMIZATION");
if (disable_sdpa_optimization && std::string(disable_sdpa_optimization) != "0") {
config = {
{"GPU_ENABLE_SDPA_OPTIMIZATION", "0"}
};
}
auto compiled_model = core.compile_model(model, device, config);
compile_end_time = ggml_time_us();
infer_request_cache[cgraph] = std::make_shared<ov::InferRequest>(compiled_model.create_infer_request());