Move to a zero timeout (non-blocking polling) for WaitAny in graph submission to avoid deadlocks in some cases on llvmpipe backends (#20618)
This commit is contained in:
parent
78d550b541
commit
8ced5f41f9
|
|
@ -509,50 +509,39 @@ static void ggml_backend_webgpu_wait_profile_futures(webgpu_global_context &
|
||||||
static void ggml_backend_webgpu_wait(webgpu_global_context & ctx,
|
static void ggml_backend_webgpu_wait(webgpu_global_context & ctx,
|
||||||
std::vector<webgpu_submission> & subs,
|
std::vector<webgpu_submission> & subs,
|
||||||
bool block = true) {
|
bool block = true) {
|
||||||
// If we have too many in-flight submissions, wait on the oldest one first.
|
|
||||||
if (subs.empty()) {
|
if (subs.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (subs.size() >= WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD) {
|
|
||||||
auto waitStatus = ctx->instance.WaitAny(1, &subs[0].submit_done, UINT64_MAX);
|
bool blocking_wait = block || subs.size() >= WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD;
|
||||||
if (ggml_backend_webgpu_handle_wait_status(waitStatus)) {
|
while (blocking_wait) {
|
||||||
|
auto waitStatus = ctx->instance.WaitAny(1, &subs[0].submit_done, 0);
|
||||||
|
if (ggml_backend_webgpu_handle_wait_status(waitStatus, true)) {
|
||||||
#ifdef GGML_WEBGPU_GPU_PROFILE
|
#ifdef GGML_WEBGPU_GPU_PROFILE
|
||||||
ggml_backend_webgpu_wait_profile_futures(ctx, subs[0].profile_futures, true);
|
ggml_backend_webgpu_wait_profile_futures(ctx, subs[0].profile_futures, true);
|
||||||
#endif
|
#endif
|
||||||
subs.erase(subs.begin());
|
subs.erase(subs.begin());
|
||||||
}
|
}
|
||||||
|
blocking_wait = (block && !subs.empty()) || subs.size() >= WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subs.empty()) {
|
if (subs.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (block) {
|
// Poll each submit future once and remove completed submissions.
|
||||||
for (auto & sub : subs) {
|
for (auto sub = subs.begin(); sub != subs.end();) {
|
||||||
while (!sub.submit_done.completed) {
|
auto waitStatus = ctx->instance.WaitAny(1, &sub->submit_done, 0);
|
||||||
auto waitStatus = ctx->instance.WaitAny(1, &sub.submit_done, UINT64_MAX);
|
bool success = ggml_backend_webgpu_handle_wait_status(waitStatus, true);
|
||||||
ggml_backend_webgpu_handle_wait_status(waitStatus);
|
|
||||||
}
|
|
||||||
#ifdef GGML_WEBGPU_GPU_PROFILE
|
#ifdef GGML_WEBGPU_GPU_PROFILE
|
||||||
ggml_backend_webgpu_wait_profile_futures(ctx, sub.profile_futures, true);
|
ggml_backend_webgpu_wait_profile_futures(ctx, sub->profile_futures, false);
|
||||||
#endif
|
if (success && sub->profile_futures.empty()) {
|
||||||
}
|
|
||||||
subs.clear();
|
|
||||||
} else {
|
|
||||||
// Poll each submit future once and remove completed submissions.
|
|
||||||
for (auto sub = subs.begin(); sub != subs.end();) {
|
|
||||||
auto waitStatus = ctx->instance.WaitAny(1, &sub->submit_done, 0);
|
|
||||||
ggml_backend_webgpu_handle_wait_status(waitStatus, true);
|
|
||||||
#ifdef GGML_WEBGPU_GPU_PROFILE
|
|
||||||
ggml_backend_webgpu_wait_profile_futures(ctx, sub->profile_futures, false);
|
|
||||||
if (sub->submit_done.completed && sub->profile_futures.empty()) {
|
|
||||||
#else
|
#else
|
||||||
if (sub->submit_done.completed) {
|
if (success) {
|
||||||
#endif
|
#endif
|
||||||
sub = subs.erase(sub);
|
sub = subs.erase(sub);
|
||||||
} else {
|
} else {
|
||||||
++sub;
|
++sub;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2961,17 +2950,16 @@ static ggml_backend_buffer_type_t ggml_backend_webgpu_device_get_buffer_type(ggm
|
||||||
|
|
||||||
static struct ggml_backend_buffer_type ggml_backend_webgpu_buffer_type = {
|
static struct ggml_backend_buffer_type ggml_backend_webgpu_buffer_type = {
|
||||||
/* .iface = */ {
|
/* .iface = */ {
|
||||||
/* .get_name = */ ggml_backend_webgpu_buffer_type_get_name,
|
/* .get_name = */ ggml_backend_webgpu_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */
|
/* .alloc_buffer = */ ggml_backend_webgpu_buffer_type_alloc_buffer,
|
||||||
ggml_backend_webgpu_buffer_type_alloc_buffer, /* .get_alignment = */
|
/* .get_alignment = */ ggml_backend_webgpu_buffer_type_get_alignment,
|
||||||
ggml_backend_webgpu_buffer_type_get_alignment, /* .get_max_size = */
|
/* .get_max_size = */ ggml_backend_webgpu_buffer_type_get_max_size,
|
||||||
ggml_backend_webgpu_buffer_type_get_max_size, /* .get_alloc_size = */
|
/* .get_alloc_size = */ ggml_backend_webgpu_buffer_type_get_alloc_size,
|
||||||
ggml_backend_webgpu_buffer_type_get_alloc_size, /* .is_host = */ NULL, // defaults to false
|
/* .is_host = */ NULL, // defaults to false
|
||||||
},
|
},
|
||||||
/* .device = */
|
/* .device = */
|
||||||
dev,
|
dev,
|
||||||
/* .context = */
|
/* .context = */ NULL
|
||||||
NULL
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return &ggml_backend_webgpu_buffer_type;
|
return &ggml_backend_webgpu_buffer_type;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue