CANN: Stream sync between devices for acl_graph (#15809)

* CANN: Switch to stream synchronization

Switch to stream synchronization because events are not effective in ACL graph mode.

Co-authored-by: hipudding <huafengchun@gmail.com>

* CANN: add Comments

---------

Co-authored-by: hipudding <huafengchun@gmail.com>
This commit is contained in:
Chenguang Li 2025-09-08 10:03:29 +08:00 committed by GitHub
parent 3976dfbe00
commit 85ca66a746
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 9 additions and 8 deletions

View File

@@ -2092,16 +2092,17 @@ static bool ggml_backend_cann_cpy_tensor_async(
ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size, ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size,
ACL_MEMCPY_DEVICE_TO_DEVICE, ACL_MEMCPY_DEVICE_TO_DEVICE,
cann_ctx_src->stream())); cann_ctx_src->stream()));
// record event on src stream after the copy // record event on src stream after the copy
if (!cann_ctx_src->copy_event) { // TODO: this event is not effective with acl graph mode, change to use aclrtSynchronizeStream
ACL_CHECK(aclrtCreateEventWithFlag(&cann_ctx_src->copy_event, ACL_EVENT_SYNC)); // if (!cann_ctx_src->copy_event) {
} // ACL_CHECK(aclrtCreateEventWithFlag(&cann_ctx_src->copy_event, ACL_EVENT_SYNC));
ACL_CHECK(aclrtRecordEvent(cann_ctx_src->copy_event, cann_ctx_src->stream())); // }
// ACL_CHECK(aclrtRecordEvent(cann_ctx_src->copy_event, cann_ctx_src->stream()));
// wait on dst stream for the copy to complete // // wait on dst stream for the copy to complete
ggml_cann_set_device(cann_ctx_dst->device); // ggml_cann_set_device(cann_ctx_dst->device);
ACL_CHECK(aclrtStreamWaitEvent(cann_ctx_dst->stream(), cann_ctx_src->copy_event)); // ACL_CHECK(aclrtStreamWaitEvent(cann_ctx_dst->stream(), cann_ctx_src->copy_event));
ACL_CHECK(aclrtSynchronizeStream(cann_ctx_src->stream()));
} else { } else {
// src and dst are on the same backend // src and dst are on the same backend
ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size, ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size,