sycl : fix llama_kv_cache hang when the kv_cache is huge, e.g. 5 GB (#21283)

This commit is contained in:
Neo Zhang 2026-04-02 15:08:32 +08:00 committed by GitHub
parent fbd441c379
commit 4888137b17
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 9 additions and 3 deletions

View File

@@ -569,9 +569,15 @@ static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
SYCL_CHECK(
CHECK_TRY_ERROR(dpct::get_current_device().queues_wait_and_throw()));
SYCL_CHECK(CHECK_TRY_ERROR((*stream)
.memset(ctx->dev_ptr, value, buffer->size)
.wait()));
constexpr size_t MAX_CHUNK = 2ULL << 30; // 2 GiB
for (size_t off = 0; off < buffer->size; off += MAX_CHUNK) {
size_t chunk = std::min(buffer->size - off, MAX_CHUNK);
SYCL_CHECK(CHECK_TRY_ERROR(
(*stream)
.memset(static_cast<char*>(ctx->dev_ptr) + off, value, chunk)
.wait()
));
}
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__