Renamed new async function, fixed some whitespace

parent 942bbfc9dc
commit f6b408d843
@@ -48,7 +48,7 @@ extern "C" {
         // tensor data access
         void (*memset_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
         void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-        void (*set_tensor_sync_optional) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size, bool sync);
+        void (*set_tensor_async) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
         // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
         bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
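For orientation, a minimal sketch of how a backend could provide the renamed optional callback; the my_buffer_* names and the plain memcpy stand-in are assumptions for illustration and are not part of this commit, only the ggml types and the interface fields are real. A real device backend would enqueue the copy on its own stream or queue in the async variant and wait for that queue in the synchronous one, as the CUDA changes further below do.

#include <string.h>
#include "ggml-backend-impl.h"

// Sketch only: set_tensor_async dispatches the copy, set_tensor wraps it and waits.
static void my_buffer_set_tensor_async(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                       const void * data, size_t offset, size_t size) {
    memcpy((char *)tensor->data + offset, data, size); // stand-in for an enqueued asynchronous copy
    (void) buffer;
}

static void my_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                 const void * data, size_t offset, size_t size) {
    my_buffer_set_tensor_async(buffer, tensor, data, offset, size);
    // a real backend would synchronize its copy queue here before returning
}

These would then be referenced from the backend's ggml_backend_buffer_i table in the .set_tensor and .set_tensor_async slots, with .set_tensor_async left NULL when no asynchronous path exists.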
@@ -285,10 +285,11 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, siz
     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
 
+    // do not synchronize directly after dispatching async tensor copies
     static bool disable_sync_optimization = (getenv("GGML_CUDA_DISABLE_SYNC_OPTIMIZATION") != nullptr);
 
-    if (!disable_sync_optimization && buf->iface.set_tensor_sync_optional != NULL) {
-        buf->iface.set_tensor_sync_optional(buf, tensor, data, offset, size, false);
+    if (!disable_sync_optimization && buf->iface.set_tensor_async != NULL) {
+        buf->iface.set_tensor_async(buf, tensor, data, offset, size);
     } else {
         buf->iface.set_tensor(buf, tensor, data, offset, size);
     }
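For context, a caller-side sketch of what the dispatch above implies; the upload_weights helper and the way the tensor is allocated are assumptions, only ggml_backend_tensor_set, ggml_nbytes and the environment variable come from the code in this change.

#include "ggml.h"
#include "ggml-backend.h"

// Illustrative only: with this change, the call below may merely dispatch the
// host-to-device copy (for CUDA, a cudaMemcpyAsync on cudaStreamPerThread) instead
// of also synchronizing. Setting GGML_CUDA_DISABLE_SYNC_OPTIMIZATION=1 in the
// environment forces the fully synchronous iface.set_tensor path instead.
static void upload_weights(struct ggml_tensor * t, const float * host_data) {
    ggml_backend_tensor_set(t, host_data, 0, ggml_nbytes(t));
}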
@@ -610,7 +611,7 @@ static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = {
     /* .init_tensor = */ NULL,
     /* .memset_tensor = */ NULL,
     /* .set_tensor = */ NULL,
-    /* .set_tensor_s_o = */ NULL,
+    /* .set_tensor_async = */ NULL,
     /* .get_tensor = */ NULL,
     /* .cpy_tensor = */ NULL,
     /* .clear = */ ggml_backend_multi_buffer_clear,
@@ -2133,7 +2134,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
     /* .init_tensor = */ NULL, // no initialization required
     /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
     /* .set_tensor = */ ggml_backend_cpu_buffer_set_tensor,
-    /* .set_tensor_s_o = */ NULL,
+    /* .set_tensor_async = */ NULL,
     /* .get_tensor = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor,
     /* .clear = */ ggml_backend_cpu_buffer_clear,
@@ -2146,7 +2147,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
     /* .init_tensor = */ NULL, // no initialization required
     /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
     /* .set_tensor = */ ggml_backend_cpu_buffer_set_tensor,
-    /* .set_tensor_s_o = */ NULL,
+    /* .set_tensor_async = */ NULL,
     /* .get_tensor = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor,
     /* .clear = */ ggml_backend_cpu_buffer_clear,
@@ -110,7 +110,7 @@ static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
     /* .init_tensor = */ ggml_backend_amx_buffer_init_tensor,
     /* .memset_tensor = */ ggml_backend_amx_buffer_memset_tensor,
     /* .set_tensor = */ ggml_backend_amx_buffer_set_tensor,
-    /* .set_tensor_s_o = */ nullptr,
+    /* .set_tensor_async = */ nullptr,
     /* .get_tensor = */ nullptr,
     /* .cpy_tensor = */ nullptr,
     /* .clear = */ ggml_backend_amx_buffer_clear,
@@ -617,20 +617,16 @@ static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer,
     CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
 }
 
-static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+static void ggml_backend_cuda_buffer_set_tensor_async(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
 
     ggml_cuda_set_device(ctx->device);
     CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread));
-    CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
 }
 
-static void ggml_backend_cuda_buffer_set_tensor_sync_optional(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size, bool sync) {
-    ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
-
-    ggml_cuda_set_device(ctx->device);
-    CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread));
-    if (sync) CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
+static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+    ggml_backend_cuda_buffer_set_tensor_async(buffer, tensor, data, offset, size);
+    CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
 }
 
 static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
@@ -676,7 +672,7 @@ static const ggml_backend_buffer_i ggml_backend_cuda_buffer_interface = {
     /* .init_tensor = */ ggml_backend_cuda_buffer_init_tensor,
     /* .memset_tensor = */ ggml_backend_cuda_buffer_memset_tensor,
     /* .set_tensor = */ ggml_backend_cuda_buffer_set_tensor,
-    /* .set_tensor_s_o = */ ggml_backend_cuda_buffer_set_tensor_sync_optional,
+    /* .set_tensor_async = */ ggml_backend_cuda_buffer_set_tensor_async,
     /* .get_tensor = */ ggml_backend_cuda_buffer_get_tensor,
     /* .cpy_tensor = */ ggml_backend_cuda_buffer_cpy_tensor,
     /* .clear = */ ggml_backend_cuda_buffer_clear,
@@ -989,7 +985,7 @@ static const ggml_backend_buffer_i ggml_backend_cuda_split_buffer_interface = {
     /* .init_tensor = */ ggml_backend_cuda_split_buffer_init_tensor,
     /* .memset_tensor = */ NULL,
     /* .set_tensor = */ ggml_backend_cuda_split_buffer_set_tensor,
-    /* .set_tensor_s_o = */ NULL,
+    /* .set_tensor_async = */ NULL,
     /* .get_tensor = */ ggml_backend_cuda_split_buffer_get_tensor,
     /* .cpy_tensor = */ NULL,
     /* .clear = */ ggml_backend_cuda_split_buffer_clear,