opencl: use larger workgroup size for get_rows (#20316)
This commit is contained in:
parent
3d9ab225e7
commit
0516e04bf9
|
|
@ -5796,19 +5796,12 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
|
|||
GGML_ASSERT(dst);
|
||||
GGML_ASSERT(dst->extra);
|
||||
|
||||
const int ne00 = src0->ne[0];
|
||||
const cl_ulong nb01 = src0->nb[1];
|
||||
const cl_ulong nb02 = src0->nb[2];
|
||||
const cl_ulong nb03 = src0->nb[3];
|
||||
const int ne10 = src1->ne[0];
|
||||
const cl_ulong nb10 = src1->nb[0];
|
||||
const int ne11 = src1->ne[1];
|
||||
const int ne12 = src1->ne[2];
|
||||
const cl_ulong nb11 = src1->nb[1];
|
||||
const cl_ulong nb12 = src1->nb[2];
|
||||
const cl_ulong nb1 = dst->nb[1];
|
||||
const cl_ulong nb2 = dst->nb[2];
|
||||
const cl_ulong nb3 = dst->nb[3];
|
||||
GGML_TENSOR_LOCALS(int, ne0, src0, ne);
|
||||
GGML_TENSOR_LOCALS(cl_ulong, nb0, src0, nb);
|
||||
GGML_TENSOR_LOCALS(int, ne1, src1, ne);
|
||||
GGML_TENSOR_LOCALS(cl_ulong, nb1, src1, nb);
|
||||
GGML_TENSOR_LOCALS(int, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(cl_ulong, nb, dst, nb);
|
||||
|
||||
ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context;
|
||||
|
||||
|
|
@ -5854,8 +5847,14 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c
|
|||
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_ulong), &nb2));
|
||||
CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &nb3));
|
||||
|
||||
size_t global_work_size[] = {(size_t)ne10*64, (size_t)ne11, (size_t)ne12};
|
||||
size_t local_work_size[] = {64, 1, 1};
|
||||
int max_workgroup_size = backend_ctx->get_kernel_workgroup_size(kernel);
|
||||
int nth = 1;
|
||||
while (nth < ne00 && 2*nth <= max_workgroup_size) {
|
||||
nth *= 2;
|
||||
}
|
||||
|
||||
size_t global_work_size[] = {(size_t)ne10*nth, (size_t)ne11, (size_t)ne12};
|
||||
size_t local_work_size[] = {(size_t)nth, 1, 1};
|
||||
|
||||
backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue