diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index 46a95a1999..e1dca6b4b4 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -5796,19 +5796,12 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c GGML_ASSERT(dst); GGML_ASSERT(dst->extra); - const int ne00 = src0->ne[0]; - const cl_ulong nb01 = src0->nb[1]; - const cl_ulong nb02 = src0->nb[2]; - const cl_ulong nb03 = src0->nb[3]; - const int ne10 = src1->ne[0]; - const cl_ulong nb10 = src1->nb[0]; - const int ne11 = src1->ne[1]; - const int ne12 = src1->ne[2]; - const cl_ulong nb11 = src1->nb[1]; - const cl_ulong nb12 = src1->nb[2]; - const cl_ulong nb1 = dst->nb[1]; - const cl_ulong nb2 = dst->nb[2]; - const cl_ulong nb3 = dst->nb[3]; + GGML_TENSOR_LOCALS(int, ne0, src0, ne); + GGML_TENSOR_LOCALS(cl_ulong, nb0, src0, nb); + GGML_TENSOR_LOCALS(int, ne1, src1, ne); + GGML_TENSOR_LOCALS(cl_ulong, nb1, src1, nb); + GGML_TENSOR_LOCALS(int, ne, dst, ne); + GGML_TENSOR_LOCALS(cl_ulong, nb, dst, nb); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; @@ -5854,8 +5847,14 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_ulong), &nb2)); CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &nb3)); - size_t global_work_size[] = {(size_t)ne10*64, (size_t)ne11, (size_t)ne12}; - size_t local_work_size[] = {64, 1, 1}; + int max_workgroup_size = backend_ctx->get_kernel_workgroup_size(kernel); + int nth = 1; + while (nth < ne00 && 2*nth <= max_workgroup_size) { + nth *= 2; + } + + size_t global_work_size[] = {(size_t)ne10*nth, (size_t)ne11, (size_t)ne12}; + size_t local_work_size[] = {(size_t)nth, 1, 1}; backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); }