ggml : fix bug in CUDA Hadamard transform implementation
This commit is contained in:
parent 83a0313a14
commit 6011bdd92b
@@ -5022,8 +5022,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_SOLVE_TRI:
         case GGML_OP_SCATTER:
             return true;
-        case GGML_OP_HADAMARD:
-            return (op->ne[0] == 64 || op->ne[0] == 128 || op->ne[0] == 256) && op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
+        case GGML_OP_HADAMARD: {
+            int nh = op->op_params[0];
+            return (nh == 64 || nh == 128 || nh == 256) && op->ne[0] % nh == 0 && op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
+        }
         default:
             return false;
     }
|
||||||
|
|
|
||||||
|
|
@@ -30,7 +30,7 @@ static __global__ void hadamard_f32(const char * src, char * dst, int ne0,
     float scale = ksqrt2;

 #pragma unroll
-    for (int h = 2; h < nh; h <<= 2) {
+    for (int h = 2; h < nh; h <<= 1) {
         __syncthreads();
         int ii = tid/h, jj = tid%h;
         int j = 2*h*ii+jj;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue