58 lines
1.3 KiB
Common Lisp
58 lines
1.3 KiB
Common Lisp
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
|
|
kernel void kernel_im2col_f32(
|
|
global float * src1,
|
|
ulong offset1,
|
|
global float * dst,
|
|
ulong offsetd,
|
|
ulong batch_offset,
|
|
ulong delta_offset,
|
|
long IW,
|
|
long IH,
|
|
long IC,
|
|
long OW,
|
|
long OH,
|
|
long KW,
|
|
long KH,
|
|
long pelements,
|
|
long CHW,
|
|
int s0,
|
|
int s1,
|
|
int p0,
|
|
int p1,
|
|
int d0,
|
|
int d1
|
|
) {
|
|
long i = get_global_id(0);
|
|
if (i >= pelements) {
|
|
return;
|
|
}
|
|
|
|
src1 = (global float*)((global char*)src1 + offset1);
|
|
dst = (global float*)((global char*)dst + offsetd);
|
|
|
|
long ksize = OW * KH;
|
|
long kx = i / ksize;
|
|
long kd = kx * ksize;
|
|
long ky = (i - kd) / OW;
|
|
long ix = i % OW;
|
|
|
|
long oh = get_group_id(1);
|
|
long batch = get_group_id(2) / IC;
|
|
long ic = get_group_id(2) % IC;
|
|
|
|
long iiw = ix * s0 + kx * d0 - p0;
|
|
long iih = oh * s1 + ky * d1 - p1;
|
|
|
|
long offset_dst =
|
|
((batch * OH + oh) * OW + ix) * CHW +
|
|
(ic * (KW * KH) + ky * KW + kx);
|
|
|
|
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
dst[offset_dst] = 0.0f;
|
|
} else {
|
|
long offset_src = ic * delta_offset + batch * batch_offset;
|
|
dst[offset_dst] = src1[offset_src + iih * IW + iiw];
|
|
}
|
|
}
|