musa: fix build warnings (#15611)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
This commit is contained in: parent 835b2b915c, commit 0f7c69689f
@ -54,7 +54,7 @@ static __global__ void k_bin_bcast(const src0_t * src0,
|
||||||
const uint32_t i2 = fastdiv((blockDim.z * blockIdx.z + threadIdx.z), ne3);
|
const uint32_t i2 = fastdiv((blockDim.z * blockIdx.z + threadIdx.z), ne3);
|
||||||
const uint32_t i3 = (blockDim.z * blockIdx.z + threadIdx.z) - (i2 * ne3.z);
|
const uint32_t i3 = (blockDim.z * blockIdx.z + threadIdx.z) - (i2 * ne3.z);
|
||||||
|
|
||||||
if (i0s >= ne0 || i1 >= ne1 || i2 >= ne2 || i3 >= ne3.z) {
|
if (i0s >= (uint32_t)ne0 || i1 >= (uint32_t)ne1 || i2 >= (uint32_t)ne2 || i3 >= ne3.z) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,7 @@ static __global__ void mmq_ids_helper(
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int offset = neu_padded; offset < warp_size; offset += neu_padded) {
|
for (int offset = neu_padded; offset < warp_size; offset += neu_padded) {
|
||||||
const int tmp = __shfl_up_sync(0xFFFFFFFF, it_compact_add_self, offset, warp_size);
|
const int tmp = __shfl_up_sync(0xFFFFFFFF, it_compact_add_self, offset, warp_size);
|
||||||
if (threadIdx.x >= offset) {
|
if (threadIdx.x >= static_cast<unsigned int>(offset)) {
|
||||||
it_compact_add_lower += tmp;
|
it_compact_add_lower += tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -110,7 +110,7 @@ static __global__ void mmq_ids_helper(
|
||||||
|
|
||||||
expert_bounds[expert] = nex_prev;
|
expert_bounds[expert] = nex_prev;
|
||||||
|
|
||||||
if (expert < gridDim.x - 1) {
|
if (expert < static_cast<int>(gridDim.x) - 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -220,7 +220,7 @@ static __global__ void mul_mat_vec_q(
|
||||||
tmp[j][i] = warp_reduce_sum<warp_size>(tmp[j][i]);
|
tmp[j][i] = warp_reduce_sum<warp_size>(tmp[j][i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + int(threadIdx.x) < stride_col_dst)) {
|
if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || uint32_t(row0 + threadIdx.x) < stride_col_dst)) {
|
||||||
dst[j*stride_col_dst + threadIdx.x] = tmp[j][threadIdx.x];
|
dst[j*stride_col_dst + threadIdx.x] = tmp[j][threadIdx.x];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,8 @@ static __global__ __launch_bounds__(CUDA_PAD_REFLECT_1D_BLOCK_SIZE, 1) void
|
||||||
}
|
}
|
||||||
const float value = *(const float *) (src0_ptr + src_idx * nb00);
|
const float value = *(const float *) (src0_ptr + src_idx * nb00);
|
||||||
*(float *) (dst_ptr + i0 * nb0) = value;
|
*(float *) (dst_ptr + i0 * nb0) = value;
|
||||||
|
|
||||||
|
GGML_UNUSED(p1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_cuda_op_pad_reflect_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
void ggml_cuda_op_pad_reflect_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue