Compare commits

...

3 Commits

Author SHA1 Message Date
iacopPBK fbc4cfcdde
Update ggml/src/ggml-cuda/mmq.cuh
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2026-04-01 13:24:19 +02:00
iacopPBK 777f5943a4
Update ggml/src/ggml-cuda/mmq.cuh
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2026-04-01 13:24:02 +02:00
iacopPBK d3065542f0
Update ggml/src/ggml-cuda/mmq.cuh
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2026-04-01 13:23:42 +02:00
1 changed file with 5 additions and 3 deletions

View File

@@ -384,7 +384,8 @@ static __device__ __forceinline__ void vec_dot_q4_0_q8_1_dp4a(
int u[2*VDR_Q4_0_Q8_1_MMQ];
constexpr int max_cpy = ggml_cuda_get_max_cpy_bytes();
constexpr int mcpy_int = max_cpy / sizeof(int);
constexpr int mcpy_int = max_cpy / sizeof(int);
static_assert(VDR_Q4_0_Q8_1_MMQ == 4, "bad VDR_Q4_0_Q8_1_MMQ");
int tmp0[4], tmp1[4];
@@ -400,7 +401,6 @@ static __device__ __forceinline__ void vec_dot_q4_0_q8_1_dp4a(
sum[j0/nwarps*mmq_y/warp_size + i0/warp_size] += vec_dot_q4_0_q8_1_impl<VDR_Q4_0_Q8_1_MMQ>
(&x_qs[i*(MMQ_TILE_NE_K + 1) + k0/QR4_0], u,
x_df[i*(MMQ_TILE_NE_K/QI4_0) + i/QI4_0 + k0/(QR4_0*QI4_0)], y_ds[j*MMQ_TILE_Y_K + k01/QI8_1]);
}
}
}
@@ -495,7 +495,9 @@ static __device__ __forceinline__ void vec_dot_q4_1_q8_1_dp4a(
int u[2*VDR_Q4_1_Q8_1_MMQ];
constexpr int max_cpy = ggml_cuda_get_max_cpy_bytes();
constexpr int mcpy_int = max_cpy / sizeof(int);
constexpr int mcpy_int = max_cpy / sizeof(int);
static_assert(VDR_Q4_0_Q8_1_MMQ == 4, "bad VDR_Q4_0_Q8_1_MMQ");
int tmp0[4], tmp1[4];