From 9dba9f5352308894bfb8786fcfe7c284168ff8f5 Mon Sep 17 00:00:00 2001 From: pl752 Date: Sat, 3 Jan 2026 15:13:40 +0500 Subject: [PATCH] (Bugfix, ggml-cuda) Pool alloc count fix + small size computation type adjustment (#18559) * CUDA: Fixed obj byte size instead of obj count being passed to pool alloc (fattn-common, dst_tmp_meta) * CUDA: Explicitly cast some of the int alloc counts before multiplication in argsort --------- Co-authored-by: pl752 --- ggml/src/ggml-cuda/argsort.cu | 4 ++-- ggml/src/ggml-cuda/fattn-common.cuh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu index da9652c3be..99669200ff 100644 --- a/ggml/src/ggml-cuda/argsort.cu +++ b/ggml/src/ggml-cuda/argsort.cu @@ -29,8 +29,8 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool, const int nrows, ggml_sort_order order, cudaStream_t stream) { - ggml_cuda_pool_alloc temp_indices_alloc(pool, ncols * nrows); - ggml_cuda_pool_alloc temp_keys_alloc(pool, ncols * nrows); + ggml_cuda_pool_alloc temp_indices_alloc(pool, ((size_t) ncols) * nrows); + ggml_cuda_pool_alloc temp_keys_alloc(pool, ((size_t) ncols) * nrows); ggml_cuda_pool_alloc offsets_alloc(pool, nrows + 1); int * temp_indices = temp_indices_alloc.get(); diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index 8dc82a9d3b..fa4e87ee47 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -918,7 +918,7 @@ void launch_fattn( blocks_num.y = 1; blocks_num.z = 1; - dst_tmp_meta.alloc(blocks_num.x*ncols * (2*2 + DV) * sizeof(float)); + dst_tmp_meta.alloc(((size_t) blocks_num.x) * ncols * (2 + DV/2)); } else { const int ntiles_KQ = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by tensor size.