cuda : fix top-k compilation when CUB is unavailable

This commit adds a macro guard around argsort_f32_i32_cuda_cub usage
in the top-k fallback path, falling back to bitonic sort when
GGML_CUDA_USE_CUB is not defined.

The motivation for this is that some environments like AMD HIP
do not have CUB available, causing compilation failure.

Refs: https://github.com/ggml-org/llama.cpp/actions/runs/19728226426/job/56523606840#step:6:208
This commit is contained in:
Daniel Bevenius 2025-11-27 09:40:13 +01:00
parent 51107a0b63
commit 5ea3be265b
No known key found for this signature in database
1 changed files with 4 additions and 0 deletions

View File

@ -94,7 +94,11 @@ void ggml_cuda_op_top_k(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
int * tmp_dst = temp_dst_alloc.get(); int * tmp_dst = temp_dst_alloc.get();
if (shared_mem > max_shared_mem || ncols > 1024) { if (shared_mem > max_shared_mem || ncols > 1024) {
#ifdef GGML_CUDA_USE_CUB
argsort_f32_i32_cuda_cub(pool, src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream); argsort_f32_i32_cuda_cub(pool, src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream);
#else
argsort_f32_i32_cuda_bitonic(src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream);
#endif
} else { } else {
argsort_f32_i32_cuda_bitonic(src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream); argsort_f32_i32_cuda_bitonic(src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream);
} }