From 5ea3be265ba6f8916daf52e19e3fb8efe9a03637 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Thu, 27 Nov 2025 09:40:13 +0100
Subject: [PATCH] cuda : fix top-k compilation when CUB is unavailable

This commit adds a macro guard around argsort_f32_i32_cuda_cub usage
in the top-k fallback path, falling back to bitonic sort when
GGML_CUDA_USE_CUB is not defined.

The motivation for this is that some environments like AMD HIP
do not have CUB available, causing compilation failure.

Refs: https://github.com/ggml-org/llama.cpp/actions/runs/19728226426/job/56523606840#step:6:208
---
 ggml/src/ggml-cuda/top-k.cu | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ggml/src/ggml-cuda/top-k.cu b/ggml/src/ggml-cuda/top-k.cu
index ae5989cacc..912c41626b 100644
--- a/ggml/src/ggml-cuda/top-k.cu
+++ b/ggml/src/ggml-cuda/top-k.cu
@@ -94,7 +94,11 @@ void ggml_cuda_op_top_k(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     int *                     tmp_dst = temp_dst_alloc.get();
 
     if (shared_mem > max_shared_mem || ncols > 1024) {
+#ifdef GGML_CUDA_USE_CUB
         argsort_f32_i32_cuda_cub(pool, src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream);
+#else
+        argsort_f32_i32_cuda_bitonic(src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream);
+#endif
     } else {
         argsort_f32_i32_cuda_bitonic(src0_d, tmp_dst, ncols, nrows, GGML_SORT_ORDER_DESC, stream);
     }