From 65f944bf1832985934b795613322991d1e3937c5 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 10 Dec 2025 17:00:15 +0100 Subject: [PATCH] cleanup --- ggml/src/ggml-cuda/CMakeLists.txt | 4 ---- ggml/src/ggml-cuda/mmq.cuh | 1 - 2 files changed, 5 deletions(-) diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt index c562960619..5ae2f14f7e 100644 --- a/ggml/src/ggml-cuda/CMakeLists.txt +++ b/ggml/src/ggml-cuda/CMakeLists.txt @@ -35,10 +35,6 @@ if (CUDAToolkit_FOUND) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8") list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) endif() - - if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8") - list(APPEND CMAKE_CUDA_ARCHITECTURES 100-real) - endif() endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index b8b1d9aefd..9b82247e07 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -806,7 +806,6 @@ static __device__ __forceinline__ void load_tiles_mxfp4_fp4(const char * __restr const block_mxfp4 * bxi = (const block_mxfp4 *) x + kbx0 + i * stride + kbx; - // Load 16 bytes more efficiently using memcpy (compiler optimizes to vector loads) int aux_q4[4]; memcpy(aux_q4, bxi->qs, 16);