This commit is contained in:
Aman Gupta 2025-12-10 17:00:15 +01:00
parent 40eb6c7ccd
commit 65f944bf18
2 changed files with 0 additions and 5 deletions

View File

@@ -35,10 +35,6 @@ if (CUDAToolkit_FOUND)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
endif()
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
list(APPEND CMAKE_CUDA_ARCHITECTURES 100-real)
endif()
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

View File

@@ -806,7 +806,6 @@ static __device__ __forceinline__ void load_tiles_mxfp4_fp4(const char * __restr
const block_mxfp4 * bxi = (const block_mxfp4 *) x + kbx0 + i * stride + kbx;
// Load 16 bytes more efficiently using memcpy (compiler optimizes to vector loads)
int aux_q4[4];
memcpy(aux_q4, bxi->qs, 16);