cleanup
This commit is contained in:
parent
40eb6c7ccd
commit
65f944bf18
|
|
@ -35,10 +35,6 @@ if (CUDAToolkit_FOUND)
|
|||
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
|
||||
endif()
|
||||
|
||||
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES 100-real)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
|
|
|
|||
|
|
@ -806,7 +806,6 @@ static __device__ __forceinline__ void load_tiles_mxfp4_fp4(const char * __restr
|
|||
|
||||
const block_mxfp4 * bxi = (const block_mxfp4 *) x + kbx0 + i * stride + kbx;
|
||||
|
||||
// Load 16 bytes more efficiently using memcpy (compiler optimizes to vector loads)
|
||||
int aux_q4[4];
|
||||
memcpy(aux_q4, bxi->qs, 16);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue