Add comments and clean up code
This commit is contained in:
parent
c770ca2812
commit
0bf9f09ae4
|
|
@ -897,7 +897,7 @@ static __device__ __forceinline__ void mma(
|
||||||
GGML_UNUSED(A);
|
GGML_UNUSED(A);
|
||||||
GGML_UNUSED(B);
|
GGML_UNUSED(B);
|
||||||
NO_DEVICE_CODE;
|
NO_DEVICE_CODE;
|
||||||
#endif // AMD_MFMA_AVAILABLE
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1178,7 +1178,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif defined(AMD_WMMA_AVAILABLE)
|
#elif defined(AMD_WMMA_AVAILABLE) // WMMA instructions can handle 16x4 tiles directly, so loading 64x2 tiles is not required
|
||||||
typedef tile<16, 4, int> tile_A;
|
typedef tile<16, 4, int> tile_A;
|
||||||
typedef tile<16, 4, int> tile_B;
|
typedef tile<16, 4, int> tile_B;
|
||||||
typedef tile<16, 16, int> tile_C;
|
typedef tile<16, 16, int> tile_C;
|
||||||
|
|
@ -1500,7 +1500,7 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif defined(AMD_WMMA_AVAILABLE)
|
#elif defined(AMD_WMMA_AVAILABLE) // WMMA instructions can handle 16x4 tiles directly, so loading 64x2 tiles is not required
|
||||||
|
|
||||||
typedef tile<16, 4, int> tile_A;
|
typedef tile<16, 4, int> tile_A;
|
||||||
typedef tile<16, 4, int> tile_B;
|
typedef tile<16, 4, int> tile_B;
|
||||||
|
|
@ -2313,7 +2313,7 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif defined(AMD_WMMA_AVAILABLE)
|
#elif defined(AMD_WMMA_AVAILABLE) // WMMA instructions can handle 16x4 tiles directly, so loading 64x2 tiles is not required
|
||||||
typedef tile<16, 4, int> tile_A;
|
typedef tile<16, 4, int> tile_A;
|
||||||
typedef tile<16, 4, int> tile_B;
|
typedef tile<16, 4, int> tile_B;
|
||||||
typedef tile<16, 16, int> tile_C;
|
typedef tile<16, 16, int> tile_C;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue