Default to Bc = 32 for the FA_SCALAR path (keep 64 when rows == FA_ROWS_1 and HSK/HSV is not a multiple of 16)
This commit is contained in:
parent
d8d536cf98
commit
8fbd3575e0
|
|
@ -2789,7 +2789,14 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t hsk, uint3
|
||||||
GGML_UNUSED(clamp);
|
GGML_UNUSED(clamp);
|
||||||
|
|
||||||
if (path == FA_SCALAR) {
|
if (path == FA_SCALAR) {
|
||||||
return {get_fa_scalar_num_rows(hsk, hsv, rows, small_cache), 64};
|
if (rows == FA_ROWS_1 && ((hsk|hsv) & 8)) {
|
||||||
|
// HSV/HSK not being a multiple of 16 makes D_split smaller, which makes cols_per_iter
|
||||||
|
// larger, and Bc needs to be >= cols_per_thread. 64 is large enough, 32 is not.
|
||||||
|
// But this only applies to row_split=1, meaning FA_ROWS_1
|
||||||
|
return {get_fa_scalar_num_rows(hsk, hsv, rows, small_cache), 64};
|
||||||
|
} else {
|
||||||
|
return {get_fa_scalar_num_rows(hsk, hsv, rows, small_cache), 32};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (path == FA_COOPMAT1) {
|
if (path == FA_COOPMAT1) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue