default to Bc 32

This commit is contained in:
Ruben Ortlam 2026-02-08 11:25:54 +01:00
parent d8d536cf98
commit 8fbd3575e0
1 changed file with 8 additions and 1 deletion

View File

@@ -2789,7 +2789,14 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t hsk, uint3
GGML_UNUSED(clamp);
if (path == FA_SCALAR) {
return {get_fa_scalar_num_rows(hsk, hsv, rows, small_cache), 64};
if (rows == FA_ROWS_1 && ((hsk|hsv) & 8)) {
// HSV/HSK not being a multiple of 16 makes D_split smaller, which makes cols_per_iter
// larger, and Bc needs to be >= cols_per_thread. 64 is large enough, 32 is not.
// But this only applies to row_split=1, meaning FA_ROWS_1
return {get_fa_scalar_num_rows(hsk, hsv, rows, small_cache), 64};
} else {
return {get_fa_scalar_num_rows(hsk, hsv, rows, small_cache), 32};
}
}
if (path == FA_COOPMAT1) {