mirror of https://github.com/google/gemma.cpp.git
Fix loop iteration in GeluMulToBF16
Also attempt to speed up builders (parallel).
PiperOrigin-RevId: 613092863
This commit is contained in:
parent
c8b9675898
commit
3cdd5e524a
|
|
@@ -44,7 +44,7 @@ jobs:
|
|||
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
|
||||
- name: Build
|
||||
- run: cmake --build ${{ github.workspace }}/build --preset ${{ matrix.preset }} --config ${{ matrix.build_type }}
|
||||
+ run: cmake --build ${{ github.workspace }}/build --preset ${{ matrix.preset }} --config ${{ matrix.build_type }} -j 4
|
||||
|
||||
- name: Archive production artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
|
|
|
|||
2
ops.h
2
ops.h
|
|
@@ -241,7 +241,7 @@ static HWY_NOINLINE HWY_MAYBE_UNUSED void GeluMulToBF16(
|
|||
|
||||
size_t i = 0;
|
||||
if (size >= 2 * NF) {
|
||||
- for (; i < size - 2 * NF; i += 2 * NF) {
|
||||
+ for (; i <= size - 2 * NF; i += 2 * NF) {
|
||||
const VF mul0 = hn::LoadU(df, mul + i);
|
||||
const VF mul1 = hn::LoadU(df, mul + i + NF);
|
||||
const VF g0 = hn::Mul(mul0, Gelu(df, hn::LoadU(df, gelu_in + i)));
|
||||
|
|
|
|||
Loading…
Reference in New Issue