mirror of https://github.com/google/gemma.cpp.git
parent
d6e836c651
commit
197c1a049c
|
|
@ -472,7 +472,7 @@ struct CompressTraits<int8_t> {
|
|||
const auto vi32_1 = hn::NearestInt(v1);
|
||||
const auto vi16 = hn::OrderedDemote2To(di16, vi32_0, vi32_1);
|
||||
const auto vi8 = hn::OrderedDemote2To(
|
||||
di8_16, hn::UpperHalf(di16_16, vi16), hn::LowerHalf(di16_16, vi16));
|
||||
di8_16, hn::LowerHalf(di16_16, vi16), hn::UpperHalf(di16_16, vi16));
|
||||
hn::StoreU(vi8, di8_16, packed.ptr + packed_ofs + i);
|
||||
}
|
||||
}
|
||||
|
|
@ -487,7 +487,7 @@ struct CompressTraits<int8_t> {
|
|||
const auto vi32_1 = hn::NearestInt(v1);
|
||||
const auto vi16 = hn::OrderedDemote2To(di16, vi32_0, vi32_1);
|
||||
const auto vi8 = hn::OrderedDemote2To(
|
||||
di8_16, hn::UpperHalf(di16_16, vi16), hn::LowerHalf(di16_16, vi16));
|
||||
di8_16, hn::LowerHalf(di16_16, vi16), hn::UpperHalf(di16_16, vi16));
|
||||
hn::StoreN(vi8, di8_16, packed.ptr + packed_ofs + i, remaining);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -249,8 +249,7 @@ static HWY_INLINE void ComputeQKVTransposedTile(
|
|||
v_cache_values = v_buf;
|
||||
}
|
||||
|
||||
if (attention_impl == AttentionImpl::kFlashTransposedQsBF16 &&
|
||||
!IsInt8<KV_T>()) {
|
||||
if (attention_impl == AttentionImpl::kFlashTransposedQsBF16) {
|
||||
const int in_tile_idx_mod_2 = in_tile_idx % 2;
|
||||
for (int dim = 0; dim < qkv_dim; dim += 2) {
|
||||
const int dim_mod_2 = dim % 2;
|
||||
|
|
|
|||
Loading…
Reference in New Issue