This commit is contained in:
copybara-service[bot] 2026-03-25 12:02:06 +00:00 committed by GitHub
commit 3cc9ec1177
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 21 additions and 20 deletions

View File

@@ -15,26 +15,27 @@
namespace gcpp { namespace gcpp {
// Passed to HWY_VISIT_TARGETS; declares for one target.
//
// Expands to `namespace NAMESPACE { ... }` holding declarations (no
// definitions) of:
//   - TiledAttention
//   - TransposeStridedQueries
//   - LocalAttentionForAllHeadsTokensAndBatch
//   - TransposeQueriesToGroupsOfNBF16orInt16 (function template over OutT)
//
// TARGET is accepted but never referenced in the expansion; NAMESPACE names
// the namespace that receives the declarations.
//
// Comments inside the macro body must use /* */: a // comment would swallow
// the trailing line-continuation backslash. Only the final line, which has no
// continuation, may carry a // comment.
#define GEMMA_DECL_TILED_ATTENTION(TARGET, NAMESPACE)                        \
  namespace NAMESPACE {                                                      \
  void TiledAttention(AttentionImpl attention_impl, size_t num_tokens,       \
                      size_t layer_idx, const LayerWeightsPtrs& layer,       \
                      AttentionActivationsPtrs& activations, QBatch& qbatch, \
                      MatMulEnv& env, int flags);                            \
  void TransposeStridedQueries(hwy::Span<float*> queries, int qkv_dim,       \
                               hwy::Span<float> transposed_queries);         \
  void LocalAttentionForAllHeadsTokensAndBatch(                              \
      AttentionImpl attention_impl, const size_t num_tokens,                 \
      const size_t layer_idx, const LayerWeightsPtrs& layer,                 \
      AttentionActivationsPtrs& activations, QBatch& qbatch,                 \
      ThreadingContext& ctx);                                                \
                                                                             \
  template <typename OutT>                                                   \
  std::tuple<std::vector<OutT, hwy::AlignedAllocator<OutT>>,                 \
             std::vector<OutT*>, AlignedFloatVector>                         \
  TransposeQueriesToGroupsOfNBF16orInt16(hwy::Span<float*> queries_ptrs,     \
                                         int qkv_dim, size_t group_size);    \
                                                                             \
  /* NOLINTNEXTLINE(google-readability-namespace-comments) */                \
  }  // namespace NAMESPACE
// Function declarations for each SIMD target. Allows direct call from the