mirror of https://github.com/google/gemma.cpp.git
Merge 7efeb4fe06 into f56d18dd68
This commit is contained in:
commit
3cc9ec1177
|
|
@ -15,26 +15,27 @@
|
||||||
namespace gcpp {
|
namespace gcpp {
|
||||||
|
|
||||||
// Passed to HWY_VISIT_TARGETS; declares for one target.
|
// Passed to HWY_VISIT_TARGETS; declares for one target.
|
||||||
#define GEMMA_DECL_TILED_ATTENTION(TARGET, NAMESPACE) \
|
#define GEMMA_DECL_TILED_ATTENTION(TARGET, NAMESPACE) \
|
||||||
namespace NAMESPACE { \
|
namespace NAMESPACE { \
|
||||||
void TiledAttention(AttentionImpl attention_impl, size_t num_tokens, \
|
void TiledAttention(AttentionImpl attention_impl, size_t num_tokens, \
|
||||||
size_t layer_idx, const LayerWeightsPtrs& layer, \
|
size_t layer_idx, const LayerWeightsPtrs& layer, \
|
||||||
AttentionActivationsPtrs& activations, QBatch& qbatch, \
|
AttentionActivationsPtrs& activations, QBatch& qbatch, \
|
||||||
MatMulEnv& env, int flags); \
|
MatMulEnv& env, int flags); \
|
||||||
void TransposeStridedQueries(hwy::Span<float*> queries, int qkv_dim, \
|
void TransposeStridedQueries(hwy::Span<float*> queries, int qkv_dim, \
|
||||||
hwy::Span<float> transposed_queries); \
|
hwy::Span<float> transposed_queries); \
|
||||||
void LocalAttentionForAllHeadsTokensAndBatch( \
|
void LocalAttentionForAllHeadsTokensAndBatch( \
|
||||||
AttentionImpl attention_impl, const size_t num_tokens, \
|
AttentionImpl attention_impl, const size_t num_tokens, \
|
||||||
const size_t layer_idx, const LayerWeightsPtrs& layer, \
|
const size_t layer_idx, const LayerWeightsPtrs& layer, \
|
||||||
AttentionActivationsPtrs& activations, QBatch& qbatch, \
|
AttentionActivationsPtrs& activations, QBatch& qbatch, \
|
||||||
ThreadingContext& ctx); \
|
ThreadingContext& ctx); \
|
||||||
\
|
\
|
||||||
template <typename OutT> \
|
template <typename OutT> \
|
||||||
std::tuple<std::vector<OutT, hwy::AlignedAllocator<OutT>>, \
|
std::tuple<std::vector<OutT, hwy::AlignedAllocator<OutT>>, \
|
||||||
std::vector<OutT*>, AlignedFloatVector> \
|
std::vector<OutT*>, AlignedFloatVector> \
|
||||||
TransposeQueriesToGroupsOfNBF16orInt16(hwy::Span<float*> queries_ptrs, \
|
TransposeQueriesToGroupsOfNBF16orInt16(hwy::Span<float*> queries_ptrs, \
|
||||||
int qkv_dim, size_t group_size); \
|
int qkv_dim, size_t group_size); \
|
||||||
/* NOLINTNEXTLINE(google-readability-namespace-comments) */ \
|
\
|
||||||
|
/* NOLINTNEXTLINE(google-readability-namespace-comments) */ \
|
||||||
} // namespace NAMESPACE
|
} // namespace NAMESPACE
|
||||||
|
|
||||||
// Function declarations for each SIMD target. Allows direct call from the
|
// Function declarations for each SIMD target. Allows direct call from the
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue