mirror of https://github.com/google/gemma.cpp.git
parent
c153d5255b
commit
b31e8f98e8
|
|
@ -17,7 +17,9 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <limits>
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ namespace gcpp {
|
|||
const AttentionActivationsPtrs& activations, ThreadingContext& ctx); \
|
||||
\
|
||||
void SingleFlashAttention(size_t start_pos, size_t last_pos, \
|
||||
const float* HWY_RESTRICT q, \
|
||||
const BF16* HWY_RESTRICT q, \
|
||||
const MatPtrT<KV_t>& k, const MatPtrT<KV_t>& v, \
|
||||
size_t layer_idx, \
|
||||
const AttentionActivationsPtrs& activations, \
|
||||
|
|
@ -60,6 +60,7 @@ namespace gcpp {
|
|||
size_t layer_idx, const MatPtr& query_norm_scale, \
|
||||
AttentionActivationsPtrs& activations, QBatch& qbatch, \
|
||||
ThreadingContext& ctx); \
|
||||
\
|
||||
/* NOLINTNEXTLINE(google-readability-namespace-comments) */ \
|
||||
} // namespace NAMESPACE
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <type_traits> // std::enable_if_t
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ops/matmul.h"
|
||||
|
|
|
|||
Loading…
Reference in New Issue