mirror of https://github.com/google/gemma.cpp.git
Windows build fixes: struct vs class, unused arg/var, avoid VLA
PiperOrigin-RevId: 724216434
This commit is contained in:
parent
82ca526c0c
commit
c822957fce
|
|
@@ -50,7 +50,7 @@ namespace gcpp {
|
||||||
// fields: not required for the intended use case of `ModelConfig`.
|
// fields: not required for the intended use case of `ModelConfig`.
|
||||||
// - support any other languages than C++ and Python (for the exporter).
|
// - support any other languages than C++ and Python (for the exporter).
|
||||||
|
|
||||||
class IFields; // breaks circular dependency
|
struct IFields; // breaks circular dependency
|
||||||
|
|
||||||
// Visitors are internal-only, but their base class is visible to user code
|
// Visitors are internal-only, but their base class is visible to user code
|
||||||
// because their `IFields::VisitFields` calls `visitor.operator()`.
|
// because their `IFields::VisitFields` calls `visitor.operator()`.
|
||||||
|
|
|
||||||
|
|
@@ -252,7 +252,6 @@ class NuqClustering {
|
||||||
using VF = hn::Vec<decltype(df)>;
|
using VF = hn::Vec<decltype(df)>;
|
||||||
using MF = hn::Mask<decltype(df)>;
|
using MF = hn::Mask<decltype(df)>;
|
||||||
using VI = hn::Vec<decltype(di)>;
|
using VI = hn::Vec<decltype(di)>;
|
||||||
const VI k1 = hn::Set(di, 1);
|
|
||||||
const size_t N = hn::Lanes(df);
|
const size_t N = hn::Lanes(df);
|
||||||
HWY_DASSERT(kGroupSize % N == 0);
|
HWY_DASSERT(kGroupSize % N == 0);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -207,6 +207,7 @@ static LayerConfig LayerConfigGriffin2B(size_t model_dim) {
|
||||||
config.kv_heads = 1;
|
config.kv_heads = 1;
|
||||||
config.qkv_dim = 256;
|
config.qkv_dim = 256;
|
||||||
config.conv1d_width = 4;
|
config.conv1d_width = 4;
|
||||||
|
HWY_DASSERT(config.conv1d_width <= kMaxConv1DWidth);
|
||||||
config.ff_biases = true;
|
config.ff_biases = true;
|
||||||
config.softmax_attn_output_biases = true;
|
config.softmax_attn_output_biases = true;
|
||||||
config.optimized_gating = false;
|
config.optimized_gating = false;
|
||||||
|
|
|
||||||
|
|
@@ -44,6 +44,7 @@ namespace gcpp {
|
||||||
static constexpr size_t kSeqLen = GEMMA_MAX_SEQLEN;
|
static constexpr size_t kSeqLen = GEMMA_MAX_SEQLEN;
|
||||||
static constexpr size_t kTopK = GEMMA_TOPK;
|
static constexpr size_t kTopK = GEMMA_TOPK;
|
||||||
static constexpr size_t kVocabSize = 256000;
|
static constexpr size_t kVocabSize = 256000;
|
||||||
|
static constexpr size_t kMaxConv1DWidth = 4;
|
||||||
|
|
||||||
using EmbedderInputT = BF16;
|
using EmbedderInputT = BF16;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -109,7 +109,7 @@ HWY_NOINLINE void GriffinRecurrent(size_t batch_start, size_t num_tokens,
|
||||||
const size_t layer_offset = layer * model_dim * (conv_1d_width - 1);
|
const size_t layer_offset = layer * model_dim * (conv_1d_width - 1);
|
||||||
|
|
||||||
// cache[i] = input at time t-i.
|
// cache[i] = input at time t-i.
|
||||||
float* HWY_RESTRICT cache[HWY_MAX(conv_1d_width, 1)];
|
float* HWY_RESTRICT cache[kMaxConv1DWidth];
|
||||||
cache[0] = x;
|
cache[0] = x;
|
||||||
for (size_t i = 1; i < conv_1d_width; i++) {
|
for (size_t i = 1; i < conv_1d_width; i++) {
|
||||||
cache[i] =
|
cache[i] =
|
||||||
|
|
@@ -887,6 +887,7 @@ HWY_NOINLINE void VitTransformerLayer(size_t num_tokens, size_t layer,
|
||||||
const size_t model_dim = activations.weights_config.model_dim;
|
const size_t model_dim = activations.weights_config.model_dim;
|
||||||
auto type = layer_weights->layer_config.type;
|
auto type = layer_weights->layer_config.type;
|
||||||
HWY_DASSERT(type == LayerAttentionType::kVit);
|
HWY_DASSERT(type == LayerAttentionType::kVit);
|
||||||
|
(void)type;
|
||||||
|
|
||||||
auto& x = activations.x;
|
auto& x = activations.x;
|
||||||
HWY_DASSERT(x.BatchSize() == num_tokens);
|
HWY_DASSERT(x.BatchSize() == num_tokens);
|
||||||
|
|
|
||||||
|
|
@@ -189,7 +189,7 @@ struct LayerWeightsPtrs {
|
||||||
} \
|
} \
|
||||||
if (tensors[0]->Ptr() != nullptr || fet != ForEachType::kIgnoreNulls) { \
|
if (tensors[0]->Ptr() != nullptr || fet != ForEachType::kIgnoreNulls) { \
|
||||||
func(ptrs[0]->member.CacheName(layer_idx, sep, sep_index).c_str(), \
|
func(ptrs[0]->member.CacheName(layer_idx, sep, sep_index).c_str(), \
|
||||||
hwy::Span<MatPtr*>(tensors, ptrs.size())); \
|
hwy::Span<MatPtr*>(tensors.data(), ptrs.size())); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -197,7 +197,7 @@ struct LayerWeightsPtrs {
|
||||||
static void ForEachTensor(const std::vector<LayerWeightsPtrs<Weight>*>& ptrs,
|
static void ForEachTensor(const std::vector<LayerWeightsPtrs<Weight>*>& ptrs,
|
||||||
int layer_idx, ForEachType fet, Func func,
|
int layer_idx, ForEachType fet, Func func,
|
||||||
char sep = ' ', int sep_index = -1) {
|
char sep = ' ', int sep_index = -1) {
|
||||||
MatPtr* tensors[ptrs.size()];
|
std::vector<MatPtr*> tensors(ptrs.size(), nullptr);
|
||||||
auto type = ptrs[0]->layer_config.type;
|
auto type = ptrs[0]->layer_config.type;
|
||||||
if (type == LayerAttentionType::kVit) {
|
if (type == LayerAttentionType::kVit) {
|
||||||
// MHA.
|
// MHA.
|
||||||
|
|
@@ -449,7 +449,7 @@ struct ModelWeightsPtrs {
|
||||||
ForEachType fet, Func func) {
|
ForEachType fet, Func func) {
|
||||||
std::vector<LayerWeightsPtrs<Weight>*> layers(ptrs.size());
|
std::vector<LayerWeightsPtrs<Weight>*> layers(ptrs.size());
|
||||||
std::vector<LayerWeightsPtrs<Weight>*> vit_layers(ptrs.size());
|
std::vector<LayerWeightsPtrs<Weight>*> vit_layers(ptrs.size());
|
||||||
MatPtr* tensors[ptrs.size()];
|
std::vector<MatPtr*> tensors(ptrs.size(), nullptr);
|
||||||
// Variables used by GEMMA_CALL_FUNC.
|
// Variables used by GEMMA_CALL_FUNC.
|
||||||
int layer_idx = -1;
|
int layer_idx = -1;
|
||||||
char sep = ' ';
|
char sep = ' ';
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue