From c73eb685fd4e6eee0a4b43e8b6e50a902ab19e31 Mon Sep 17 00:00:00 2001
From: ryan-mangeno
Date: Fri, 29 Aug 2025 12:15:31 -0400
Subject: [PATCH] added CLS token tensors per the previous ModernBERT attempt;
 still working through the rest

---
 gguf-py/gguf/constants.py | 2 ++
 src/llama-arch.cpp        | 2 ++
 src/llama-model.cpp       | 5 +++++
 3 files changed, 9 insertions(+)

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 1273ca31d5..607486a31a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -1185,6 +1185,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.CLS,
+        MODEL_TENSOR.CLS_OUT,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index 031b4c486f..9a009ac902 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -519,6 +519,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_CLS,             "cls" },
+            { LLM_TENSOR_CLS_OUT,         "cls.output" },
         },
     },
     {
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 88784ddadd..a159eb3472 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -2710,6 +2710,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                         layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
                     }
+
+                    cls       = create_tensor(tn(LLM_TENSOR_CLS,     "weight"), {n_embd, n_embd},            TENSOR_NOT_REQUIRED);
+                    cls_out   = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
+                    cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"),   {hparams.n_cls_out},         TENSOR_NOT_REQUIRED);
+
                 } break;
             case LLM_ARCH_NEO_BERT:
                 {
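
Note for reviewers: loading cls/cls_out/cls_out_b only covers the tensor side;
the graph build still has to consume them. Below is a minimal sketch of how a
BERT-style classification head is typically applied with ggml, assuming the
pooled per-sequence embeddings are already available. The helper name
build_cls_head and the inp argument are hypothetical and not part of this
patch; cls, cls_out, and cls_out_b are the tensors loaded above, and a
separate bias for cls is intentionally absent since the patch does not load
one.

#include "ggml.h"

// Sketch only, not the final integration: applies the optional CLS head
// tensors to pooled embeddings of shape [n_embd, n_seqs].
static ggml_tensor * build_cls_head(
        ggml_context * ctx0,
        ggml_tensor  * inp,         // pooled embeddings (hypothetical input)
        ggml_tensor  * cls,         // optional pooler projection, [n_embd, n_embd]
        ggml_tensor  * cls_out,     // optional classifier weight, [n_embd, n_cls_out]
        ggml_tensor  * cls_out_b) { // optional classifier bias,   [n_cls_out]
    ggml_tensor * cur = inp;
    if (cls) {
        // BERT-style pooler: dense projection followed by tanh
        cur = ggml_mul_mat(ctx0, cls, cur);
        cur = ggml_tanh(ctx0, cur);
    }
    if (cls_out) {
        // final classification layer producing n_cls_out logits per sequence
        cur = ggml_mul_mat(ctx0, cls_out, cur);
        if (cls_out_b) {
            cur = ggml_add(ctx0, cur, cls_out_b);
        }
    }
    return cur;
}

Since all three tensors are created with TENSOR_NOT_REQUIRED, checkpoints
without a classification head still load cleanly; the null checks in the
sketch mirror that optionality.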