This commit is contained in:
younesbelkada 2025-07-04 15:50:43 +04:00
parent a6d0067dd7
commit 1fd0574adc
1 changed file with 4 additions and 1 deletion

View File

@@ -607,7 +607,10 @@ class TextModel(ModelBase):
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
vocab_size = max(
self.hparams.get("vocab_size", len(tokenizer.vocab)),
len(tokenizer.vocab)
)
assert max(tokenizer.vocab.values()) < vocab_size
tokpre = self.get_vocab_base_pre(tokenizer)