This commit is contained in:
ylwango613 2026-04-04 19:58:01 +11:00 committed by GitHub
commit baa1c6e5c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 3 additions and 0 deletions

View File

@ -889,6 +889,9 @@ struct mtmd_tokenizer {
int n_tokens = text.length() + 2 * add_special;
std::vector<llama_token> result(n_tokens);
n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
if (n_tokens == std::numeric_limits<int32_t>::min()) {
throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
}
if (n_tokens < 0) {
result.resize(-n_tokens);
int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);