mtmd : fix integer overflow when n_tokens equals INT32_MIN

This commit is contained in:
ylwango613 2026-01-04 17:11:37 +08:00
parent cef1d23c5a
commit 4e20fcdc95
1 changed file with 4 additions and 0 deletions

View File

@@ -774,6 +774,10 @@ struct mtmd_tokenizer {
int n_tokens = text.length() + 2 * add_special;
std::vector<llama_token> result(n_tokens);
n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
// -2147483648 is std::numeric_limits<int32_t>::min()
if (n_tokens == -2147483648) {
throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
}
if (n_tokens < 0) {
result.resize(-n_tokens);
int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);