mtmd : fix integer overflow when n_tokens equals INT32_MIN

2026-01-04 17:11:37 +08:00 · 2026-01-04 17:11:37 +08:00 · 4e20fcdc95
parent cef1d23c5a
commit 4e20fcdc95
1 changed files with 4 additions and 0 deletions
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -774,6 +774,10 @@ struct mtmd_tokenizer {
        int n_tokens = text.length() + 2 * add_special;
        std::vector<llama_token> result(n_tokens);
        n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+        // -2147483648 is std::numeric_limits<int32_t>::min()
+        if (n_tokens == -2147483648) {
+            throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
+        }
        if (n_tokens < 0) {
            result.resize(-n_tokens);
            int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);