diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index bd311bea45..a20c6525e4 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2203,6 +2203,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { // for now, we apply this workaround to find the tokens based on their text for (const auto & t : token_to_id) { + auto & attr = id_to_token[t.second].attr; + // find EOT token: "<|eot_id|>", "<|im_end|>", "", etc. if (special_eot_id == LLAMA_TOKEN_NULL) { if (false @@ -2218,10 +2220,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "" // smoldocling ) { special_eot_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2232,10 +2234,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|eom_id|>" ) { special_eom_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2252,10 +2254,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|code_prefix|>" // GLM-4.5 ) { special_fim_pre_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2272,10 +2274,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|code_suffix|>" // GLM-4.5 ) { special_fim_suf_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2292,10 +2294,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|code_middle|>" // GLM-4.5 ) { special_fim_mid_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2309,10 +2311,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "" ) { special_fim_pad_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2327,10 +2329,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "" // Granite ) { special_fim_rep_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } @@ -2341,15 +2343,41 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|file_sep|>" // Qwen ) { special_fim_sep_id = t.second; - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } } } + // auto-detect unused tokens: e.g. control tokens with the word "unused" + // ideally, these tokens should be marked as unused during conversion + { + uint32_t n_unused = 0; + + for (const auto & t : token_to_id) { + auto & attr = id_to_token[t.second].attr; + + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + continue; + } + + if ((attr & LLAMA_TOKEN_ATTR_UNUSED) == 0) { + if (strstr(t.first.c_str(), "unused") != NULL) { + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_UNUSED); + } + } + + if (attr & LLAMA_TOKEN_ATTR_UNUSED) { + n_unused++; + } + } + + LLAMA_LOG_INFO("%s: %u unused tokens\n", __func__, n_unused); + } + // maintain a list of tokens that cause end-of-generation // this is currently determined based on the token text, which is obviously not ideal // ref: https://github.com/ggerganov/llama.cpp/issues/9606 @@ -2368,6 +2396,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { } for (const auto & t : token_to_id) { + auto & attr = id_to_token[t.second].attr; + if (false || t.first == "<|eot_id|>" || t.first == "<|im_end|>" @@ -2385,24 +2415,28 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "" // smoldocling ) { special_eog_ids.insert(t.second); - if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { + if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n", __func__, t.second, t.first.c_str()); - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL); } } else { - // token is control, but not marked as EOG -> print a debug log - if (id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && special_eog_ids.count(t.second) == 0) { - LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n", - __func__, t.second, t.first.c_str()); + if (attr & LLAMA_TOKEN_ATTR_CONTROL && !(attr & LLAMA_TOKEN_ATTR_UNUSED)) { + // token is control, but not marked as EOG -> print a debug log + if (special_eog_ids.count(t.second) == 0) { + LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n", + __func__, t.second, t.first.c_str()); + } } } } // @ngxson : quick hack for gpt-oss, always render these tokens for (const auto & t : token_to_id) { + auto & attr = id_to_token[t.second].attr; + if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") { - id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_USER_DEFINED); } } @@ -2435,15 +2469,17 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { LLAMA_LOG_INFO("%s: printing all EOG tokens:\n", __func__); for (auto tid : special_eog_ids) { - LLAMA_LOG_INFO("%s: - %d ('%s')\n", __func__, tid, id_to_token[tid].text.c_str()); + auto & text = id_to_token[tid].text; - if (id_to_token[tid].text == "<|return|>") { + LLAMA_LOG_INFO("%s: - %d ('%s')\n", __func__, tid, text.c_str()); + + if (text == "<|return|>") { has_return = true; - } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") { + } else if (text == "<|call|>" || text == "<|calls|>") { has_call = true; - } else if (id_to_token[tid].text == "<|flush|>") { + } else if (text == "<|flush|>") { has_flush = true; - } else if (id_to_token[tid].text == "<|end|>") { + } else if (text == "<|end|>") { has_end = true; end_id = tid; } @@ -2451,7 +2487,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) { special_eog_ids.erase(end_id); - id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED; + + auto & attr = id_to_token[end_id].attr; + attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_USER_DEFINED); + LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__); } }