mtmd: correct token order
This commit is contained in:
parent
4cfa15fcd7
commit
3f71188303
|
|
@ -2347,6 +2347,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
|| t.first == "_<EOT>"
|
||||
|| t.first == "<|end_of_text|>"
|
||||
|| t.first == "<end_of_utterance>" // smoldocling
|
||||
|| t.first == "<|end▁of▁sentence|>" // deepseek-ocr
|
||||
) {
|
||||
special_eog_ids.insert(t.second);
|
||||
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||
|
|
|
|||
|
|
@ -222,14 +222,18 @@ static std::string chat_add_and_format(mtmd_cli_context & ctx, common_chat_msg &
|
|||
|
||||
static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg) {
|
||||
bool add_bos = ctx.chat_history.empty();
|
||||
auto formatted_chat = chat_add_and_format(ctx, msg);
|
||||
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
|
||||
|
||||
mtmd_input_text text;
|
||||
text.text = formatted_chat.c_str();
|
||||
text.text = msg.content.c_str();
|
||||
text.add_special = add_bos;
|
||||
text.parse_special = true;
|
||||
|
||||
if (!mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
|
||||
auto formatted_chat = chat_add_and_format(ctx, msg);
|
||||
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
|
||||
text.text = formatted_chat.c_str();
|
||||
}
|
||||
|
||||
if (g_is_interrupted) return 0;
|
||||
|
||||
mtmd::input_chunks chunks(mtmd_input_chunks_init());
|
||||
|
|
@ -332,6 +336,11 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
} else {
|
||||
if (mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
|
||||
LOG_ERR("\n DeepSeek-OCR doesn't support chat mode.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG("\n Running in chat mode, available commands:");
|
||||
if (mtmd_support_vision(ctx.ctx_vision.get())) {
|
||||
LOG("\n /image <path> load an image");
|
||||
|
|
|
|||
|
|
@ -864,6 +864,10 @@ int mtmd_get_audio_bitrate(mtmd_context * ctx) {
|
|||
return 16000; // 16kHz
|
||||
}
|
||||
|
||||
// Reports whether the model behind this context is DeepSeek-OCR.
// `ctx` must be non-null: it is dereferenced unconditionally.
// Returns false when ctx->ctx_v is null (presumably: no vision/clip context attached -- confirm),
// otherwise forwards the result of clip_is_deepseekocr() for that context.
bool mtmd_is_deepseekocr(mtmd_context * ctx) {
|
||||
// short-circuit keeps clip_is_deepseekocr() from being called with a null ctx_v
return ctx->ctx_v && clip_is_deepseekocr(ctx->ctx_v);
|
||||
}
|
||||
|
||||
//
|
||||
// public API functions
|
||||
//
|
||||
|
|
|
|||
|
|
@ -117,6 +117,9 @@ MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
|
|||
// return -1 if audio is not supported
|
||||
MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);
|
||||
|
||||
// whether the current model is DeepSeek-OCR
|
||||
MTMD_API bool mtmd_is_deepseekocr(mtmd_context * ctx);
|
||||
|
||||
// mtmd_bitmap
|
||||
//
|
||||
// if bitmap is image:
|
||||
|
|
|
|||
Loading…
Reference in New Issue