mtmd: remove DeepSeek-OCR tweaks from llama-mtmd-cli & add deepseek-ocr chat template

This commit is contained in:
bluebread 2025-12-09 16:31:44 +00:00
parent 5174a1e69a
commit 016140699f
5 changed files with 10 additions and 31 deletions

View File

@ -49,6 +49,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
{ "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
{ "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
{ "deepseek-ocr", LLM_CHAT_TEMPLATE_DEEPSEEK_OCR },
{ "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
{ "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
{ "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
@ -541,6 +542,11 @@ int32_t llm_chat_apply_template(
if (add_ass) {
ss << LU8("<Assistant>");
}
} else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_OCR) {
for (auto message : chat) {
// no template
ss << message->content;
}
} else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
// ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
// EXAONE-3.0-7.8B-Instruct

View File

@ -28,6 +28,7 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_DEEPSEEK,
LLM_CHAT_TEMPLATE_DEEPSEEK_2,
LLM_CHAT_TEMPLATE_DEEPSEEK_3,
LLM_CHAT_TEMPLATE_DEEPSEEK_OCR,
LLM_CHAT_TEMPLATE_COMMAND_R,
LLM_CHAT_TEMPLATE_LLAMA_3,
LLM_CHAT_TEMPLATE_CHATGLM_3,

View File

@ -222,20 +222,14 @@ static std::string chat_add_and_format(mtmd_cli_context & ctx, common_chat_msg &
static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg) {
bool add_bos = ctx.chat_history.empty();
auto formatted_chat = chat_add_and_format(ctx, msg);
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
mtmd_input_text text;
text.text = msg.content.c_str();
text.text = formatted_chat.c_str();
text.add_special = add_bos;
text.parse_special = true;
std::string formatted_chat;
if (!mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
formatted_chat = chat_add_and_format(ctx, msg);
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
text.text = formatted_chat.c_str();
}
if (g_is_interrupted) return 0;
mtmd::input_chunks chunks(mtmd_input_chunks_init());
@ -319,18 +313,8 @@ int main(int argc, char ** argv) {
if (is_single_turn) {
g_is_generating = true;
if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
if (mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
std::string image_tokens = "";
for (size_t i = 0; i < params.image.size(); i++) {
image_tokens += mtmd_default_marker();
image_tokens += '\n';
}
params.prompt = image_tokens + params.prompt;
}
else {
for (size_t i = 0; i < params.image.size(); i++) {
params.prompt += mtmd_default_marker();
}
}
}
common_chat_msg msg;
@ -349,11 +333,6 @@ int main(int argc, char ** argv) {
}
} else {
if (mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
LOG_ERR("\n DeepSeek-OCR doesn't support chat mode.");
return 1;
}
LOG("\n Running in chat mode, available commands:");
if (mtmd_support_vision(ctx.ctx_vision.get())) {
LOG("\n /image <path> load an image");

View File

@ -868,10 +868,6 @@ int mtmd_get_audio_bitrate(mtmd_context * ctx) {
return 16000; // 16kHz
}
// Reports whether the current model is DeepSeek-OCR.
// Returns true only when ctx->ctx_v is non-null and clip_is_deepseekocr()
// returns true for it; a null ctx_v short-circuits to false without calling
// into clip. NOTE(review): ctx_v is presumably the vision (clip) context and
// ctx itself is dereferenced unchecked, so callers must pass a valid context.
bool mtmd_is_deepseekocr(mtmd_context * ctx) {
return ctx->ctx_v && clip_is_deepseekocr(ctx->ctx_v);
}
//
// public API functions
//

View File

@ -117,9 +117,6 @@ MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
// return -1 if audio is not supported
MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);
// whether the current model is DeepSeek-OCR
MTMD_API bool mtmd_is_deepseekocr(mtmd_context * ctx);
// mtmd_bitmap
//
// if bitmap is image: