diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index fc6a6223cf..aa7e8f967d 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -49,6 +49,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "deepseek",     LLM_CHAT_TEMPLATE_DEEPSEEK },
     { "deepseek2",    LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
     { "deepseek3",    LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
+    { "deepseek-ocr", LLM_CHAT_TEMPLATE_DEEPSEEK_OCR },
     { "command-r",    LLM_CHAT_TEMPLATE_COMMAND_R },
     { "llama3",       LLM_CHAT_TEMPLATE_LLAMA_3 },
     { "chatglm3",     LLM_CHAT_TEMPLATE_CHATGLM_3 },
@@ -541,6 +542,11 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << LU8("<｜Assistant｜>");
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_OCR) {
+        for (auto message : chat) {
+            // no template
+            ss << message->content;
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
         // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
         // EXAONE-3.0-7.8B-Instruct
diff --git a/src/llama-chat.h b/src/llama-chat.h
index 684efb4d67..326db1896c 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -28,6 +28,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_DEEPSEEK,
     LLM_CHAT_TEMPLATE_DEEPSEEK_2,
     LLM_CHAT_TEMPLATE_DEEPSEEK_3,
+    LLM_CHAT_TEMPLATE_DEEPSEEK_OCR,
     LLM_CHAT_TEMPLATE_COMMAND_R,
     LLM_CHAT_TEMPLATE_LLAMA_3,
     LLM_CHAT_TEMPLATE_CHATGLM_3,
diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp
index ab0dc72628..3c41001100 100644
--- a/tools/mtmd/mtmd-cli.cpp
+++ b/tools/mtmd/mtmd-cli.cpp
@@ -222,20 +222,14 @@ static std::string chat_add_and_format(mtmd_cli_context & ctx, common_chat_msg &
 
 static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg) {
     bool add_bos = ctx.chat_history.empty();
+    auto formatted_chat = chat_add_and_format(ctx, msg);
+    LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
 
     mtmd_input_text text;
-    text.text          = msg.content.c_str();
+    text.text          = formatted_chat.c_str();
     text.add_special   = add_bos;
     text.parse_special = true;
 
-    std::string formatted_chat;
-
-    if (!mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
-        formatted_chat = chat_add_and_format(ctx, msg);
-        LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
-        text.text = formatted_chat.c_str();
-    }
-
     if (g_is_interrupted) return 0;
 
     mtmd::input_chunks chunks(mtmd_input_chunks_init());
@@ -319,18 +313,8 @@
     if (is_single_turn) {
         g_is_generating = true;
         if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
-            if (mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
-                std::string image_tokens = "";
-                for (size_t i = 0; i < params.image.size(); i++) {
-                    image_tokens += mtmd_default_marker();
-                    image_tokens += '\n';
-                }
-                params.prompt = image_tokens + params.prompt;
-            }
-            else {
             for (size_t i = 0; i < params.image.size(); i++) {
                 params.prompt += mtmd_default_marker();
-            }
             }
         }
         common_chat_msg msg;
@@ -349,11 +333,6 @@
         }
 
     } else {
-        if (mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
-            LOG_ERR("\n DeepSeek-OCR doesn't support chat mode.");
-            return 1;
-        }
-
         LOG("\n Running in chat mode, available commands:");
         if (mtmd_support_vision(ctx.ctx_vision.get())) {
             LOG("\n    /image <path>    load an image");
diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp
index 791ac77166..33042722eb 100644
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -868,10 +868,6 @@ int mtmd_get_audio_bitrate(mtmd_context * ctx) {
     return 16000; // 16kHz
 }
 
-bool mtmd_is_deepseekocr(mtmd_context * ctx) {
-    return ctx->ctx_v && clip_is_deepseekocr(ctx->ctx_v);
-}
-
 //
 // public API functions
 //
diff --git a/tools/mtmd/mtmd.h b/tools/mtmd/mtmd.h
index 0c2d001db6..b3df24c299 100644
--- a/tools/mtmd/mtmd.h
+++ b/tools/mtmd/mtmd.h
@@ -117,9 +117,6 @@ MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
 // return -1 if audio is not supported
 MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);
 
-// whether the current model is DeepSeek-OCR
-MTMD_API bool mtmd_is_deepseekocr(mtmd_context * ctx);
-
 // mtmd_bitmap
 //
 // if bitmap is image:
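
Illustrative note (not part of the patch): with the "deepseek-ocr" entry registered in LLM_CHAT_TEMPLATES, the template can be resolved through the public llama_chat_apply_template() API, and because the LLM_CHAT_TEMPLATE_DEEPSEEK_OCR branch adds no role markup, the rendered prompt is expected to be just the concatenated message contents. The sketch below is a minimal hypothetical usage example; the file name and message text are made up.

// usage_sketch.cpp -- hypothetical example, not added by this patch
#include "llama.h"
#include <cstdio>
#include <vector>

int main() {
    // a single user message; the content is arbitrary example text
    llama_chat_message msgs[] = {
        { "user", "Free OCR." },
    };
    std::vector<char> buf(512);
    // "deepseek-ocr" now maps to LLM_CHAT_TEMPLATE_DEEPSEEK_OCR via LLM_CHAT_TEMPLATES
    int32_t n = llama_chat_apply_template("deepseek-ocr", msgs, 1,
                                          /*add_ass=*/true, buf.data(), buf.size());
    if (n > 0 && n <= (int32_t) buf.size()) {
        // expected output: "Free OCR." -- no role tags are inserted,
        // unlike the other DeepSeek templates
        printf("%.*s\n", n, buf.data());
    }
    return 0;
}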