From a94c241751e7addfb56b52517f59c510214d89c8 Mon Sep 17 00:00:00 2001
From: Saba Fallah <10401143+sfallah@users.noreply.github.com>
Date: Mon, 2 Feb 2026 12:07:35 +0100
Subject: [PATCH] merge resolved - fixed issues in convert - tested several
 deepseek models

---
 convert_hf_to_gguf.py                 |  9 ---------
 gguf-py/gguf/tensor_mapping.py        |  2 +-
 src/llama-model.cpp                   |  1 -
 src/models/deepseek2.cpp              |  5 ++---
 tools/mtmd/clip.cpp                   | 31 -------------------------------
 tools/mtmd/tests/test-deepseek-ocr.py |  4 ++--
 6 files changed, 5 insertions(+), 47 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 321e712e89..d7448441f3 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7519,9 +7519,6 @@ class DeepseekV2Model(TextModel):
         first_k_dense_replace = hparams["num_hidden_layers"] if not has_moe else 0
         self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
         kv_lora_rank = hparams["kv_lora_rank"] if hparams.get("kv_lora_rank") is not None else 512
-        routed_scaling_factor = hparams.get("routed_scaling_factor", 1.0)
-        norm_topk_prob = hparams.get("norm_topk_prob", False)
-        self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
         if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
             self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
@@ -7534,7 +7531,6 @@ class DeepseekV2Model(TextModel):
         self.gguf_writer.add_value_length(kv_lora_rank)
         self.gguf_writer.add_key_length_mla(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
         self.gguf_writer.add_value_length_mla(hparams["v_head_dim"])
-        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
         # MoE parameters (required by C++ code for DEEPSEEK2 arch)
         # For non-MoE models like Youtu, use intermediate_size as expert_feed_forward_length
@@ -7554,11 +7550,6 @@ class DeepseekV2Model(TextModel):
             if (norm_topk_prob := hparams.get("norm_topk_prob")) is not None and norm_topk_prob:
                 self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
 
-        self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
-        self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
-        self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
-        self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
-        self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
 
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 05db831d26..a6a82483ea 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -1312,7 +1312,7 @@ class TensorNameMap:
             "vision_model.positional_embedding_vlm", # llama 4
             "vision_tower.patch_embed.pos_emb", # kimi-vl
             "visual.pos_embed", # qwen3vl
-            "model.vision.patch_embedding.position_embedding", # cogvlm
+            "model.vision.patch_embedding.position_embedding", # cogvlm
             "visual.embeddings.position_embedding", # glm4v
         ),
 
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 3955a537f0..dd62ccb0cd 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -4917,7 +4917,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_DEEPSEEK2OCR:
                 {
                     // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
-                    const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
                     const bool is_ocr = (arch == LLM_ARCH_DEEPSEEK2OCR);
 
                     const bool is_mla = hparams.is_mla();
diff --git a/src/models/deepseek2.cpp b/src/models/deepseek2.cpp
index f705df0665..2e3446a53b 100644
--- a/src/models/deepseek2.cpp
+++ b/src/models/deepseek2.cpp
@@ -3,8 +3,7 @@
 llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
     // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
-    bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
-    bool is_ocr = (model.arch == LLM_ARCH_DEEPSEEK2OCR);
+    bool is_ocr = model.arch == LLM_ARCH_DEEPSEEK2OCR;
 
     const bool is_mla = hparams.is_mla();
 
 
@@ -83,7 +82,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
             cb(Qcur, "q_pe", il);
             cb(Kcur, "k_pe", il);
 
-            cur = build_attn(inp_attn,
+            cur = build_attn(inp_attn_kv,
                     model.layers[il].wo, NULL,
                     Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
             cb(cur, "attn_out", il);
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 38e1bd5e97..a356ab7c05 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -4180,37 +4180,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         return false;
     }
 
-    // print debug nodes
-    if (ctx->debug_graph) {
-        LOG_INF("\n\n---\n\n");
-        LOG_INF("\n\nDebug graph:\n\n");
-        for (ggml_tensor * t : ctx->debug_print_tensors) {
-            std::vector<uint8_t> data(ggml_nbytes(t));
-            ggml_backend_tensor_get(t, data.data(), 0, ggml_nbytes(t));
-            print_tensor_info(t);
-            print_tensor_shape(t);
-            print_tensor_sum(t, data.data(), 3);
-            std::string tname_s = std::string(t->name);
-
-            bool is_stored = false;
-            std::vector<std::string> patterns = {
-                /* Add tensor names here to dump (e.g. "sam_output") */
-            };
-
-            for (auto & p : patterns) {
-                if (tname_s == p) {
-                    save_tensor_to_file(t, data.data());
-                    is_stored = true;
-                    break;
-                }
-            }
-
-            if (!is_stored) {
-                print_tensor_data(t, data.data(), 3);
-            }
-        }
-    }
-
     // the last node is the embedding tensor
     ggml_tensor * embeddings = ggml_graph_node(gf, -1);
 
diff --git a/tools/mtmd/tests/test-deepseek-ocr.py b/tools/mtmd/tests/test-deepseek-ocr.py
index 674a350015..d25f8c330b 100644
--- a/tools/mtmd/tests/test-deepseek-ocr.py
+++ b/tools/mtmd/tests/test-deepseek-ocr.py
@@ -83,9 +83,9 @@ def read_expected_output(file_path: str) -> str:
 
 def main():
     ap = argparse.ArgumentParser(description="Compare llama.cpp and HuggingFace DeepSeek-OCR outputs")
-    ap.add_argument("--llama-model", default="gguf_models/deepseek-ai/deepseek-ocr-f16.gguf",
+    ap.add_argument("--llama-model", default="gguf_models/deepseek-ai/deepseek-ocr-q8_0_test.gguf",
                     help="Path to llama.cpp GGUF model")
-    ap.add_argument("--mmproj", default="gguf_models/deepseek-ai/mmproj-deepseek-ocr-f16.gguf",
+    ap.add_argument("--mmproj", default="gguf_models/deepseek-ai/mmproj-deepseek-ocr-f16_test.gguf",
                     help="Path to mmproj GGUF file")
     ap.add_argument("--image", default="test-1.jpeg",
                     help="Path to test image")