mtmd: fix "v.patch_embd" quant and unsupported im2col ops on Metal for deepseek-ocr (#21027)

* mtmd: fix "v.patch_embd" quant and unsupported im2col ops on Metal for deepseek-ocr
* Update src/llama-quant.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
2026-03-27 00:07:55 +01:00 · 2026-03-27 00:07:55 +01:00 · 1743d98057
parent 7ca0c9cca7
commit 1743d98057
2 changed files with 6 additions and 1 deletion
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7150,6 +7150,8 @@ class DeepseekOCRVisionModel(MmprojModel):
            return gguf.GGMLQuantizationType.F32
        if ".rel_pos_h" in name or '.rel_pos_w' in name:
            return gguf.GGMLQuantizationType.F32
+        if ".neck." in name or ".net_" in name:
+            return gguf.GGMLQuantizationType.F32
        return super().tensor_force_quant(name, new_name, bid, n_dims)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -345,9 +345,12 @@ static bool tensor_allows_quantization(const llama_model_quantize_params * param

    // do not quantize specific multimodal tensors
    quantize &= name.find(".position_embd") == std::string::npos;
-    quantize &= name.find("sam.patch_embd") == std::string::npos;
    quantize &= name.find("sam.pos_embd")   == std::string::npos;
+    quantize &= name.find("sam.neck.")      == std::string::npos;
+    quantize &= name.find("sam.net_")       == std::string::npos;
    quantize &= name.find(".rel_pos")       == std::string::npos;
+    quantize &= name.find(".patch_embd")    == std::string::npos;
+    quantize &= name.find(".patch_merger")  == std::string::npos;

    return quantize;
 }