mtmd: convert model in FP16

This commit is contained in:
bluebread 2025-12-08 02:36:00 +00:00
parent 53273f83f8
commit 48c6cf2132
3 changed files with 7 additions and 9 deletions

View File

@ -6049,14 +6049,11 @@ class DeepseekOCRVisionModel(MmprojModel):
return vision_config
def tensor_force_quant(self, name, new_name, bid, n_dims):
    """Choose a fixed GGML quantization type for DeepseekOCR vision tensors.

    The model is converted to F16 overall, but embedding and relative-position
    tables are kept at F32 for numerical stability
    (related to https://github.com/ggml-org/llama.cpp/issues/13025).

    Args:
        name: Original (HF) tensor name.
        new_name: Mapped GGUF tensor name (unused here).
        bid: Block index, or None for non-block tensors (unused here).
        n_dims: Number of tensor dimensions (unused here).

    Returns:
        The gguf.GGMLQuantizationType to store this tensor as.
    """
    # Embedding tables are precision-sensitive — keep them at F32.
    if ".embeddings." in name or "pos_embed" in name:
        return gguf.GGMLQuantizationType.F32
    # Relative-position tables likewise stay at F32.
    if ".rel_pos_h" in name or ".rel_pos_w" in name:
        return gguf.GGMLQuantizationType.F32
    # Everything else is converted to F16.
    return gguf.GGMLQuantizationType.F16
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# Only process vision-related tensors, skip language model tensors

View File

@ -4893,6 +4893,7 @@ static struct ggml_tensor * ggml_interpolate_impl(
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
// TODO: implement antialias for modes other than bilinear
GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
GGML_ASSERT(a->type == GGML_TYPE_F32);
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

View File

@ -2888,7 +2888,7 @@ private:
ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, new_pos_embd, positions);
ggml_tensor * cur = build_vit(inp, n_pos, NORM_TYPE_NORMAL, ffn_op_type::FFN_GELU_QUICK,
learned_pos_embd, nullptr); // shape [1024, 16, 16]
learned_pos_embd, nullptr);
ggml_build_forward_expand(gf, cur);