diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index db119a2770..662822379d 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -6049,14 +6049,11 @@ class DeepseekOCRVisionModel(MmprojModel): return vision_config def tensor_force_quant(self, name, new_name, bid, n_dims): - # TODO: increase numercial stability. maybe delete later. - return gguf.GGMLQuantizationType.F32 - # related to https://github.com/ggml-org/llama.cpp/issues/13025 - # if "input_projection" in name: - # return gguf.GGMLQuantizationType.F16 - # if ".embeddings." in name: - # return gguf.GGMLQuantizationType.F32 - # return super().tensor_force_quant(name, new_name, bid, n_dims) + if ".embeddings." in name or 'pos_embed' in name: + return gguf.GGMLQuantizationType.F32 + if ".rel_pos_h" in name or '.rel_pos_w' in name: + return gguf.GGMLQuantizationType.F32 + return gguf.GGMLQuantizationType.F16 def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: # Only process vision-related tensors, skip language model tensors diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 534753b796..8a17886aa4 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -4893,6 +4893,7 @@ static struct ggml_tensor * ggml_interpolate_impl( GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT); // TODO: implement antialias for modes other than bilinear GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR); + GGML_ASSERT(a->type == GGML_TYPE_F32); struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index e1b604cee3..2ab14e7852 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -2888,7 +2888,7 @@ private: ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, new_pos_embd, positions); ggml_tensor * cur = build_vit(inp, n_pos, NORM_TYPE_NORMAL, ffn_op_type::FFN_GELU_QUICK, - learned_pos_embd, nullptr); // shape [1024, 16, 16] + learned_pos_embd, nullptr); ggml_build_forward_expand(gf, cur);