mtmd: convert model in FP16

This commit is contained in:
bluebread 2025-12-08 02:36:00 +00:00
parent 53273f83f8
commit 48c6cf2132
3 changed files with 7 additions and 9 deletions

View File

@ -6049,14 +6049,11 @@ class DeepseekOCRVisionModel(MmprojModel):
return vision_config
def tensor_force_quant(self, name, new_name, bid, n_dims):
    """Choose a fixed GGML quantization type for DeepseekOCR vision tensors.

    The model is converted to F16 overall, but embedding and relative-position
    tables are kept at F32 for numerical stability
    (related to https://github.com/ggml-org/llama.cpp/issues/13025).

    Args:
        name: Original (HF) tensor name.
        new_name: Mapped GGUF tensor name (unused here).
        bid: Block index, or None for non-block tensors (unused here).
        n_dims: Number of tensor dimensions (unused here).

    Returns:
        The gguf.GGMLQuantizationType to store this tensor as.
    """
    # Embedding tables are precision-sensitive — keep them at F32.
    if ".embeddings." in name or "pos_embed" in name:
        return gguf.GGMLQuantizationType.F32
    # Relative-position tables likewise stay at F32.
    if ".rel_pos_h" in name or ".rel_pos_w" in name:
        return gguf.GGMLQuantizationType.F32
    # Everything else is converted to F16.
    return gguf.GGMLQuantizationType.F16
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# Only process vision-related tensors, skip language model tensors

View File

@ -4893,6 +4893,7 @@ static struct ggml_tensor * ggml_interpolate_impl(
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
// TODO: implement antialias for modes other than bilinear
GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
GGML_ASSERT(a->type == GGML_TYPE_F32);
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

View File

@ -2888,7 +2888,7 @@ private:
ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, new_pos_embd, positions);
ggml_tensor * cur = build_vit(inp, n_pos, NORM_TYPE_NORMAL, ffn_op_type::FFN_GELU_QUICK,
learned_pos_embd, nullptr); // shape [1024, 16, 16]
learned_pos_embd, nullptr);
ggml_build_forward_expand(gf, cur);