mtmd: convert model in FP16
parent 53273f83f8
commit 48c6cf2132
@@ -6049,14 +6049,11 @@ class DeepseekOCRVisionModel(MmprojModel):
         return vision_config
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        # TODO: increase numercial stability. maybe delete later.
-        return gguf.GGMLQuantizationType.F32
-        # related to https://github.com/ggml-org/llama.cpp/issues/13025
-        # if "input_projection" in name:
-        #     return gguf.GGMLQuantizationType.F16
-        # if ".embeddings." in name:
-        #     return gguf.GGMLQuantizationType.F32
-        # return super().tensor_force_quant(name, new_name, bid, n_dims)
+        if ".embeddings." in name or 'pos_embed' in name:
+            return gguf.GGMLQuantizationType.F32
+        if ".rel_pos_h" in name or '.rel_pos_w' in name:
+            return gguf.GGMLQuantizationType.F32
+        return gguf.GGMLQuantizationType.F16
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # Only process vision-related tensors, skip language model tensors
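For reference, a minimal standalone sketch (not part of the diff) of the selection logic this hunk installs: embedding and relative-position tables stay in F32 for numerical stability, and every other vision tensor is stored in F16. The demo tensor names are hypothetical; only the gguf enum usage comes from the hunk above.

    import gguf

    def pick_quant(name: str) -> gguf.GGMLQuantizationType:
        # embeddings and SAM-style relative-position tables are precision-sensitive
        if ".embeddings." in name or "pos_embed" in name:
            return gguf.GGMLQuantizationType.F32
        if ".rel_pos_h" in name or ".rel_pos_w" in name:
            return gguf.GGMLQuantizationType.F32
        # everything else is stored in half precision
        return gguf.GGMLQuantizationType.F16

    # hypothetical tensor names, for illustration only
    assert pick_quant("vision_model.embeddings.patch_embedding.weight") == gguf.GGMLQuantizationType.F32
    assert pick_quant("sam_model.blocks.0.attn.rel_pos_h") == gguf.GGMLQuantizationType.F32
    assert pick_quant("vision_model.encoder.layers.0.mlp.fc1.weight") == gguf.GGMLQuantizationType.F16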
@@ -4893,6 +4893,7 @@ static struct ggml_tensor * ggml_interpolate_impl(
     GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
     // TODO: implement antialias for modes other than bilinear
     GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
 
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
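The asserts read the packed mode argument: the low byte selects the scale mode and the higher bits carry flags such as GGML_SCALE_FLAG_ANTIALIAS. A small sketch of that packing, where the numeric values are illustrative assumptions rather than ggml's actual constants:

    SCALE_MODE_NEAREST   = 0
    SCALE_MODE_BILINEAR  = 1
    SCALE_MODE_COUNT     = 2
    SCALE_FLAG_ANTIALIAS = 1 << 8  # flags sit above the low mode byte

    def validate_mode(mode: int) -> None:
        # low byte must name a known scale mode
        assert (mode & 0xFF) < SCALE_MODE_COUNT
        # antialiasing is only implemented for the bilinear path
        assert not (mode & SCALE_FLAG_ANTIALIAS) or (mode & 0xFF) == SCALE_MODE_BILINEAR

    validate_mode(SCALE_MODE_BILINEAR | SCALE_FLAG_ANTIALIAS)  # passes
    validate_mode(SCALE_MODE_NEAREST)                          # passes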
@@ -2888,7 +2888,7 @@ private:
     ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, new_pos_embd, positions);
 
     ggml_tensor * cur = build_vit(inp, n_pos, NORM_TYPE_NORMAL, ffn_op_type::FFN_GELU_QUICK,
-                learned_pos_embd, nullptr); // shape [1024, 16, 16]
+                learned_pos_embd, nullptr);
 
     ggml_build_forward_expand(gf, cur);
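Here ggml_get_rows gathers one row of the (possibly interpolated) position-embedding table per patch index, which is why the hard-coded shape comment was dropped. A rough NumPy analogue, with illustrative shapes and names:

    import numpy as np

    n_pos, n_embd = 256, 1024  # illustrative sizes
    new_pos_embd  = np.random.rand(n_pos, n_embd).astype(np.float32)  # embedding table
    positions     = np.arange(n_pos)             # one row index per patch
    learned_pos_embd = new_pos_embd[positions]   # row gather, like ggml_get_rows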