Compare commits
8 Commits
4d9642394b
...
6d2d1fb3f9
| Author | SHA1 | Date |
|---|---|---|
|
|
6d2d1fb3f9 | |
|
|
bafae27654 | |
|
|
873c825611 | |
|
|
82764d8f40 | |
|
|
21a4933042 | |
|
|
1e9d771e2c | |
|
|
aa4695c5e5 | |
|
|
68a7c144a3 |
|
|
@ -22,6 +22,9 @@ import math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
# Check for legacy torch compatibility flag (needed for older PyTorch versions, e.g., 2.2.2 on Intel Mac)
|
||||||
|
_LEGACY_TORCH = "--legacy-torch" in sys.argv
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from torch import Tensor
|
from torch import Tensor
|
||||||
|
|
||||||
|
|
@ -4258,9 +4261,7 @@ class Qwen2VLVisionModel(MmprojModel):
|
||||||
yield from super().modify_tensors(data_torch, name, bid)
|
yield from super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
@ModelBase.register("Qwen2_5OmniModel")
|
class Qwen25AudioModel(MmprojModel):
|
||||||
class Qwen25OmniModel(Qwen2VLVisionModel):
|
|
||||||
has_vision_encoder = True
|
|
||||||
has_audio_encoder = True
|
has_audio_encoder = True
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
@ -4276,12 +4277,6 @@ class Qwen25OmniModel(Qwen2VLVisionModel):
|
||||||
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["num_mel_bins"])
|
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["num_mel_bins"])
|
||||||
self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-5))
|
self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-5))
|
||||||
|
|
||||||
def get_vision_config(self) -> dict[str, Any] | None:
|
|
||||||
return self.global_config["thinker_config"].get("vision_config")
|
|
||||||
|
|
||||||
def get_audio_config(self) -> dict[str, Any] | None:
|
|
||||||
return self.global_config["thinker_config"].get("audio_config")
|
|
||||||
|
|
||||||
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
|
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
|
||||||
# SinusoidsPositionEmbedding
|
# SinusoidsPositionEmbedding
|
||||||
assert self.hparams_audio is not None
|
assert self.hparams_audio is not None
|
||||||
|
|
@ -4312,7 +4307,32 @@ class Qwen25OmniModel(Qwen2VLVisionModel):
|
||||||
# this tensor is left unused in transformers code
|
# this tensor is left unused in transformers code
|
||||||
# https://github.com/huggingface/transformers/blob/6e3063422c4b1c014aa60c32b9254fd2902f0f28/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py#L1809
|
# https://github.com/huggingface/transformers/blob/6e3063422c4b1c014aa60c32b9254fd2902f0f28/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py#L1809
|
||||||
return
|
return
|
||||||
yield from super().modify_tensors(data_torch, name, bid)
|
yield from MmprojModel.modify_tensors(self, data_torch, name, bid)
|
||||||
|
|
||||||
|
return # skip other tensors
|
||||||
|
|
||||||
|
|
||||||
|
@ModelBase.register("Qwen2_5OmniModel")
|
||||||
|
class Qwen25OmniModel(Qwen2VLVisionModel, Qwen25AudioModel):
|
||||||
|
has_audio_encoder = True
|
||||||
|
has_vision_encoder = True
|
||||||
|
|
||||||
|
def get_vision_config(self) -> dict[str, Any] | None:
|
||||||
|
return self.global_config["thinker_config"].get("vision_config")
|
||||||
|
|
||||||
|
def get_audio_config(self) -> dict[str, Any] | None:
|
||||||
|
return self.global_config["thinker_config"].get("audio_config")
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
super().set_gguf_parameters()
|
||||||
|
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN25O)
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
if "visual." in name:
|
||||||
|
yield from Qwen2VLVisionModel.modify_tensors(self, data_torch, name, bid)
|
||||||
|
elif "audio_tower." in name:
|
||||||
|
yield from Qwen25AudioModel.modify_tensors(self, data_torch, name, bid)
|
||||||
|
return # skip other tensors
|
||||||
|
|
||||||
|
|
||||||
@ModelBase.register("InternVisionModel")
|
@ModelBase.register("InternVisionModel")
|
||||||
|
|
@ -4816,7 +4836,10 @@ class RND1Model(Qwen2MoeModel):
|
||||||
class Qwen3VLVisionModel(MmprojModel):
|
class Qwen3VLVisionModel(MmprojModel):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
assert self.hparams_vision is not None
|
if self.hparams_vision is None:
|
||||||
|
logger.info("No vision config found, skipping vision tensor processing")
|
||||||
|
return
|
||||||
|
|
||||||
# Compute image_size if not present
|
# Compute image_size if not present
|
||||||
if "image_size" not in self.hparams_vision:
|
if "image_size" not in self.hparams_vision:
|
||||||
# For Qwen3VL/Qwen3VLMoe, compute from num_position_embeddings
|
# For Qwen3VL/Qwen3VLMoe, compute from num_position_embeddings
|
||||||
|
|
@ -4837,6 +4860,8 @@ class Qwen3VLVisionModel(MmprojModel):
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
def set_gguf_parameters(self):
|
||||||
super().set_gguf_parameters()
|
super().set_gguf_parameters()
|
||||||
|
# in case mixed modalities, the arch will be handled by subclass
|
||||||
|
if not self.has_audio_encoder:
|
||||||
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN3VL)
|
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN3VL)
|
||||||
self.gguf_writer.add_vision_use_gelu(True)
|
self.gguf_writer.add_vision_use_gelu(True)
|
||||||
|
|
||||||
|
|
@ -4925,11 +4950,64 @@ class Qwen3VLVisionModel(MmprojModel):
|
||||||
return
|
return
|
||||||
|
|
||||||
if name.startswith("visual."):
|
if name.startswith("visual."):
|
||||||
yield from super().modify_tensors(data_torch, name, bid)
|
yield from MmprojModel.modify_tensors(self, data_torch, name, bid)
|
||||||
return
|
return # skip other tensors
|
||||||
|
|
||||||
# Fall back to parent class for other tensors
|
|
||||||
yield from super().modify_tensors(data_torch, name, bid)
|
@ModelBase.register("Qwen3OmniMoeForConditionalGeneration")
|
||||||
|
class Qwen3OmniMmprojModel(Qwen3VLVisionModel, Qwen25AudioModel):
|
||||||
|
has_audio_encoder = True
|
||||||
|
has_vision_encoder = True
|
||||||
|
|
||||||
|
def get_vision_config(self) -> dict[str, Any] | None:
|
||||||
|
if self.has_vision_encoder:
|
||||||
|
return self.global_config["thinker_config"].get("vision_config")
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_audio_config(self) -> dict[str, Any] | None:
|
||||||
|
if self.has_audio_encoder:
|
||||||
|
return self.global_config["thinker_config"].get("audio_config")
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
if self.has_vision_encoder:
|
||||||
|
Qwen3VLVisionModel.set_gguf_parameters(self)
|
||||||
|
self.gguf_writer.add_clip_vision_projector_type(gguf.VisionProjectorType.QWEN3VL)
|
||||||
|
if self.has_audio_encoder:
|
||||||
|
Qwen25AudioModel.set_gguf_parameters(self)
|
||||||
|
self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.QWEN3A)
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
if "visual." in name:
|
||||||
|
if not self.has_vision_encoder:
|
||||||
|
raise ValueError(f"Model does not have vision encoder, but found tensor {name}")
|
||||||
|
# need to transform vision tensor naming, so that modify_tensors() logic can be used correctly
|
||||||
|
name = name.replace("thinker.visual.", "model.visual.")
|
||||||
|
if ".merger_list." in name:
|
||||||
|
name = name.replace(".merger_list.", ".deepstack_merger_list.")
|
||||||
|
name = name.replace(".ln_q", ".norm")
|
||||||
|
name = name.replace(".mlp.0", ".linear_fc1")
|
||||||
|
name = name.replace(".mlp.2", ".linear_fc2")
|
||||||
|
elif ".merger." in name:
|
||||||
|
name = name.replace(".ln_q", ".norm")
|
||||||
|
name = name.replace(".mlp.0", ".linear_fc1")
|
||||||
|
name = name.replace(".mlp.2", ".linear_fc2")
|
||||||
|
yield from Qwen3VLVisionModel.modify_tensors(self, data_torch, name, bid)
|
||||||
|
elif "audio_tower." in name:
|
||||||
|
if not self.has_audio_encoder:
|
||||||
|
raise ValueError(f"Model does not have audio encoder, but found tensor {name}")
|
||||||
|
if "conv2d" in name and name.endswith(".bias"):
|
||||||
|
# transform conv2d bias [n_embd] --> [1, 1, n_embd]
|
||||||
|
data_torch = data_torch.unsqueeze(-1).unsqueeze(-1)
|
||||||
|
yield from Qwen25AudioModel.modify_tensors(self, data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
|
@ModelBase.register("Qwen3ASRForConditionalGeneration")
|
||||||
|
class Qwen3ASRMmprojModel(Qwen3OmniMmprojModel):
|
||||||
|
has_audio_encoder = True
|
||||||
|
has_vision_encoder = False
|
||||||
|
|
||||||
|
|
||||||
@ModelBase.register("Glm4vForConditionalGeneration", "Glm4vMoeForConditionalGeneration", "GlmOcrForConditionalGeneration")
|
@ModelBase.register("Glm4vForConditionalGeneration", "Glm4vMoeForConditionalGeneration", "GlmOcrForConditionalGeneration")
|
||||||
|
|
@ -4992,6 +5070,8 @@ class Step3VLVisionModel(MmprojModel):
|
||||||
def tensor_force_quant(self, name, new_name, bid, n_dims):
|
def tensor_force_quant(self, name, new_name, bid, n_dims):
|
||||||
if ".position_embd." in new_name:
|
if ".position_embd." in new_name:
|
||||||
return gguf.GGMLQuantizationType.F32
|
return gguf.GGMLQuantizationType.F32
|
||||||
|
if ("mm.0." in new_name or "mm.1." in new_name) and new_name.endswith(".weight"):
|
||||||
|
return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
|
||||||
return super().tensor_force_quant(name, new_name, bid, n_dims)
|
return super().tensor_force_quant(name, new_name, bid, n_dims)
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
|
@ -5030,8 +5110,9 @@ class Qwen3VLTextModel(Qwen3Model):
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
def set_gguf_parameters(self):
|
||||||
super().set_gguf_parameters()
|
super().set_gguf_parameters()
|
||||||
|
if "thinker_config" in self.hparams:
|
||||||
# Handle MRoPE (Multi-axis Rotary Position Embedding) for Qwen3-VL
|
vision_config = self.hparams["thinker_config"].get("vision_config", {})
|
||||||
|
else:
|
||||||
vision_config = self.hparams.get("vision_config", {})
|
vision_config = self.hparams.get("vision_config", {})
|
||||||
deepstack_layer_num = len(vision_config.get("deepstack_visual_indexes", []))
|
deepstack_layer_num = len(vision_config.get("deepstack_visual_indexes", []))
|
||||||
self.gguf_writer.add_num_deepstack_layers(deepstack_layer_num)
|
self.gguf_writer.add_num_deepstack_layers(deepstack_layer_num)
|
||||||
|
|
@ -5101,6 +5182,70 @@ class Qwen3VLMoeTextModel(Qwen3MoeModel):
|
||||||
yield from super().modify_tensors(data_torch, name, bid)
|
yield from super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
|
@ModelBase.register("Qwen3OmniMoeForConditionalGeneration")
|
||||||
|
class Qwen3OmniMoeTextModel(Qwen3VLMoeTextModel):
|
||||||
|
model_arch = gguf.MODEL_ARCH.QWEN3VLMOE
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
super().set_vocab()
|
||||||
|
# correct BOS/EOS tokens
|
||||||
|
with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
|
||||||
|
tokenizer_config = json.load(f)
|
||||||
|
added_tokens = tokenizer_config.get("added_tokens_decoder", {})
|
||||||
|
for token_id, data in added_tokens.items():
|
||||||
|
if data.get("content") == "<|im_end|>":
|
||||||
|
self.gguf_writer.add_bos_token_id(int(token_id))
|
||||||
|
self.gguf_writer.add_eos_token_id(int(token_id))
|
||||||
|
break
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
super().set_gguf_parameters()
|
||||||
|
self.gguf_writer.add_num_deepstack_layers(0)
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
# Skip vision and audio tensors - they go in the mmproj file
|
||||||
|
if "visual." in name or "audio_tower." in name \
|
||||||
|
or "talker." in name or "code2wav." in name:
|
||||||
|
return
|
||||||
|
|
||||||
|
name = name.replace("thinker.", "")
|
||||||
|
yield from super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
|
@ModelBase.register("Qwen3ASRForConditionalGeneration")
|
||||||
|
class Qwen3ASRTextModel(Qwen3VLTextModel):
|
||||||
|
model_arch = gguf.MODEL_ARCH.QWEN3VL
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
super().set_gguf_parameters()
|
||||||
|
self.gguf_writer.add_num_deepstack_layers(0)
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
super().set_vocab()
|
||||||
|
# fix chat template, use correct chatml format
|
||||||
|
self.gguf_writer.add_chat_template("{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}")
|
||||||
|
# correct BOS/EOS tokens
|
||||||
|
with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
|
||||||
|
tokenizer_config = json.load(f)
|
||||||
|
added_tokens = tokenizer_config.get("added_tokens_decoder", {})
|
||||||
|
for token_id, data in added_tokens.items():
|
||||||
|
if data.get("content") == "<|im_end|>":
|
||||||
|
self.gguf_writer.add_bos_token_id(int(token_id))
|
||||||
|
self.gguf_writer.add_eos_token_id(int(token_id))
|
||||||
|
break
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch, name, bid):
|
||||||
|
# qwen3-omni
|
||||||
|
name = name.replace("thinker.", "")
|
||||||
|
|
||||||
|
# Skip vision and audio tensors - they go in the mmproj file
|
||||||
|
if "visual." in name or "audio_tower." in name \
|
||||||
|
or "talker." in name or "code2wav." in name:
|
||||||
|
return
|
||||||
|
|
||||||
|
yield from super().modify_tensors(data_torch, name, bid)
|
||||||
|
|
||||||
|
|
||||||
class _LinearAttentionVReorderBase(Qwen3NextModel):
|
class _LinearAttentionVReorderBase(Qwen3NextModel):
|
||||||
model_arch = gguf.MODEL_ARCH.QWEN3NEXT # overridden by subclasses
|
model_arch = gguf.MODEL_ARCH.QWEN3NEXT # overridden by subclasses
|
||||||
"""reorders V heads from grouped to tiled order for ggml broadcast
|
"""reorders V heads from grouped to tiled order for ggml broadcast
|
||||||
|
|
@ -12918,6 +13063,24 @@ class LazyTorchTensor(gguf.LazyBase):
|
||||||
}
|
}
|
||||||
|
|
||||||
# only used when byteswapping data. Only correct size is needed
|
# only used when byteswapping data. Only correct size is needed
|
||||||
|
if _LEGACY_TORCH:
|
||||||
|
# Compatible with older PyTorch versions (e.g., 2.2.2 on Intel Mac)
|
||||||
|
# Missing in PyTorch < 2.3: uint64, uint32, uint16
|
||||||
|
_dtype_byteswap_map: dict[torch.dtype, type] = {
|
||||||
|
torch.float64: np.float64,
|
||||||
|
torch.float32: np.float32,
|
||||||
|
torch.bfloat16: np.float16,
|
||||||
|
torch.float16: np.float16,
|
||||||
|
torch.int64: np.int64,
|
||||||
|
torch.int32: np.int32,
|
||||||
|
torch.int16: np.int16,
|
||||||
|
torch.int8: np.int8,
|
||||||
|
torch.uint8: np.uint8,
|
||||||
|
torch.bool: np.uint8,
|
||||||
|
torch.float8_e4m3fn: np.uint8,
|
||||||
|
torch.float8_e5m2: np.uint8,
|
||||||
|
}
|
||||||
|
else:
|
||||||
_dtype_byteswap_map: dict[torch.dtype, type] = {
|
_dtype_byteswap_map: dict[torch.dtype, type] = {
|
||||||
torch.float64: np.float64,
|
torch.float64: np.float64,
|
||||||
torch.float32: np.float32,
|
torch.float32: np.float32,
|
||||||
|
|
@ -12939,6 +13102,26 @@ class LazyTorchTensor(gguf.LazyBase):
|
||||||
# used for safetensors slices
|
# used for safetensors slices
|
||||||
# ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046
|
# ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046
|
||||||
# TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734
|
# TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734
|
||||||
|
if _LEGACY_TORCH:
|
||||||
|
# Compatible with older PyTorch versions (e.g., 2.2.2 on Intel Mac)
|
||||||
|
_dtype_str_map: dict[str, torch.dtype] = {
|
||||||
|
"F64": torch.float64,
|
||||||
|
"F32": torch.float32,
|
||||||
|
"BF16": torch.bfloat16,
|
||||||
|
"F16": torch.float16,
|
||||||
|
# "U64": torch.uint64,
|
||||||
|
"I64": torch.int64,
|
||||||
|
# "U32": torch.uint32,
|
||||||
|
"I32": torch.int32,
|
||||||
|
# "U16": torch.uint16,
|
||||||
|
"I16": torch.int16,
|
||||||
|
"U8": torch.uint8,
|
||||||
|
"I8": torch.int8,
|
||||||
|
"BOOL": torch.bool,
|
||||||
|
"F8_E4M3": torch.float8_e4m3fn,
|
||||||
|
"F8_E5M2": torch.float8_e5m2,
|
||||||
|
}
|
||||||
|
else:
|
||||||
_dtype_str_map: dict[str, torch.dtype] = {
|
_dtype_str_map: dict[str, torch.dtype] = {
|
||||||
"F64": torch.float64,
|
"F64": torch.float64,
|
||||||
"F32": torch.float32,
|
"F32": torch.float32,
|
||||||
|
|
@ -13110,6 +13293,10 @@ def parse_args() -> argparse.Namespace:
|
||||||
"It can be used for sentence-transformers models, like google/embeddinggemma-300m. "
|
"It can be used for sentence-transformers models, like google/embeddinggemma-300m. "
|
||||||
"Default these modules are not included.")
|
"Default these modules are not included.")
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--legacy-torch", action="store_true",
|
||||||
|
help="Use legacy PyTorch dtype mappings for compatibility with older PyTorch versions (e.g., 2.2.2 on Intel Mac)."
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--fuse-gate-up-exps", action="store_true",
|
"--fuse-gate-up-exps", action="store_true",
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,11 @@ NOTE: some models may require large context window, for example: `-c 8192`
|
||||||
# Moondream2 20250414 version
|
# Moondream2 20250414 version
|
||||||
(tool_name) -hf ggml-org/moondream2-20250414-GGUF
|
(tool_name) -hf ggml-org/moondream2-20250414-GGUF
|
||||||
|
|
||||||
|
# Gemma 4
|
||||||
|
(tool_name) -hf ggml-org/gemma-4-E2B-it-GGUF
|
||||||
|
(tool_name) -hf ggml-org/gemma-4-E4B-it-GGUF
|
||||||
|
(tool_name) -hf ggml-org/gemma-4-26B-A4B-it-GGUF
|
||||||
|
(tool_name) -hf ggml-org/gemma-4-31B-it-GGUF
|
||||||
```
|
```
|
||||||
|
|
||||||
**Audio models**:
|
**Audio models**:
|
||||||
|
|
@ -118,6 +123,11 @@ NOTE: some models may require large context window, for example: `-c 8192`
|
||||||
# Capabilities: audio input, vision input
|
# Capabilities: audio input, vision input
|
||||||
(tool_name) -hf ggml-org/Qwen2.5-Omni-3B-GGUF
|
(tool_name) -hf ggml-org/Qwen2.5-Omni-3B-GGUF
|
||||||
(tool_name) -hf ggml-org/Qwen2.5-Omni-7B-GGUF
|
(tool_name) -hf ggml-org/Qwen2.5-Omni-7B-GGUF
|
||||||
|
|
||||||
|
# Gemma 4
|
||||||
|
# Capabilities: audio input, vision input
|
||||||
|
(tool_name) -hf ggml-org/gemma-4-E2B-it-GGUF
|
||||||
|
(tool_name) -hf ggml-org/gemma-4-E4B-it-GGUF
|
||||||
```
|
```
|
||||||
|
|
||||||
## Finding more models:
|
## Finding more models:
|
||||||
|
|
|
||||||
|
|
@ -488,7 +488,7 @@ static void dequantize_row_nvfp4_sycl(const void * vx, dst_t * y, const int64_t
|
||||||
const int nb = k / QK_NVFP4;
|
const int nb = k / QK_NVFP4;
|
||||||
stream->parallel_for(
|
stream->parallel_for(
|
||||||
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)),
|
||||||
[=](sycl::nd_item<3> item_ct1) {
|
[=](sycl::nd_item<3> /*item_ct1*/) {
|
||||||
dequantize_block_nvfp4(vx, y, k);
|
dequantize_block_nvfp4(vx, y, k);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@
|
||||||
#define GGML_SYCL_DEQUANTIZE_HPP
|
#define GGML_SYCL_DEQUANTIZE_HPP
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
|
#include "convert.hpp"
|
||||||
|
|
||||||
typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v);
|
typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v);
|
||||||
typedef void (*dequantize_kernel_t_reorder)(const void *d, const int64_t ib, const void *qs,
|
typedef void (*dequantize_kernel_t_reorder)(const void *d, const int64_t ib, const void *qs,
|
||||||
|
|
|
||||||
|
|
@ -355,7 +355,7 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst,
|
||||||
const int num_blocks = (n_elements + SYCL_ACC_BLOCK_SIZE - 1) / SYCL_ACC_BLOCK_SIZE;
|
const int num_blocks = (n_elements + SYCL_ACC_BLOCK_SIZE - 1) / SYCL_ACC_BLOCK_SIZE;
|
||||||
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE),
|
stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE),
|
||||||
sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE)),
|
sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE)),
|
||||||
[=](sycl::nd_item<3> item_ct1) {
|
[=](sycl::nd_item<3> /*item_ct1*/) {
|
||||||
acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, ne13, s1, s2, s3, offset);
|
acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, ne13, s1, s2, s3, offset);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -176,14 +176,12 @@ static void launch_gated_delta_net(const float * q_d,
|
||||||
const sycl::uint3 neqk1_magic = init_fastdiv_values(neqk1);
|
const sycl::uint3 neqk1_magic = init_fastdiv_values(neqk1);
|
||||||
const sycl::uint3 rq3_magic = init_fastdiv_values(rq3);
|
const sycl::uint3 rq3_magic = init_fastdiv_values(rq3);
|
||||||
|
|
||||||
int cc = ggml_sycl_info().devices[ggml_sycl_get_device()].cc;
|
|
||||||
|
|
||||||
switch (S_v) {
|
switch (S_v) {
|
||||||
case 16:
|
case 16:
|
||||||
{
|
{
|
||||||
constexpr int sv = 16;
|
constexpr int sv = 16;
|
||||||
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
||||||
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
[=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||||
gated_delta_net_sycl<sv, KDA>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens,
|
gated_delta_net_sycl<sv, KDA>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens,
|
||||||
n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2,
|
n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2,
|
||||||
sb3, neqk1_magic, rq3_magic, scale);
|
sb3, neqk1_magic, rq3_magic, scale);
|
||||||
|
|
@ -194,7 +192,7 @@ static void launch_gated_delta_net(const float * q_d,
|
||||||
{
|
{
|
||||||
constexpr int sv = 32;
|
constexpr int sv = 32;
|
||||||
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
||||||
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
[=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||||
gated_delta_net_sycl<sv, KDA>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens,
|
gated_delta_net_sycl<sv, KDA>(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens,
|
||||||
n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2,
|
n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2,
|
||||||
sb3, neqk1_magic, rq3_magic, scale);
|
sb3, neqk1_magic, rq3_magic, scale);
|
||||||
|
|
@ -205,7 +203,7 @@ static void launch_gated_delta_net(const float * q_d,
|
||||||
{
|
{
|
||||||
constexpr int sv = 64;
|
constexpr int sv = 64;
|
||||||
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
||||||
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
[=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||||
gated_delta_net_sycl<sv, KDA>(
|
gated_delta_net_sycl<sv, KDA>(
|
||||||
q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2,
|
q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2,
|
||||||
sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale);
|
sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale);
|
||||||
|
|
@ -217,7 +215,7 @@ static void launch_gated_delta_net(const float * q_d,
|
||||||
{
|
{
|
||||||
constexpr int sv = 128;
|
constexpr int sv = 128;
|
||||||
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims),
|
||||||
[=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
[=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||||
gated_delta_net_sycl<sv, KDA>(
|
gated_delta_net_sycl<sv, KDA>(
|
||||||
q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2,
|
q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2,
|
||||||
sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale);
|
sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale);
|
||||||
|
|
|
||||||
|
|
@ -4727,12 +4727,19 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||||
struct ggml_tensor * a = op->src[0];
|
struct ggml_tensor * a = op->src[0];
|
||||||
struct ggml_tensor * b = op->src[1];
|
struct ggml_tensor * b = op->src[1];
|
||||||
|
|
||||||
|
// disable Q1_0 until implementation
|
||||||
|
if (a->type == GGML_TYPE_Q1_0 || b->type == GGML_TYPE_Q1_0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (a->ne[3] != b->ne[3]) {
|
if (a->ne[3] != b->ne[3]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_type src0_type = op->src[0]->type;
|
ggml_type src0_type = op->src[0]->type;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// TODO: The configuration below needs more work to be supported with oneDNN
|
// TODO: The configuration below needs more work to be supported with oneDNN
|
||||||
if (ggml_is_permuted(a) && !ggml_is_contiguous(a) &&
|
if (ggml_is_permuted(a) && !ggml_is_contiguous(a) &&
|
||||||
a->ne[2] > 1 && a->ne[3] > 1 && src0_type == GGML_TYPE_F16) {
|
a->ne[2] > 1 && a->ne[3] > 1 && src0_type == GGML_TYPE_F16) {
|
||||||
|
|
|
||||||
|
|
@ -272,7 +272,7 @@ static void upscale_f32_sycl(const float * x,
|
||||||
sycl::nd_range<3>(
|
sycl::nd_range<3>(
|
||||||
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
||||||
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
||||||
[=](sycl::nd_item<3> item_ct1) {
|
[=](sycl::nd_item<3> /*item_ct1*/) {
|
||||||
upscale_f32(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3);
|
upscale_f32(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -304,7 +304,7 @@ static void upscale_f32_bilinear_sycl(const float * x,
|
||||||
sycl::nd_range<3>(
|
sycl::nd_range<3>(
|
||||||
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
||||||
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
||||||
[=](sycl::nd_item<3> item_ct1) {
|
[=](sycl::nd_item<3> /*item_ct1*/) {
|
||||||
upscale_f32_bilinear_antialias(
|
upscale_f32_bilinear_antialias(
|
||||||
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst,
|
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst,
|
||||||
ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
|
ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
|
||||||
|
|
@ -314,7 +314,7 @@ static void upscale_f32_bilinear_sycl(const float * x,
|
||||||
sycl::nd_range<3>(
|
sycl::nd_range<3>(
|
||||||
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
||||||
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
||||||
[=](sycl::nd_item<3> item_ct1) {
|
[=](sycl::nd_item<3> /*item_ct1*/) {
|
||||||
upscale_f32_bilinear(
|
upscale_f32_bilinear(
|
||||||
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst,
|
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst,
|
||||||
ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
|
ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
|
||||||
|
|
@ -349,7 +349,7 @@ static void upscale_f32_bicubic_sycl(const float * x,
|
||||||
sycl::nd_range<3>(
|
sycl::nd_range<3>(
|
||||||
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE),
|
||||||
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)),
|
||||||
[=](sycl::nd_item<3> item_ct1) {
|
[=](sycl::nd_item<3> /*item_ct1*/) {
|
||||||
upscale_f32_bicubic(
|
upscale_f32_bicubic(
|
||||||
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst,
|
x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst,
|
||||||
ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
|
ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
|
||||||
|
|
|
||||||
|
|
@ -534,11 +534,7 @@ static void ggml_backend_webgpu_debug(webgpu_global_context & ctx) {
|
||||||
encoder.CopyBufferToBuffer(ctx->debug_dev_buf, 0, ctx->debug_host_buf, 0, ctx->debug_host_buf.GetSize());
|
encoder.CopyBufferToBuffer(ctx->debug_dev_buf, 0, ctx->debug_host_buf, 0, ctx->debug_host_buf.GetSize());
|
||||||
wgpu::CommandBuffer commands = encoder.Finish();
|
wgpu::CommandBuffer commands = encoder.Finish();
|
||||||
ctx->queue.Submit(1, &commands);
|
ctx->queue.Submit(1, &commands);
|
||||||
if (!ggml_backend_webgpu_map_buffer(ctx, ctx->debug_host_buf, wgpu::MapMode::Read, 0,
|
ggml_backend_webgpu_map_buffer(ctx, ctx->debug_host_buf, wgpu::MapMode::Read, 0, ctx->debug_host_buf.GetSize());
|
||||||
ctx->debug_host_buf.GetSize())) {
|
|
||||||
GGML_LOG_ERROR("ggml_webgpu: Debug buffer map failed\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const float * debug_data = (const float *) ctx->debug_host_buf.GetConstMappedRange();
|
const float * debug_data = (const float *) ctx->debug_host_buf.GetConstMappedRange();
|
||||||
std::cout << "debug[0]: " << debug_data[0] << "\n";
|
std::cout << "debug[0]: " << debug_data[0] << "\n";
|
||||||
ctx->debug_host_buf.Unmap();
|
ctx->debug_host_buf.Unmap();
|
||||||
|
|
|
||||||
|
|
@ -798,6 +798,8 @@ class MODEL_TENSOR(IntEnum):
|
||||||
A_ENC_INP_PROJ = auto() # gemma4
|
A_ENC_INP_PROJ = auto() # gemma4
|
||||||
A_ENC_CONV1D = auto()
|
A_ENC_CONV1D = auto()
|
||||||
A_ENC_CONV1D_NORM = auto() # gemma3n
|
A_ENC_CONV1D_NORM = auto() # gemma3n
|
||||||
|
A_ENC_CONV2D = auto()
|
||||||
|
A_ENC_CONV_OUT = auto()
|
||||||
A_PRE_NORM = auto()
|
A_PRE_NORM = auto()
|
||||||
A_POST_NORM = auto()
|
A_POST_NORM = auto()
|
||||||
A_ENC_LAYER_PRE_NORM = auto() # gemma3n
|
A_ENC_LAYER_PRE_NORM = auto() # gemma3n
|
||||||
|
|
@ -1280,6 +1282,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_TENSOR.A_ENC_EMBD_TO_LOGITS: "a.embd_to_logits",
|
MODEL_TENSOR.A_ENC_EMBD_TO_LOGITS: "a.embd_to_logits",
|
||||||
MODEL_TENSOR.A_ENC_INP_PROJ: "a.input_projection",
|
MODEL_TENSOR.A_ENC_INP_PROJ: "a.input_projection",
|
||||||
MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
|
MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
|
||||||
|
MODEL_TENSOR.A_ENC_CONV2D: "a.conv2d.{bid}",
|
||||||
|
MODEL_TENSOR.A_ENC_CONV_OUT: "a.conv_out",
|
||||||
MODEL_TENSOR.A_ENC_CONV1D_NORM: "a.conv1d.{bid}.norm",
|
MODEL_TENSOR.A_ENC_CONV1D_NORM: "a.conv1d.{bid}.norm",
|
||||||
MODEL_TENSOR.A_PRE_NORM: "a.pre_ln",
|
MODEL_TENSOR.A_PRE_NORM: "a.pre_ln",
|
||||||
MODEL_TENSOR.A_POST_NORM: "a.post_ln",
|
MODEL_TENSOR.A_POST_NORM: "a.post_ln",
|
||||||
|
|
@ -1426,6 +1430,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.A_ENC_EMBD_TO_LOGITS,
|
MODEL_TENSOR.A_ENC_EMBD_TO_LOGITS,
|
||||||
MODEL_TENSOR.A_ENC_INP_PROJ,
|
MODEL_TENSOR.A_ENC_INP_PROJ,
|
||||||
MODEL_TENSOR.A_ENC_CONV1D,
|
MODEL_TENSOR.A_ENC_CONV1D,
|
||||||
|
MODEL_TENSOR.A_ENC_CONV2D,
|
||||||
|
MODEL_TENSOR.A_ENC_CONV_OUT,
|
||||||
MODEL_TENSOR.A_ENC_CONV1D_NORM,
|
MODEL_TENSOR.A_ENC_CONV1D_NORM,
|
||||||
MODEL_TENSOR.A_PRE_NORM,
|
MODEL_TENSOR.A_PRE_NORM,
|
||||||
MODEL_TENSOR.A_POST_NORM,
|
MODEL_TENSOR.A_POST_NORM,
|
||||||
|
|
@ -4112,6 +4118,7 @@ class VisionProjectorType:
|
||||||
ULTRAVOX = "ultravox"
|
ULTRAVOX = "ultravox"
|
||||||
INTERNVL = "internvl"
|
INTERNVL = "internvl"
|
||||||
QWEN2A = "qwen2a" # audio
|
QWEN2A = "qwen2a" # audio
|
||||||
|
QWEN3A = "qwen3a" # audio
|
||||||
GLMA = "glma" # audio
|
GLMA = "glma" # audio
|
||||||
QWEN25O = "qwen2.5o" # omni
|
QWEN25O = "qwen2.5o" # omni
|
||||||
VOXTRAL = "voxtral"
|
VOXTRAL = "voxtral"
|
||||||
|
|
|
||||||
|
|
@ -1892,6 +1892,14 @@ class TensorNameMap:
|
||||||
"conformer.subsample_conv_projection.input_proj_linear", # gemma4
|
"conformer.subsample_conv_projection.input_proj_linear", # gemma4
|
||||||
),
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.A_ENC_CONV2D: (
|
||||||
|
"audio_tower.conv2d{bid}", # qwen3omni
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.A_ENC_CONV_OUT: (
|
||||||
|
"audio_tower.conv_out", # qwen3omni
|
||||||
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.A_PRE_NORM: (),
|
MODEL_TENSOR.A_PRE_NORM: (),
|
||||||
|
|
||||||
MODEL_TENSOR.A_POST_NORM: (
|
MODEL_TENSOR.A_POST_NORM: (
|
||||||
|
|
@ -2042,7 +2050,8 @@ class TensorNameMap:
|
||||||
|
|
||||||
MODEL_TENSOR.A_MMPROJ: (
|
MODEL_TENSOR.A_MMPROJ: (
|
||||||
"audio.multi_modal_projector.linear_{bid}", # ultravox, meralion
|
"audio.multi_modal_projector.linear_{bid}", # ultravox, meralion
|
||||||
"audio_adapter.model.{bid}" # lfm2
|
"audio_adapter.model.{bid}", # lfm2
|
||||||
|
"audio_tower.proj{bid}", # qwen3omni
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.A_MMPROJ_FC: (
|
MODEL_TENSOR.A_MMPROJ_FC: (
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ add_library(mtmd
|
||||||
models/pixtral.cpp
|
models/pixtral.cpp
|
||||||
models/qwen2vl.cpp
|
models/qwen2vl.cpp
|
||||||
models/qwen3vl.cpp
|
models/qwen3vl.cpp
|
||||||
|
models/qwen3a.cpp
|
||||||
models/step3vl.cpp
|
models/step3vl.cpp
|
||||||
models/siglip.cpp
|
models/siglip.cpp
|
||||||
models/whisper-enc.cpp
|
models/whisper-enc.cpp
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,8 @@
|
||||||
|
|
||||||
// ultravox
|
// ultravox
|
||||||
#define TN_CONV1D "a.conv1d.%d.%s"
|
#define TN_CONV1D "a.conv1d.%d.%s"
|
||||||
|
#define TN_CONV2D "a.conv2d.%d.%s"
|
||||||
|
#define TN_CONV_OUT "a.conv_out.%s"
|
||||||
#define TN_MM_AUDIO_MLP "mm.a.mlp.%d.%s"
|
#define TN_MM_AUDIO_MLP "mm.a.mlp.%d.%s"
|
||||||
#define TN_MM_AUDIO_FC "mm.a.fc.%s" // fully connected layer
|
#define TN_MM_AUDIO_FC "mm.a.fc.%s" // fully connected layer
|
||||||
#define TN_MM_NORM_PRE "mm.a.norm_pre.%s"
|
#define TN_MM_NORM_PRE "mm.a.norm_pre.%s"
|
||||||
|
|
@ -271,6 +273,7 @@ enum projector_type {
|
||||||
PROJECTOR_TYPE_INTERNVL,
|
PROJECTOR_TYPE_INTERNVL,
|
||||||
PROJECTOR_TYPE_LLAMA4,
|
PROJECTOR_TYPE_LLAMA4,
|
||||||
PROJECTOR_TYPE_QWEN2A,
|
PROJECTOR_TYPE_QWEN2A,
|
||||||
|
PROJECTOR_TYPE_QWEN3A,
|
||||||
PROJECTOR_TYPE_GLMA,
|
PROJECTOR_TYPE_GLMA,
|
||||||
PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx
|
PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx
|
||||||
PROJECTOR_TYPE_VOXTRAL,
|
PROJECTOR_TYPE_VOXTRAL,
|
||||||
|
|
@ -315,6 +318,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
||||||
{ PROJECTOR_TYPE_INTERNVL, "internvl"},
|
{ PROJECTOR_TYPE_INTERNVL, "internvl"},
|
||||||
{ PROJECTOR_TYPE_LLAMA4, "llama4"},
|
{ PROJECTOR_TYPE_LLAMA4, "llama4"},
|
||||||
{ PROJECTOR_TYPE_QWEN2A, "qwen2a"},
|
{ PROJECTOR_TYPE_QWEN2A, "qwen2a"},
|
||||||
|
{ PROJECTOR_TYPE_QWEN3A, "qwen3a"},
|
||||||
{ PROJECTOR_TYPE_GLMA, "glma"},
|
{ PROJECTOR_TYPE_GLMA, "glma"},
|
||||||
{ PROJECTOR_TYPE_QWEN25O, "qwen2.5o"},
|
{ PROJECTOR_TYPE_QWEN25O, "qwen2.5o"},
|
||||||
{ PROJECTOR_TYPE_VOXTRAL, "voxtral"},
|
{ PROJECTOR_TYPE_VOXTRAL, "voxtral"},
|
||||||
|
|
|
||||||
|
|
@ -413,10 +413,20 @@ struct clip_model {
|
||||||
ggml_tensor * conv1d_1_b = nullptr;
|
ggml_tensor * conv1d_1_b = nullptr;
|
||||||
ggml_tensor * conv1d_2_w = nullptr;
|
ggml_tensor * conv1d_2_w = nullptr;
|
||||||
ggml_tensor * conv1d_2_b = nullptr;
|
ggml_tensor * conv1d_2_b = nullptr;
|
||||||
|
ggml_tensor * conv_out_w = nullptr;
|
||||||
|
ggml_tensor * conv_out_b = nullptr;
|
||||||
ggml_tensor * mm_norm_pre_w = nullptr;
|
ggml_tensor * mm_norm_pre_w = nullptr;
|
||||||
ggml_tensor * mm_norm_pre_b = nullptr;
|
ggml_tensor * mm_norm_pre_b = nullptr;
|
||||||
ggml_tensor * mm_norm_mid_w = nullptr;
|
ggml_tensor * mm_norm_mid_w = nullptr;
|
||||||
|
|
||||||
|
// qwen3a
|
||||||
|
ggml_tensor * conv2d_1_w = nullptr;
|
||||||
|
ggml_tensor * conv2d_1_b = nullptr;
|
||||||
|
ggml_tensor * conv2d_2_w = nullptr;
|
||||||
|
ggml_tensor * conv2d_2_b = nullptr;
|
||||||
|
ggml_tensor * conv2d_3_w = nullptr;
|
||||||
|
ggml_tensor * conv2d_3_b = nullptr;
|
||||||
|
|
||||||
// cogvlm
|
// cogvlm
|
||||||
ggml_tensor * mm_post_fc_norm_w = nullptr;
|
ggml_tensor * mm_post_fc_norm_w = nullptr;
|
||||||
ggml_tensor * mm_post_fc_norm_b = nullptr;
|
ggml_tensor * mm_post_fc_norm_b = nullptr;
|
||||||
|
|
|
||||||
|
|
@ -939,6 +939,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
{
|
{
|
||||||
builder = std::make_unique<clip_graph_glm4v>(ctx, img);
|
builder = std::make_unique<clip_graph_glm4v>(ctx, img);
|
||||||
} break;
|
} break;
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
|
{
|
||||||
|
builder = std::make_unique<clip_graph_qwen3a>(ctx, img);
|
||||||
|
} break;
|
||||||
case PROJECTOR_TYPE_YOUTUVL:
|
case PROJECTOR_TYPE_YOUTUVL:
|
||||||
{
|
{
|
||||||
builder = std::make_unique<clip_graph_youtuvl>(ctx, img);
|
builder = std::make_unique<clip_graph_youtuvl>(ctx, img);
|
||||||
|
|
@ -1402,6 +1406,7 @@ struct clip_model_loader {
|
||||||
} break;
|
} break;
|
||||||
case PROJECTOR_TYPE_ULTRAVOX:
|
case PROJECTOR_TYPE_ULTRAVOX:
|
||||||
case PROJECTOR_TYPE_QWEN2A:
|
case PROJECTOR_TYPE_QWEN2A:
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
case PROJECTOR_TYPE_GLMA:
|
case PROJECTOR_TYPE_GLMA:
|
||||||
case PROJECTOR_TYPE_VOXTRAL:
|
case PROJECTOR_TYPE_VOXTRAL:
|
||||||
case PROJECTOR_TYPE_MERALION:
|
case PROJECTOR_TYPE_MERALION:
|
||||||
|
|
@ -2072,6 +2077,20 @@ struct clip_model_loader {
|
||||||
model.mm_fc_w = get_tensor(string_format(TN_MM_AUDIO_FC, "weight"));
|
model.mm_fc_w = get_tensor(string_format(TN_MM_AUDIO_FC, "weight"));
|
||||||
model.mm_fc_b = get_tensor(string_format(TN_MM_AUDIO_FC, "bias"));
|
model.mm_fc_b = get_tensor(string_format(TN_MM_AUDIO_FC, "bias"));
|
||||||
} break;
|
} break;
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
|
{
|
||||||
|
model.conv2d_1_w = get_tensor(string_format(TN_CONV2D, 1, "weight"));
|
||||||
|
model.conv2d_1_b = get_tensor(string_format(TN_CONV2D, 1, "bias"));
|
||||||
|
model.conv2d_2_w = get_tensor(string_format(TN_CONV2D, 2, "weight"));
|
||||||
|
model.conv2d_2_b = get_tensor(string_format(TN_CONV2D, 2, "bias"));
|
||||||
|
model.conv2d_3_w = get_tensor(string_format(TN_CONV2D, 3, "weight"));
|
||||||
|
model.conv2d_3_b = get_tensor(string_format(TN_CONV2D, 3, "bias"));
|
||||||
|
model.conv_out_w = get_tensor(string_format(TN_CONV_OUT, "weight")); // no bias
|
||||||
|
model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight"));
|
||||||
|
model.mm_1_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "bias"));
|
||||||
|
model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight"));
|
||||||
|
model.mm_2_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "bias"));
|
||||||
|
} break;
|
||||||
case PROJECTOR_TYPE_VOXTRAL:
|
case PROJECTOR_TYPE_VOXTRAL:
|
||||||
{
|
{
|
||||||
model.conv1d_1_w = get_tensor(string_format(TN_CONV1D, 1, "weight"));
|
model.conv1d_1_w = get_tensor(string_format(TN_CONV1D, 1, "weight"));
|
||||||
|
|
@ -2948,6 +2967,15 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
|
||||||
n_patches /= 2;
|
n_patches /= 2;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
|
{
|
||||||
|
// 3x stride-2 conv2d: each step is floor((n-1)/2)+1
|
||||||
|
int n = img->nx;
|
||||||
|
n = (n - 1) / 2 + 1;
|
||||||
|
n = (n - 1) / 2 + 1;
|
||||||
|
n = (n - 1) / 2 + 1;
|
||||||
|
n_patches = n;
|
||||||
|
} break;
|
||||||
case PROJECTOR_TYPE_GLMA:
|
case PROJECTOR_TYPE_GLMA:
|
||||||
{
|
{
|
||||||
n_patches = img->nx;
|
n_patches = img->nx;
|
||||||
|
|
@ -3424,6 +3452,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
case PROJECTOR_TYPE_INTERNVL:
|
case PROJECTOR_TYPE_INTERNVL:
|
||||||
case PROJECTOR_TYPE_NEMOTRON_V2_VL:
|
case PROJECTOR_TYPE_NEMOTRON_V2_VL:
|
||||||
case PROJECTOR_TYPE_QWEN2A:
|
case PROJECTOR_TYPE_QWEN2A:
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
case PROJECTOR_TYPE_GLMA:
|
case PROJECTOR_TYPE_GLMA:
|
||||||
case PROJECTOR_TYPE_ULTRAVOX:
|
case PROJECTOR_TYPE_ULTRAVOX:
|
||||||
case PROJECTOR_TYPE_LFM2:
|
case PROJECTOR_TYPE_LFM2:
|
||||||
|
|
@ -3653,8 +3682,9 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
|
||||||
return ctx->model.mm_model_proj->ne[1];
|
return ctx->model.mm_model_proj->ne[1];
|
||||||
case PROJECTOR_TYPE_QWEN2A:
|
case PROJECTOR_TYPE_QWEN2A:
|
||||||
return ctx->model.mm_fc_w->ne[1];
|
return ctx->model.mm_fc_w->ne[1];
|
||||||
case PROJECTOR_TYPE_GLMA:
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
return ctx->model.mm_2_w->ne[1];
|
return ctx->model.mm_2_w->ne[1];
|
||||||
|
case PROJECTOR_TYPE_GLMA:
|
||||||
case PROJECTOR_TYPE_LFM2:
|
case PROJECTOR_TYPE_LFM2:
|
||||||
case PROJECTOR_TYPE_KIMIVL:
|
case PROJECTOR_TYPE_KIMIVL:
|
||||||
case PROJECTOR_TYPE_PADDLEOCR:
|
case PROJECTOR_TYPE_PADDLEOCR:
|
||||||
|
|
@ -3706,6 +3736,7 @@ bool clip_has_whisper_encoder(const struct clip_ctx * ctx) {
|
||||||
switch (ctx->proj_type()) {
|
switch (ctx->proj_type()) {
|
||||||
case PROJECTOR_TYPE_ULTRAVOX:
|
case PROJECTOR_TYPE_ULTRAVOX:
|
||||||
case PROJECTOR_TYPE_QWEN2A:
|
case PROJECTOR_TYPE_QWEN2A:
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
case PROJECTOR_TYPE_GLMA:
|
case PROJECTOR_TYPE_GLMA:
|
||||||
case PROJECTOR_TYPE_VOXTRAL:
|
case PROJECTOR_TYPE_VOXTRAL:
|
||||||
case PROJECTOR_TYPE_MERALION:
|
case PROJECTOR_TYPE_MERALION:
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,11 @@ struct clip_graph_mobilenetv5 : clip_graph {
|
||||||
const mobilenetv5_block & block);
|
const mobilenetv5_block & block);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct clip_graph_qwen3a : clip_graph {
|
||||||
|
clip_graph_qwen3a(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||||
|
ggml_cgraph * build() override;
|
||||||
|
};
|
||||||
|
|
||||||
struct clip_graph_kimik25 : clip_graph {
|
struct clip_graph_kimik25 : clip_graph {
|
||||||
clip_graph_kimik25(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
clip_graph_kimik25(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||||
ggml_cgraph * build() override;
|
ggml_cgraph * build() override;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
#include "models.h"
|
||||||
|
|
||||||
|
ggml_cgraph * clip_graph_qwen3a::build() {
|
||||||
|
ggml_tensor * inp = build_inp_raw(1);
|
||||||
|
|
||||||
|
// conv2d block
|
||||||
|
// TODO: do we need to split by chunks of n_window each like on transformers impl?
|
||||||
|
{
|
||||||
|
inp = ggml_conv_2d(ctx0, model.conv2d_1_w, inp, 2, 2, 1, 1, 1, 1);
|
||||||
|
inp = ggml_add(ctx0, inp, model.conv2d_1_b);
|
||||||
|
inp = ggml_gelu_erf(ctx0, inp);
|
||||||
|
|
||||||
|
inp = ggml_conv_2d(ctx0, model.conv2d_2_w, inp, 2, 2, 1, 1, 1, 1);
|
||||||
|
inp = ggml_add(ctx0, inp, model.conv2d_2_b);
|
||||||
|
inp = ggml_gelu_erf(ctx0, inp);
|
||||||
|
|
||||||
|
inp = ggml_conv_2d(ctx0, model.conv2d_3_w, inp, 2, 2, 1, 1, 1, 1);
|
||||||
|
inp = ggml_add(ctx0, inp, model.conv2d_3_b);
|
||||||
|
inp = ggml_gelu_erf(ctx0, inp);
|
||||||
|
|
||||||
|
// inp [n_pos, n_mels/8, channels, 1] (W, H, C, N)
|
||||||
|
cb(inp, "after_conv_blocks", -1);
|
||||||
|
|
||||||
|
const int64_t n_pos_after_conv = inp->ne[0];
|
||||||
|
const int64_t n_mel_after_conv = inp->ne[1]; // 128/8 = 16
|
||||||
|
|
||||||
|
inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 0, 2, 3, 1));
|
||||||
|
inp = ggml_reshape_2d(ctx0, inp, n_pos_after_conv, n_mel_after_conv * inp->ne[3]); // [n_pos, 7680]
|
||||||
|
inp = ggml_cont(ctx0, ggml_transpose(ctx0, inp)); // [7680, n_pos]
|
||||||
|
|
||||||
|
// project to n_embd
|
||||||
|
inp = ggml_mul_mat(ctx0, model.conv_out_w, inp);
|
||||||
|
if (model.conv_out_b) {
|
||||||
|
inp = ggml_add(ctx0, inp, model.conv_out_b);
|
||||||
|
}
|
||||||
|
cb(inp, "after_conv_out", -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto n_pos = inp->ne[1];
|
||||||
|
|
||||||
|
ggml_tensor * pos_embd_selected = ggml_view_2d(
|
||||||
|
ctx0, model.position_embeddings,
|
||||||
|
model.position_embeddings->ne[0], n_pos,
|
||||||
|
model.position_embeddings->nb[1], 0
|
||||||
|
);
|
||||||
|
ggml_tensor * cur = build_vit(
|
||||||
|
inp, n_pos,
|
||||||
|
NORM_TYPE_NORMAL,
|
||||||
|
hparams.ffn_op,
|
||||||
|
pos_embd_selected,
|
||||||
|
nullptr);
|
||||||
|
|
||||||
|
cb(cur, "after_transformer", -1);
|
||||||
|
|
||||||
|
// projector
|
||||||
|
cur = build_ffn(cur,
|
||||||
|
model.mm_1_w, model.mm_1_b,
|
||||||
|
nullptr, nullptr,
|
||||||
|
model.mm_2_w, model.mm_2_b,
|
||||||
|
FFN_GELU_ERF,
|
||||||
|
-1);
|
||||||
|
|
||||||
|
cb(cur, "projected", -1);
|
||||||
|
|
||||||
|
ggml_build_forward_expand(gf, cur);
|
||||||
|
|
||||||
|
return gf;
|
||||||
|
}
|
||||||
|
|
@ -198,35 +198,38 @@ struct img_tool {
|
||||||
private:
|
private:
|
||||||
// Bilinear resize function
|
// Bilinear resize function
|
||||||
static void resize_bilinear(const clip_image_u8 & src, clip_image_u8 & dst, int target_width, int target_height) {
|
static void resize_bilinear(const clip_image_u8 & src, clip_image_u8 & dst, int target_width, int target_height) {
|
||||||
GGML_ASSERT(src.nx >= 2 && src.ny >= 2);
|
if (src.nx == 0 || src.ny == 0) { dst.nx = dst.ny = 0; dst.buf.clear(); return; }
|
||||||
|
if (target_width <= 0) target_width = 1;
|
||||||
|
if (target_height <= 0) target_height = 1;
|
||||||
|
|
||||||
dst.nx = target_width;
|
dst.nx = target_width;
|
||||||
dst.ny = target_height;
|
dst.ny = target_height;
|
||||||
dst.buf.resize(3 * target_width * target_height);
|
dst.buf.resize(3 * target_width * target_height);
|
||||||
|
|
||||||
float x_ratio = static_cast<float>(src.nx - 1) / target_width;
|
float x_ratio = target_width > 1 ? static_cast<float>(src.nx - 1) / (target_width - 1) : 0.0f;
|
||||||
float y_ratio = static_cast<float>(src.ny - 1) / target_height;
|
float y_ratio = target_height > 1 ? static_cast<float>(src.ny - 1) / (target_height - 1) : 0.0f;
|
||||||
|
|
||||||
for (int y = 0; y < target_height; y++) {
|
for (int y = 0; y < target_height; ++y) {
|
||||||
for (int x = 0; x < target_width; x++) {
|
for (int x = 0; x < target_width; ++x) {
|
||||||
float px = x_ratio * x;
|
float px = x * x_ratio;
|
||||||
float py = y_ratio * y;
|
float py = y * y_ratio;
|
||||||
int x_floor = std::min(static_cast<int>(px), src.nx - 2);
|
|
||||||
int y_floor = std::min(static_cast<int>(py), src.ny - 2);
|
|
||||||
float x_lerp = px - x_floor;
|
|
||||||
float y_lerp = py - y_floor;
|
|
||||||
|
|
||||||
for (int c = 0; c < 3; c++) {
|
int x0 = std::min(static_cast<int>(px), src.nx - 1);
|
||||||
float top = lerp(
|
int y0 = std::min(static_cast<int>(py), src.ny - 1);
|
||||||
static_cast<float>(src.buf[3 * (y_floor * src.nx + x_floor) + c]),
|
int x1 = std::min(x0 + 1, src.nx - 1);
|
||||||
static_cast<float>(src.buf[3 * (y_floor * src.nx + (x_floor + 1)) + c]),
|
int y1 = std::min(y0 + 1, src.ny - 1);
|
||||||
x_lerp
|
|
||||||
);
|
float xf = px - x0;
|
||||||
float bottom = lerp(
|
float yf = py - y0;
|
||||||
static_cast<float>(src.buf[3 * ((y_floor + 1) * src.nx + x_floor) + c]),
|
|
||||||
static_cast<float>(src.buf[3 * ((y_floor + 1) * src.nx + (x_floor + 1)) + c]),
|
for (int c = 0; c < 3; ++c) {
|
||||||
x_lerp
|
float top = lerp(static_cast<float>(src.buf[3 * (y0 * src.nx + x0) + c]),
|
||||||
);
|
static_cast<float>(src.buf[3 * (y0 * src.nx + x1) + c]),
|
||||||
dst.buf[3 * (y * target_width + x) + c] = static_cast<uint8_t>(lerp(top, bottom, y_lerp));
|
xf);
|
||||||
|
float bottom = lerp(static_cast<float>(src.buf[3 * (y1 * src.nx + x0) + c]),
|
||||||
|
static_cast<float>(src.buf[3 * (y1 * src.nx + x1) + c]),
|
||||||
|
xf);
|
||||||
|
dst.buf[3 * (y * target_width + x) + c] = static_cast<uint8_t>(lerp(top, bottom, yf));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -455,6 +455,7 @@ struct mtmd_context {
|
||||||
// set preprocessor
|
// set preprocessor
|
||||||
switch (proj) {
|
switch (proj) {
|
||||||
case PROJECTOR_TYPE_QWEN2A:
|
case PROJECTOR_TYPE_QWEN2A:
|
||||||
|
case PROJECTOR_TYPE_QWEN3A:
|
||||||
case PROJECTOR_TYPE_QWEN25O:
|
case PROJECTOR_TYPE_QWEN25O:
|
||||||
{
|
{
|
||||||
// <|audio_bos|> ... (embeddings) ... <|audio_eos|>
|
// <|audio_bos|> ... (embeddings) ... <|audio_eos|>
|
||||||
|
|
@ -1027,6 +1028,10 @@ bool mtmd_decode_use_non_causal(mtmd_context * ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool mtmd_decode_use_mrope(mtmd_context * ctx) {
|
bool mtmd_decode_use_mrope(mtmd_context * ctx) {
|
||||||
|
if (ctx->ctx_v == nullptr && ctx->proj_type_a() == PROJECTOR_TYPE_QWEN3A) {
|
||||||
|
// qwen3-asr
|
||||||
|
return true;
|
||||||
|
}
|
||||||
switch (ctx->proj_type_v()) {
|
switch (ctx->proj_type_v()) {
|
||||||
case PROJECTOR_TYPE_QWEN2VL:
|
case PROJECTOR_TYPE_QWEN2VL:
|
||||||
case PROJECTOR_TYPE_QWEN25VL:
|
case PROJECTOR_TYPE_QWEN25VL:
|
||||||
|
|
|
||||||
|
|
@ -91,11 +91,13 @@ add_test_vision "ggml-org/LightOnOCR-1B-1025-GGUF:Q8_0"
|
||||||
add_test_vision "ggml-org/DeepSeek-OCR-GGUF:Q8_0" -p "Free OCR." --chat-template deepseek-ocr
|
add_test_vision "ggml-org/DeepSeek-OCR-GGUF:Q8_0" -p "Free OCR." --chat-template deepseek-ocr
|
||||||
add_test_vision "ggml-org/dots.ocr-GGUF:Q8_0" -p "OCR"
|
add_test_vision "ggml-org/dots.ocr-GGUF:Q8_0" -p "OCR"
|
||||||
add_test_vision "ggml-org/HunyuanOCR-GGUF:Q8_0" -p "OCR"
|
add_test_vision "ggml-org/HunyuanOCR-GGUF:Q8_0" -p "OCR"
|
||||||
|
add_test_vision "ggml-org/gemma-4-E2B-it-GGUF:Q8_0" --jinja
|
||||||
|
|
||||||
add_test_audio "ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF:Q8_0"
|
add_test_audio "ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF:Q8_0"
|
||||||
add_test_audio "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
|
add_test_audio "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
|
||||||
add_test_audio "ggml-org/Voxtral-Mini-3B-2507-GGUF:Q4_K_M"
|
add_test_audio "ggml-org/Voxtral-Mini-3B-2507-GGUF:Q4_K_M"
|
||||||
add_test_audio "ggml-org/LFM2-Audio-1.5B-GGUF:Q8_0"
|
add_test_audio "ggml-org/LFM2-Audio-1.5B-GGUF:Q8_0"
|
||||||
|
add_test_audio "ggml-org/gemma-4-E2B-it-GGUF:Q8_0" --jinja
|
||||||
|
|
||||||
# to test the big models, run: ./tests.sh big
|
# to test the big models, run: ./tests.sh big
|
||||||
if [ "$RUN_BIG_TESTS" = true ]; then
|
if [ "$RUN_BIG_TESTS" = true ]; then
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue