From e97492369888f5311e4d1f3beb325a36bbed70e9 Mon Sep 17 00:00:00 2001
From: Xuan-Son Nguyen
Date: Mon, 13 Apr 2026 22:28:17 +0200
Subject: [PATCH] docs: listing qwen3-asr and qwen3-omni as supported (#21857)

* docs: listing qwen3-asr and qwen3-omni as supported

* nits
---
 docs/multimodal.md  | 9 +++++++++
 tools/mtmd/tests.sh | 1 +
 2 files changed, 10 insertions(+)

diff --git a/docs/multimodal.md b/docs/multimodal.md
index 89b9574f0a..33d1df33c3 100644
--- a/docs/multimodal.md
+++ b/docs/multimodal.md
@@ -114,6 +114,10 @@ NOTE: some models may require large context window, for example: `-c 8192`
 
 # Mistral's Voxtral
 (tool_name) -hf ggml-org/Voxtral-Mini-3B-2507-GGUF
+
+# Qwen3-ASR
+(tool_name) -hf ggml-org/Qwen3-ASR-0.6B-GGUF
+(tool_name) -hf ggml-org/Qwen3-ASR-1.7B-GGUF
 ```
 
 **Mixed modalities**:
@@ -124,6 +128,11 @@ NOTE: some models may require large context window, for example: `-c 8192`
 (tool_name) -hf ggml-org/Qwen2.5-Omni-3B-GGUF
 (tool_name) -hf ggml-org/Qwen2.5-Omni-7B-GGUF
 
+# Qwen3 Omni
+# Capabilities: audio input, vision input
+(tool_name) -hf ggml-org/Qwen3-Omni-30B-A3B-Instruct-GGUF
+(tool_name) -hf ggml-org/Qwen3-Omni-30B-A3B-Thinking-GGUF
+
 # Gemma 4
 # Capabilities: audio input, vision input
 (tool_name) -hf ggml-org/gemma-4-E2B-it-GGUF
diff --git a/tools/mtmd/tests.sh b/tools/mtmd/tests.sh
index eb01986164..5da48d61bf 100755
--- a/tools/mtmd/tests.sh
+++ b/tools/mtmd/tests.sh
@@ -98,6 +98,7 @@ add_test_audio "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
 add_test_audio "ggml-org/Voxtral-Mini-3B-2507-GGUF:Q4_K_M"
 add_test_audio "ggml-org/LFM2-Audio-1.5B-GGUF:Q8_0"
 add_test_audio "ggml-org/gemma-4-E2B-it-GGUF:Q8_0" --jinja
+add_test_audio "ggml-org/Qwen3-ASR-0.6B-GGUF:Q8_0"
 
 # to test the big models, run: ./tests.sh big
 if [ "$RUN_BIG_TESTS" = true ]; then