From 947710851f5274948cd251360427516212d34f91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?= <dranger003@gmail.com>
Date: Sun, 15 Feb 2026 18:57:31 -0500
Subject: [PATCH 1/5] convert_hf_to_gguf: add JoyAI-LLM-Flash tokenizer hash
 mapping to deepseek-v3

---
 convert_hf_to_gguf.py        | 3 +++
 convert_hf_to_gguf_update.py | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 0f614e4df3..798d6e2218 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1085,6 +1085,9 @@ class TextModel(ModelBase):
         if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
             # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
             res = "glm4"
+        if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
+            # ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
+            res = "deepseek-v3"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index a683451508..8c78e86da8 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -158,6 +158,7 @@ pre_computed_hashes = [
     {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
     {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
     {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.5-Air", "chkhsh": "9ca2dd618e8afaf09731a7cf6e2105b373ba6a1821559f258b272fe83e6eb902"},
+    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"},
     {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
     {"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
     {"name": "hunyuan-dense", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-4B-Instruct", "chkhsh": "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6"},
@@ -171,7 +172,8 @@ pre_computed_hashes = [
     {"name": "grok-2",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
     # jina-v2-de variants
     {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
-    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"},
+    # deepseek-v3 variants
+    {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", "chkhsh": "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d"},
 ]
 
 

From 7af1cce091223b49a42f67275617aef97e7cff77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?= <dranger003@gmail.com>
Date: Mon, 16 Feb 2026 07:44:19 -0500
Subject: [PATCH 2/5] llama-vocab: create a new pre-tokenizer name for
 joyai-llm.

---
 convert_hf_to_gguf.py        | 12 ++++++------
 convert_hf_to_gguf_update.py |  5 ++---
 src/llama-vocab.cpp          |  1 +
 src/llama-vocab.h            |  1 +
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 798d6e2218..08b6f477a7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1049,6 +1049,9 @@ class TextModel(ModelBase):
         if chkhsh == "9ca2dd618e8afaf09731a7cf6e2105b373ba6a1821559f258b272fe83e6eb902":
             # ref: https://huggingface.co/zai-org/GLM-4.5-Air
             res = "glm4"
+        if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
+            # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
+            res = "glm4"
         if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
             # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
             res = "minerva-7b"
@@ -1082,12 +1085,6 @@ class TextModel(ModelBase):
         if chkhsh == "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df":
             # ref: https://huggingface.co/aari1995/German_Semantic_V3
             res = "jina-v2-de"
-        if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
-            # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
-            res = "glm4"
-        if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
-            # ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
-            res = "deepseek-v3"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
@@ -1268,6 +1265,9 @@ class TextModel(ModelBase):
         if chkhsh == "d30d75d9059f1aa2c19359de71047b3ae408c70875e8a3ccf8c5fba56c9d8af4":
             # ref: https://huggingface.co/Qwen/Qwen3.5-9B-Instruct
             res = "qwen35"
+        if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
+            # ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
+            res = "joyai_llm"
 
         if res is None:
             logger.warning("\n")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 8c78e86da8..faceced7af 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -148,7 +148,8 @@ models = [
     {"name": "youtu",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", },
     {"name": "solar-open",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
     {"name": "exaone-moe",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
-    {"name": "qwen35",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", }
+    {"name": "qwen35",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", },
+    {"name": "joyai_llm",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions
@@ -172,8 +173,6 @@ pre_computed_hashes = [
     {"name": "grok-2",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
     # jina-v2-de variants
     {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
-    # deepseek-v3 variants
-    {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", "chkhsh": "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d"},
 ]
 
 
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 62e137fb84..8edb19bebb 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -308,6 +308,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                 break;
             case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
             case LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE:
+            case LLAMA_VOCAB_PRE_TYPE_JOYAI_LLM:
                 regex_exprs = {
                     "\\p{N}{1,3}",
                     "[一-龥぀-ゟ゠-ヿ]+",
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 718238fb86..527bb2c447 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -55,6 +55,7 @@ enum llama_vocab_pre_type {
     LLAMA_VOCAB_PRE_TYPE_YOUTU           = 44,
     LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE      = 45,
     LLAMA_VOCAB_PRE_TYPE_QWEN35          = 46,
+    LLAMA_VOCAB_PRE_TYPE_JOYAI_LLM       = 47,
 };
 
 struct LLM_KV;

From 629e49994cf62738da4c20b45d449de5e7cd15ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?= <dranger003@gmail.com>
Date: Mon, 16 Feb 2026 07:53:34 -0500
Subject: [PATCH 3/5] convert_hf_to_gguf_update: rename joyai_llm to joyai-llm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
---
 convert_hf_to_gguf_update.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index faceced7af..9a5998d9c0 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -149,7 +149,7 @@ models = [
     {"name": "solar-open",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
     {"name": "exaone-moe",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
     {"name": "qwen35",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", },
-    {"name": "joyai_llm",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
+    {"name": "joyai-llm",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions

From 5255b32ccc9716e70eb0bfa6fe2427991ae68758 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?= <dranger003@gmail.com>
Date: Mon, 16 Feb 2026 07:53:41 -0500
Subject: [PATCH 4/5] convert_hf_to_gguf: rename joyai_llm to joyai-llm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 08b6f477a7..67bbf012df 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1267,7 +1267,7 @@ class TextModel(ModelBase):
             res = "qwen35"
         if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
             # ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
-            res = "joyai_llm"
+            res = "joyai-llm"
 
         if res is None:
             logger.warning("\n")

From f5f2a087d8ac70220c06cda38bf01d851fd3bbee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?= <dranger003@gmail.com>
Date: Mon, 16 Feb 2026 07:55:38 -0500
Subject: [PATCH 5/5] llama-vocab: handle "joyai-llm" tokenizer_pre in vocab load

---
 src/llama-vocab.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 8edb19bebb..ce481be044 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2040,6 +2040,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "hunyuan-dense") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "joyai-llm") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_JOYAI_LLM;
+                clean_spaces = false;
             } else if (
                 tokenizer_pre == "kimi-k2") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;