diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index d740dac065..23e23ca8c7 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -1395,6 +1395,14 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
}
+static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
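+ // Solar Open wraps reasoning as <|think|>...<|end|><|begin|>assistant<|content|>,
+ // with the visible reply following the final <|content|> marker.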
+ builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
+
+ // TODO: Tool calling
+
+ builder.add_content(builder.consume_rest());
+}
+
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("", "");
builder.add_content(builder.consume_rest());
@@ -1479,6 +1487,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
common_chat_parse_xiaomi_mimo(builder);
break;
+ case COMMON_CHAT_FORMAT_SOLAR_OPEN:
+ common_chat_parse_solar_open(builder);
+ break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
diff --git a/common/chat.cpp b/common/chat.cpp
index 7e940695bd..b98ab21ce1 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -669,6 +669,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+ case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
@@ -2517,6 +2518,27 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
return data;
}
+static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
+ common_chat_params data;
+
+ // TODO: Reasoning effort
+ json additional_context = {};
+
+ data.prompt = apply(tmpl, inputs, std::nullopt, std::nullopt, additional_context);
+ data.format = COMMON_CHAT_FORMAT_SOLAR_OPEN;
+
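+ // Control tokens the chat parser needs to see verbatim in the model output.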
+ data.preserved_tokens = {
+ "<|think|>",
+ "<|content|>",
+ "<|begin|>",
+ "<|end|>",
+ };
+
+ // TODO: Tool calling
+
+ return data;
+}
+
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
data.prompt = apply(tmpl, inputs);
@@ -2780,6 +2802,13 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_magistral(tmpl, params);
}
+ // Solar Open
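+ // Detected via the tool-response markers used in the Solar Open chat template.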
+ if (src.find("<|tool_response:begin|>") != std::string::npos &&
+ src.find("<|tool_response:name|>") != std::string::npos &&
+ src.find("<|tool_response:result|>") != std::string::npos) {
+ return common_chat_params_init_solar_open(tmpl, params);
+ }
+
// Plain handler (no tools)
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
return common_chat_params_init_without_tools(tmpl, params);
diff --git a/common/chat.h b/common/chat.h
index 6085510a40..8bd4a325ff 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -124,6 +124,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
COMMON_CHAT_FORMAT_APRIEL_1_5,
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
+ COMMON_CHAT_FORMAT_SOLAR_OPEN,
// These are intended to be parsed by the PEG parser
COMMON_CHAT_FORMAT_PEG_SIMPLE,
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index edc0ed539d..a1080b15f0 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1230,6 +1230,9 @@ class TextModel(ModelBase):
if chkhsh == "4a2e2abae11ca2b86d570fc5b44be4d5eb5e72cc8f22dd136a94b37da83ab665":
# ref: https://huggingface.co/KORMo-Team/KORMo-tokenizer
res = "kormo"
+ if chkhsh == "16389f0a1f51ee53e562ffd51c371dc508639ab0e4261502071836e50e223e91":
+ # ref: https://huggingface.co/upstage/Solar-Open-100B
+ res = "solar-open"
if res is None:
logger.warning("\n")
@@ -10617,6 +10620,26 @@ class JanusProVisionModel(MmprojModel):
return []
+@ModelBase.register("SolarOpenForCausalLM")
+class SolarOpenModel(Glm4MoeModel):
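+    # Reuses the GLM4-MoE conversion; only the vocab/special-token handling is overridden.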
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|endoftext|>"])
+ special_vocab._set_special_token("unk", tokenizer.get_added_vocab()[""])
+ special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|startoftext|>"])
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+
###### CONVERSION LOGIC ######
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 4378378309..b1ae4105ed 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -145,6 +145,7 @@ models = [
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
{"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },
{"name": "kormo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/KORMo-Team/KORMo-tokenizer", },
+ {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
]
# some models are known to be broken upstream, so we will skip them as exceptions
diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index fc6a6223cf..b54ebbd155 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -74,6 +74,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
{ "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
{ "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
+ { "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN },
};
llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -216,6 +217,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
return LLM_CHAT_TEMPLATE_GROK_2;
} else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
return LLM_CHAT_TEMPLATE_PANGU_EMBED;
+ } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
+ return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
}
return LLM_CHAT_TEMPLATE_UNKNOWN;
}
@@ -845,6 +848,14 @@ int32_t llm_chat_apply_template(
if (add_ass) {
ss << "[unused9]助手:";
}
+ } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
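+ // Render each message as <|begin|>{role}<|content|>{content}<|end|>, then open an assistant turn when add_ass is set.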
+ for (auto message : chat) {
+ std::string role(message->role);
+ ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
+ }
+ if (add_ass) {
+ ss << "<|begin|>assistant";
+ }
} else {
// template not supported
return -1;
diff --git a/src/llama-chat.h b/src/llama-chat.h
index 684efb4d67..e1f795249c 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -54,6 +54,7 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_SEED_OSS,
LLM_CHAT_TEMPLATE_GROK_2,
LLM_CHAT_TEMPLATE_PANGU_EMBED,
+ LLM_CHAT_TEMPLATE_SOLAR_OPEN,
LLM_CHAT_TEMPLATE_UNKNOWN,
};
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 5e664c8c57..dfb5c0ce82 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -126,6 +126,7 @@ const char * llm_type_name(llm_type type) {
case LLM_TYPE_31B_A3_5B: return "31B.A3.5B";
case LLM_TYPE_80B_A3B: return "80B.A3B";
case LLM_TYPE_100B_A6B: return "100B.A6B";
+ case LLM_TYPE_102B_A12B: return "102B.A12B";
case LLM_TYPE_106B_A12B: return "106B.A12B";
case LLM_TYPE_230B_A10B: return "230B.A10B";
case LLM_TYPE_235B_A22B: return "235B.A22B";
@@ -1778,6 +1779,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
switch (hparams.n_layer) {
case 47: type = LLM_TYPE_106B_A12B; break; // GLM-4.5-Air (46 layers + 1 NextN layer)
+ case 48: type = LLM_TYPE_102B_A12B; break; // Solar Open
case 93: type = LLM_TYPE_355B_A32B; break; // GLM-4.5 (92 layers + 1 NextN layer)
default: type = LLM_TYPE_UNKNOWN;
}
@@ -5206,9 +5208,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, flags);
layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, flags);
layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, flags);
- layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, flags);
- layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, flags);
- layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, flags);
+ layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED | flags);
+ layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED | flags);
+ layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED | flags);
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, flags);
diff --git a/src/llama-model.h b/src/llama-model.h
index f4f44a92b6..79200a0d97 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -119,6 +119,7 @@ enum llm_type {
LLM_TYPE_31B_A3_5B,
LLM_TYPE_80B_A3B, // Qwen3 Next
LLM_TYPE_100B_A6B,
+ LLM_TYPE_102B_A12B, // Solar-Open
LLM_TYPE_106B_A12B, // GLM-4.5-Air
LLM_TYPE_230B_A10B, // Minimax M2
LLM_TYPE_235B_A22B,
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index cd4092ca07..c57055082b 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -355,6 +355,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
+ case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
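+ // solar-open shares the split regex below with stablelm2/qwen2/hunyuan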
regex_exprs = {
// original regex from tokenizer.json
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -2015,6 +2016,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "minimax-m2") {
pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
clean_spaces = false;
+ } else if (
+ tokenizer_pre == "solar-open") {
+ pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
+ clean_spaces = false;
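+ // same whitespace handling as minimax-m2 above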
} else {
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
}
@@ -2358,6 +2363,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<|end|>"
|| t.first == "<|return|>" // o200k_harmony
|| t.first == "<|call|>" // o200k_harmony
+ || t.first == "<|flush|>" // solar-open
+ || t.first == "<|calls|>" // solar-open
|| t.first == ""
|| t.first == "<|endoftext|>"
|| t.first == "<|eom_id|>"
@@ -2404,13 +2411,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
}
- // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
- // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+ // TODO: workaround for the o200k_harmony and solar-open tokenizers: the "<|end|>" token should not be EOG
+ // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens ("<|calls|>" and "<|flush|>" for solar-open),
// we remove the "<|end|>" token from the EOG list
{
bool has_return = false;
bool has_call = false;
bool has_end = false;
+ bool has_flush = false;
llama_token end_id = LLAMA_TOKEN_NULL;
@@ -2420,18 +2428,20 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
if (id_to_token[tid].text == "<|return|>") {
has_return = true;
- } else if (id_to_token[tid].text == "<|call|>") {
+ } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
has_call = true;
+ } else if (id_to_token[tid].text == "<|flush|>") {
+ has_flush = true;
} else if (id_to_token[tid].text == "<|end|>") {
has_end = true;
end_id = tid;
}
}
- if (has_return && has_call && has_end) {
+ if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
special_eog_ids.erase(end_id);
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
- LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+ LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
}
}
}
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 55f8f3923c..f5bdd22311 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -51,6 +51,7 @@ enum llama_vocab_pre_type {
LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2 = 41,
LLAMA_VOCAB_PRE_TYPE_AFMOE = 42,
+ LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43,
};
struct LLM_KV;