changes for tiny aya
This commit is contained in:
parent
41ea26144e
commit
a47878ab42
|
|
@ -1119,6 +1119,9 @@ class TextModel(ModelBase):
|
|||
if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
|
||||
# ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
|
||||
res = "command-r"
|
||||
if chkhsh == "d772b220ace2baec124bed8cfafce0ead7d6c38a4b65ef11261cf9d5d62246d1":
|
||||
# ref: /home/saurabh_cohere_com/tiny_aya/curr_best_ckpt
|
||||
res = "tiny_aya"
|
||||
if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
|
||||
# ref: https://huggingface.co/Qwen/Qwen1.5-7B
|
||||
res = "qwen2"
|
||||
|
|
@ -6920,6 +6923,14 @@ class Cohere2Model(TextModel):
|
|||
self.gguf_writer.add_rope_dimension_count(int(rotary_pct * (hidden_size // num_attention_heads)))
|
||||
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
|
||||
|
||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||
# Cohere2 runtime in llama.cpp expects no bias tensors; skip them at conversion time.
|
||||
if name.endswith(".bias"):
|
||||
logger.debug(f"Skipping bias tensor {name!r} for Cohere2 conversion.")
|
||||
return
|
||||
|
||||
yield from super().modify_tensors(data_torch, name, bid)
|
||||
|
||||
|
||||
@ModelBase.register("OlmoForCausalLM")
|
||||
@ModelBase.register("OLMoForCausalLM")
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ models = [
|
|||
{"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
|
||||
{"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
|
||||
{"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
|
||||
{"name": "tiny_aya", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/tiny-aya-base", },
|
||||
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
|
||||
{"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
|
||||
{"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
|
||||
|
|
|
|||
|
|
@ -415,6 +415,14 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|||
"[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_TINY_AYA:
|
||||
regex_exprs = {
|
||||
// digit grouping split from tokenizer.json
|
||||
"\\d{1,3}(?=(?:\\d{3})*\\b)",
|
||||
// main split regex from tokenizer.json
|
||||
"[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
|
||||
regex_exprs = {
|
||||
// K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
|
||||
|
|
@ -1987,10 +1995,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
tokenizer_pre == "megrez") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
|
||||
} else if (
|
||||
tokenizer_pre == "gpt-4o" ||
|
||||
tokenizer_pre == "llama4") {
|
||||
tokenizer_pre == "gpt-4o" ||
|
||||
tokenizer_pre == "llama4") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "TINY_AYA") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_TINY_AYA;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "superbpe") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_SUPERBPE;
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ enum llama_vocab_pre_type {
|
|||
LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43,
|
||||
LLAMA_VOCAB_PRE_TYPE_YOUTU = 44,
|
||||
LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE = 45,
|
||||
LLAMA_VOCAB_PRE_TYPE_TINY_AYA = 46,
|
||||
};
|
||||
|
||||
struct LLM_KV;
|
||||
|
|
|
|||
Loading…
Reference in New Issue