Remove unnecessary internal methods that were only called by the old set_vocab of KimiLinearModel
This commit is contained in:
parent
ef5bc30544
commit
ae9771d1dc
|
|
@ -5160,28 +5160,6 @@ class KimiLinearModel(TextModel):
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
|
raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _token_bytes_to_string(b: bytes) -> str:
|
|
||||||
"""Convert bytes to string representation for tokenizer"""
|
|
||||||
return ''.join([chr(byte) if byte < 128 else f'<0x{byte:02X}>' for byte in b])
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
|
|
||||||
"""BPE tokenization for merges extraction"""
|
|
||||||
parts = [bytes([b]) for b in token]
|
|
||||||
while True:
|
|
||||||
min_idx = None
|
|
||||||
min_rank = None
|
|
||||||
for i, pair in enumerate(zip(parts[:-1], parts[1:])):
|
|
||||||
rank = mergeable_ranks.get(pair[0] + pair[1])
|
|
||||||
if rank is not None and (min_rank is None or rank < min_rank):
|
|
||||||
min_idx = i
|
|
||||||
min_rank = rank
|
|
||||||
if min_rank is None or (max_rank is not None and min_rank >= max_rank):
|
|
||||||
break
|
|
||||||
parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
|
|
||||||
return parts
|
|
||||||
|
|
||||||
def prepare_tensors(self):
|
def prepare_tensors(self):
|
||||||
super().prepare_tensors()
|
super().prepare_tensors()
|
||||||
if self._experts is not None:
|
if self._experts is not None:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue