removed unnecessary internal methods called by the old set_vocab of KimiLinear

This commit is contained in:
Yee Man Chan 2025-12-18 08:14:15 +08:00
parent ef5bc30544
commit ae9771d1dc
1 changed file with 0 additions and 22 deletions


@@ -5160,28 +5160,6 @@ class KimiLinearModel(TextModel):
         else:
             raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
 
-    @staticmethod
-    def _token_bytes_to_string(b: bytes) -> str:
-        """Convert bytes to string representation for tokenizer"""
-        return ''.join([chr(byte) if byte < 128 else f'<0x{byte:02X}>' for byte in b])
-
-    @staticmethod
-    def _bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
-        """BPE tokenization for merges extraction"""
-        parts = [bytes([b]) for b in token]
-        while True:
-            min_idx = None
-            min_rank = None
-            for i, pair in enumerate(zip(parts[:-1], parts[1:])):
-                rank = mergeable_ranks.get(pair[0] + pair[1])
-                if rank is not None and (min_rank is None or rank < min_rank):
-                    min_idx = i
-                    min_rank = rank
-            if min_rank is None or (max_rank is not None and min_rank >= max_rank):
-                break
-            parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
-        return parts
-
     def prepare_tensors(self):
         super().prepare_tensors()
         if self._experts is not None:
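
For context, helper pairs like the removed _token_bytes_to_string/_bpe are typically used to reconstruct a BPE merge list from a tiktoken-style mergeable_ranks table when the model ships no merges file. The sketch below is not part of this commit: the two helpers are repeated as free functions, and extract_merges is a hypothetical driver name, shown only to illustrate the pattern the old set_vocab presumably relied on.

# Minimal sketch (illustrative, not the committed code): recover BPE merges
# from a tiktoken-style rank table using helpers like the removed ones.

def token_bytes_to_string(b: bytes) -> str:
    # ASCII bytes stay as characters, other bytes become <0xHH> placeholders
    return ''.join([chr(byte) if byte < 128 else f'<0x{byte:02X}>' for byte in b])

def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
    # greedily apply merges whose rank is below max_rank to the byte sequence
    parts = [bytes([b]) for b in token]
    while True:
        min_idx = None
        min_rank = None
        for i, pair in enumerate(zip(parts[:-1], parts[1:])):
            rank = mergeable_ranks.get(pair[0] + pair[1])
            if rank is not None and (min_rank is None or rank < min_rank):
                min_idx = i
                min_rank = rank
        if min_rank is None or (max_rank is not None and min_rank >= max_rank):
            break
        parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
    return parts

def extract_merges(mergeable_ranks: dict[bytes, int]) -> list[str]:
    # hypothetical driver: stop merging just below each token's own rank, so the
    # two surviving parts are exactly the pair whose merge produces that token
    merges = []
    for token, rank in mergeable_ranks.items():
        if len(token) == 1:
            continue
        parts = bpe(mergeable_ranks, token, max_rank=rank)
        if len(parts) == 2:
            merges.append(' '.join(map(token_bytes_to_string, parts)))
    return merges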