diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ac353c7dda..c6724f2ed5 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5160,28 +5160,6 @@ class KimiLinearModel(TextModel):
         else:
             raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
 
-    @staticmethod
-    def _token_bytes_to_string(b: bytes) -> str:
-        """Convert bytes to string representation for tokenizer"""
-        return ''.join([chr(byte) if byte < 128 else f'<0x{byte:02X}>' for byte in b])
-
-    @staticmethod
-    def _bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
-        """BPE tokenization for merges extraction"""
-        parts = [bytes([b]) for b in token]
-        while True:
-            min_idx = None
-            min_rank = None
-            for i, pair in enumerate(zip(parts[:-1], parts[1:])):
-                rank = mergeable_ranks.get(pair[0] + pair[1])
-                if rank is not None and (min_rank is None or rank < min_rank):
-                    min_idx = i
-                    min_rank = rank
-            if min_rank is None or (max_rank is not None and min_rank >= max_rank):
-                break
-            parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
-        return parts
-
     def prepare_tensors(self):
         super().prepare_tensors()
         if self._experts is not None: