remove blank lines to make lint happy
This commit is contained in:
parent
4f6ef2c085
commit
719d374bf6
|
|
@ -5088,7 +5088,6 @@ class KimiLinearModel(TextModel):
|
||||||
merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
|
merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
|
||||||
if len(merged) == 2:
|
if len(merged) == 2:
|
||||||
merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
|
merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
|
||||||
|
|
||||||
# Build token list
|
# Build token list
|
||||||
vocab_size = self.hparams["vocab_size"]
|
vocab_size = self.hparams["vocab_size"]
|
||||||
special_tokens = tokenizer.special_tokens
|
special_tokens = tokenizer.special_tokens
|
||||||
|
|
@ -5316,14 +5315,11 @@ class KimiLinearModel(TextModel):
|
||||||
if name.endswith("kv_b_proj.weight"):
|
if name.endswith("kv_b_proj.weight"):
|
||||||
name_kb = name.replace("kv_b_proj", "k_b_proj")
|
name_kb = name.replace("kv_b_proj", "k_b_proj")
|
||||||
name_vb = name.replace("kv_b_proj", "v_b_proj")
|
name_vb = name.replace("kv_b_proj", "v_b_proj")
|
||||||
|
|
||||||
n_head_kv = self.hparams["num_key_value_heads"]
|
n_head_kv = self.hparams["num_key_value_heads"]
|
||||||
v_head_dim = self.hparams["v_head_dim"]
|
v_head_dim = self.hparams["v_head_dim"]
|
||||||
qk_nope_head_dim = self.hparams["qk_nope_head_dim"]
|
qk_nope_head_dim = self.hparams["qk_nope_head_dim"]
|
||||||
logger.info("Split kv_b n_head_kv %d\n" % n_head_kv)
|
logger.info("Split kv_b n_head_kv %d\n" % n_head_kv)
|
||||||
|
|
||||||
assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim)
|
assert data_torch.shape[0] == n_head_kv * (v_head_dim + qk_nope_head_dim)
|
||||||
|
|
||||||
kv_b = data_torch.view(n_head_kv, v_head_dim + qk_nope_head_dim, data_torch.shape[-1])
|
kv_b = data_torch.view(n_head_kv, v_head_dim + qk_nope_head_dim, data_torch.shape[-1])
|
||||||
k_b, v_b = torch.split(kv_b, [qk_nope_head_dim, v_head_dim], dim=1)
|
k_b, v_b = torch.split(kv_b, [qk_nope_head_dim, v_head_dim], dim=1)
|
||||||
k_b = k_b.transpose(1, 2)
|
k_b = k_b.transpose(1, 2)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue