# llama.cpp/tools/moe-pruning/extract_ppl.py
#
# Split rwsft-training-data.jsonl into train/val perplexity-eval text files
# for llama-perplexity (base model vs. adapter comparison).
import json, os

# All paths are resolved relative to this script's directory.
base = os.path.dirname(os.path.abspath(__file__))

# Load the raw JSONL dataset; use a context manager so the handle is
# closed deterministically (the original leaked it via bare open()).
with open(os.path.join(base, 'rwsft-training-data.jsonl'), encoding='utf-8') as f:
    lines = f.readlines()

# 95/5 train/validation split, in file order (no shuffling).
split = int(len(lines) * 0.95)
train_lines = lines[:split]
val_lines = lines[split:]

# Output text files consumed by llama-perplexity.
train_out = os.path.join(base, 'ppl-eval-train.txt')
val_out = os.path.join(base, 'ppl-eval-val.txt')
def fmt(s):
    """Format one JSONL sample dict into eval text, or None to skip it.

    Emits the full prompt+response so the model is conditioned correctly:
    llama-perplexity scores all tokens, but the prompt PPL is identical
    for base vs adapter — the delta is driven by the response tokens.
    Samples with an empty/whitespace-only response are skipped.
    """
    prompt = s.get('prompt', '').strip()
    response = s.get('response', '').strip()
    if not response:
        return None
    return f'{prompt}\n{response}' if prompt else response
def _write_split(samples, path):
    """Write formatted samples to *path*, one per blank-line-separated block.

    The trailing '\n\n' separates documents so llama-perplexity treats
    them as independent chunks; unformattable samples (fmt -> None) are
    skipped. Factored out to remove the copy-pasted train/val loops.
    """
    with open(path, 'w', encoding='utf-8') as f:
        for line in samples:
            text = fmt(json.loads(line))
            if text:
                f.write(text + '\n\n')

_write_split(train_lines, train_out)
_write_split(val_lines, val_out)
def _char_count(path):
    """Return the character (not byte) count of a UTF-8 text file."""
    # Context manager closes the handle; the original leaked bare open().
    with open(path, encoding='utf-8') as f:
        return len(f.read())

# Summary so the operator can sanity-check split sizes at a glance.
train_chars = _char_count(train_out)
val_chars = _char_count(val_out)
print(f'train: {len(train_lines)} samples, {train_chars:,} chars -> ppl-eval-train.txt')
print(f'val: {len(val_lines)} samples, {val_chars:,} chars -> ppl-eval-val.txt')