llama.cpp/scripts/analyze-ffn-down.py

605 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Deep analysis of WHY ffn_down is hard to quantize.
Compares structural properties of all weight and activation tensors.
"""
import numpy as np
import struct
import sys
import os
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "data")
def load_f32_tensor(name, data_dir=None):
    """Load a binary tensor dump produced by the instrumented llama.cpp run.

    File layout: two int64 header fields (nrow, ncol; native byte order per
    struct "qq"), followed by nrow*ncol float32 values in row-major order.

    Args:
        name: file name of the dump.
        data_dir: directory containing the dump; defaults to the module-level
            DATA_DIR (the ../data directory next to this script).

    Returns:
        np.ndarray of shape (nrow, ncol), dtype float32.

    Raises:
        AssertionError: if the payload size does not match the header counts.
    """
    path = os.path.join(DATA_DIR if data_dir is None else data_dir, name)
    with open(path, "rb") as f:
        nrow, ncol = struct.unpack("qq", f.read(16))
        data = np.frombuffer(f.read(), dtype=np.float32)
    assert len(data) == nrow * ncol, f"Expected {nrow * ncol}, got {len(data)}"
    return data.reshape(nrow, ncol)
def stats(label, arr):
    """Print summary statistics for *arr* (flattened) and return key moments.

    Reports mean/std/min/max/median, an offset-to-spread ratio, skewness and
    excess kurtosis, a percentile spread, and a near-zero fraction; returns a
    dict with the basic moments for programmatic use.
    """
    a = arr.ravel()
    mu = a.mean()
    sd = a.std()
    print(f" {label}:")
    print(f" shape={arr.shape}, n={len(a)}")
    print(f" mean={mu:.6f}, std={sd:.6f}")
    print(f" min={a.min():.6f}, max={a.max():.6f}")
    print(f" median={np.median(a):.6f}")
    print(
        f" |mean|/std = {abs(mu) / (sd + 1e-10):.4f} (offset-to-spread ratio)"
    )
    # Standardized values drive both higher moments.
    z = (a - mu) / (sd + 1e-10)
    kurt = np.mean(z**4) - 3.0  # excess kurtosis: heavy-tailed vs Gaussian
    skew = np.mean(z**3)
    print(f" skewness={skew:.4f}, excess_kurtosis={kurt:.4f}")
    pcts = np.percentile(a, [0.1, 1, 5, 25, 50, 75, 95, 99, 99.9])
    print(
        f" percentiles: 0.1%={pcts[0]:.4f}, 1%={pcts[1]:.4f}, 5%={pcts[2]:.4f}, "
        f"25%={pcts[3]:.4f}, 50%={pcts[4]:.4f}, 75%={pcts[5]:.4f}, "
        f"95%={pcts[6]:.4f}, 99%={pcts[7]:.4f}, 99.9%={pcts[8]:.4f}"
    )
    # Sparsity: fraction of entries essentially zero relative to the spread.
    near_zero = np.sum(np.abs(a) < 0.001 * sd) / len(a)
    print(f" fraction |x| < 0.001*std: {near_zero:.4f}")
    return {
        "mean": mu,
        "std": sd,
        "skew": skew,
        "kurt": kurt,
        "min": a.min(),
        "max": a.max(),
    }
# ============================================================================
# 1. BASIC WEIGHT TENSOR COMPARISON
# ============================================================================
print("=" * 80)
print("SECTION 1: WEIGHT TENSOR GLOBAL STATISTICS")
print("=" * 80)
# name -> (dump file, human-readable shape/role description)
tensors = {
    "ffn_gate": ("blk_0_ffn_gate_weight.f32bin", "9728x2560 (wide→narrow proj)"),
    "ffn_up": ("blk_0_ffn_up_weight.f32bin", "9728x2560 (wide→narrow proj)"),
    "ffn_down": ("blk_0_ffn_down_weight.f32bin", "2560x9728 (narrow→wide proj)"),
    "attn_q": ("blk_0_attn_q_weight.f32bin", "4096x2560"),
    "attn_k": ("blk_0_attn_k_weight.f32bin", "1024x2560"),
    "attn_v": ("blk_0_attn_v_weight.f32bin", "1024x2560"),
    "attn_out": ("blk_0_attn_output_weight.f32bin", "2560x4096"),
}
weight_data = {}
for name, (fname, desc) in tensors.items():
    try:
        W = load_f32_tensor(fname)
        # NOTE(review): the separator glyph was stripped in a Unicode-cleaning
        # pass (original printed an empty string * 70); '-' restores a visible
        # rule — confirm against the original if the exact glyph matters.
        print(f"\n{'-' * 70}")
        print(f" {name} [{desc}] — file: {fname}")
        weight_data[name] = W
        stats(name, W)
    except Exception as e:
        # Missing dumps are expected when a model lacks a tensor; keep going.
        print(f" {name}: SKIP ({e})")
# ============================================================================
# 2. ROW-LEVEL STATISTICS (each row is a neuron output)
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 2: ROW-LEVEL VARIABILITY (per-neuron weight statistics)")
print("=" * 80)
print(" Each row of the weight matrix produces one output dimension.")
print(" High row-to-row variability in mean/std means the quantizer")
print(" must handle very different distributions across rows.\n")
for name, W in weight_data.items():
    # Per-row first/second moments and spread.
    row_means = W.mean(axis=1)
    row_stds = W.std(axis=1)
    row_ranges = W.max(axis=1) - W.min(axis=1)
    print(f"\n {name} ({W.shape[0]} rows × {W.shape[1]} cols):")
    print(
        f" Row means: mean={row_means.mean():.6f}, std={row_means.std():.6f}, "
        f"range=[{row_means.min():.6f}, {row_means.max():.6f}]"
    )
    print(
        f" Row stds: mean={row_stds.mean():.6f}, std={row_stds.std():.6f}, "
        f"range=[{row_stds.min():.6f}, {row_stds.max():.6f}]"
    )
    print(f" Row ranges: mean={row_ranges.mean():.6f}, std={row_ranges.std():.6f}")
    # Coefficient of variation: spread of the per-row statistic relative to
    # its average — high CV means rows look very different from each other.
    print(
        f" RowMeans CV (std/mean): {row_means.std() / (abs(row_means.mean()) + 1e-10):.4f}"
    )
    print(f" RowStds CV: {row_stds.std() / (row_stds.mean() + 1e-10):.4f}")
# ============================================================================
# 3. GROUP-LEVEL ANALYSIS (16-element groups, like Q2_K)
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 3: GROUP-LEVEL ANALYSIS (16-element groups)")
print("=" * 80)
print(" Quantization works on 16-element groups. Key question:")
print(" How much does each group need its own OFFSET (dmin)?\n")
GS = 16  # quantization group size (elements per scale/offset pair)
for name, W in weight_data.items():
    # Look at first 256 rows for speed
    nr = min(W.shape[0], 256)
    nc = W.shape[1]
    group_means = []
    group_stds = []
    group_ranges = []
    group_offsets = []  # |mean| / range — how important is the offset
    for r in range(nr):
        for g_start in range(0, nc, GS):
            g = W[r, g_start : g_start + GS]
            gm = g.mean()
            gs = g.std()
            gr = g.max() - g.min()
            group_means.append(gm)
            group_stds.append(gs)
            group_ranges.append(gr)
            # Offset importance: how large is the group mean relative to its
            # range? If this is high, the per-group offset (dmin) matters a lot.
            if gr > 1e-10:
                group_offsets.append(abs(gm) / gr)
            else:
                group_offsets.append(0)
    gm = np.array(group_means)
    gs = np.array(group_stds)
    gr = np.array(group_ranges)
    go = np.array(group_offsets)
    print(f"\n {name} ({len(group_means)} groups):")
    print(
        f" Group mean: mean={gm.mean():.6f}, std={gm.std():.6f}, "
        f"range=[{gm.min():.6f}, {gm.max():.6f}]"
    )
    print(f" Group std: mean={gs.mean():.6f}, std={gs.std():.6f}")
    print(f" Group range: mean={gr.mean():.6f}, std={gr.std():.6f}")
    print(f" *** OFFSET IMPORTANCE (|group_mean| / range) ***")
    print(
        f" mean={go.mean():.4f}, median={np.median(go):.4f}, "
        f"p90={np.percentile(go, 90):.4f}, max={go.max():.4f}"
    )
    print(f" fraction with offset > 0.1: {np.mean(go > 0.1):.3f}")
    print(f" fraction with offset > 0.2: {np.mean(go > 0.2):.3f}")
    print(f" fraction with offset > 0.3: {np.mean(go > 0.3):.3f}")
    # How well does zeroing the min (Q2_K style, clamping min to 0) work
    # vs keeping the actual min? Simulate a 4-level (2-bit) uniform quantizer.
    mse_no_offset = 0  # uniform 4 levels [0,1,2,3] * scale
    mse_with_offset = 0  # uniform 4 levels [0,1,2,3] * scale + offset
    for r in range(nr):
        for g_start in range(0, nc, GS):
            g = W[r, g_start : g_start + GS]
            gmin = g.min()
            gmax = g.max()
            grange = gmax - gmin
            if grange < 1e-10:
                continue
            # No-offset quantizer: scale derives from the max alone; any
            # negative group minimum is simply lost.
            # (Original code had an if/else on gmin > 0 whose two branches
            # were identical, plus an unused min_no — collapsed here.)
            scale_no = gmax / 3.0
            # With-offset quantizer: scale from the full range, keep the min.
            scale_w = grange / 3.0
            min_w = gmin
            for val in g:
                # No offset quantization
                norm_no = val / (scale_no + 1e-10)
                idx_no = max(0, min(3, int(round(norm_no))))
                recon_no = scale_no * idx_no
                mse_no_offset += (val - recon_no) ** 2
                # With offset quantization
                norm_w = (val - min_w) / (scale_w + 1e-10)
                idx_w = max(0, min(3, int(round(norm_w))))
                recon_w = min_w + scale_w * idx_w
                mse_with_offset += (val - recon_w) ** 2
    total_elements = nr * nc
    rmse_no = np.sqrt(mse_no_offset / total_elements)
    rmse_w = np.sqrt(mse_with_offset / total_elements)
    # Guard against a degenerate all-constant tensor (rmse_no == 0).
    improvement = (rmse_no - rmse_w) / rmse_no * 100 if rmse_no > 0 else 0.0
    print(f" Quant RMSE (no offset): {rmse_no:.6f}")
    print(f" Quant RMSE (with offset): {rmse_w:.6f}")
    print(f" Offset benefit: {improvement:.1f}% RMSE reduction")
# ============================================================================
# 4. ACTIVATION ANALYSIS
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 4: ACTIVATION DISTRIBUTION COMPARISON")
print("=" * 80)
# label -> activation dump captured at that matmul's input
activations = {
    "ffn_input (gate/up)": "act_blk0_ffn_input.f32bin",
    "ffn_down_input (swiglu)": "act_blk0_ffn_down_input.f32bin",
    "attn_input (q/k/v)": "act_blk0_attn_input.f32bin",
    "attn_output_input": "act_blk0_attn_output_input.f32bin",
}
act_data = {}
for name, fname in activations.items():
    try:
        A = load_f32_tensor(fname)
        act_data[name] = A
        # NOTE(review): separator glyphs were stripped by a Unicode-cleaning
        # pass (the rule printed an empty string * 70, and name/fname were
        # fused). Restored to match the Section 1 style — confirm glyphs.
        print(f"\n{'-' * 70}")
        print(f" {name} — file: {fname}")
        stats(name, A)
    except Exception as e:
        # Missing activation dumps are tolerated; keep analyzing the rest.
        print(f" {name}: SKIP ({e})")
# ============================================================================
# 5. THE CRITICAL QUESTION: PER-DIMENSION ACTIVATION MAGNITUDE
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 5: PER-DIMENSION ACTIVATION POWER (per-column RMS)")
print("=" * 80)
print(" If activation dimensions have very different magnitudes,")
print(" the quantization error in each weight dimension is weighted differently.")
print(" Dimensions with high activation power amplify weight errors.\n")
for name, A in act_data.items():
    col_rms = np.sqrt(np.mean(A**2, axis=0))  # RMS per column (dimension)
    print(f"\n {name} ({A.shape[1]} dimensions):")
    print(f" Col RMS: mean={col_rms.mean():.6f}, std={col_rms.std():.6f}")
    print(f" Col RMS range: [{col_rms.min():.6f}, {col_rms.max():.6f}]")
    print(f" Col RMS CV (std/mean): {col_rms.std() / (col_rms.mean() + 1e-10):.4f}")
    print(f" Max/Min ratio: {col_rms.max() / (col_rms.min() + 1e-10):.1f}x")
    # Top 10 and bottom 10 dimensions by power (only first 5 of each printed).
    top10 = np.argsort(col_rms)[-10:][::-1]
    bot10 = np.argsort(col_rms)[:10]
    print(
        f" Top-10 dims by RMS: {[(int(d), f'{col_rms[d]:.4f}') for d in top10[:5]]}..."
    )
    print(
        f" Bot-10 dims by RMS: {[(int(d), f'{col_rms[d]:.4f}') for d in bot10[:5]]}..."
    )
    # Power concentration: how much of the total power (sum of squared RMS)
    # sits in the top 10% / top 1% of dimensions?
    total_power = np.sum(col_rms**2)
    sorted_power = np.sort(col_rms**2)[::-1]
    top10pct = int(len(col_rms) * 0.1)
    top10pct_power = np.sum(sorted_power[:top10pct])
    top1pct = max(1, int(len(col_rms) * 0.01))
    top1pct_power = np.sum(sorted_power[:top1pct])
    print(
        f" Top 10% of dims contribute {top10pct_power / total_power * 100:.1f}% of total power"
    )
    print(
        f" Top 1% of dims contribute {top1pct_power / total_power * 100:.1f}% of total power"
    )
# ============================================================================
# 6. CROSS-CORRELATION: WEIGHT ERROR × ACTIVATION POWER
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 6: WHERE DO WEIGHT ERRORS MEET HIGH ACTIVATION POWER?")
print("=" * 80)
print(" For each weight dimension, compute: activation_rms[dim] × weight_error[dim]")
print(" This tells us which dimensions contribute most to matmul error.\n")
# Focus tensors: (label, weight dump, matching activation dump).
# Also reused by Section 7 below.
focus = [
    ("ffn_down", "blk_0_ffn_down_weight.f32bin", "act_blk0_ffn_down_input.f32bin"),
    ("ffn_gate", "blk_0_ffn_gate_weight.f32bin", "act_blk0_ffn_input.f32bin"),
    ("ffn_up", "blk_0_ffn_up_weight.f32bin", "act_blk0_ffn_input.f32bin"),
    ("attn_q", "blk_0_attn_q_weight.f32bin", "act_blk0_attn_input.f32bin"),
]
for name, wfile, afile in focus:
    W = load_f32_tensor(wfile)
    A = load_f32_tensor(afile)
    if W.shape[1] != A.shape[1]:
        print(f" {name}: dim mismatch W={W.shape[1]} vs A={A.shape[1]}, SKIP")
        continue
    nc = W.shape[1]
    # Per-column activation RMS
    act_rms = np.sqrt(np.mean(A**2, axis=0))
    # Per-column weight std (how "hard" to quantize) and excess kurtosis
    # (heavy tails = harder to quantize). (An unused w_range was removed.)
    w_std = W.std(axis=0)
    w_kurt = (
        np.mean(((W - W.mean(axis=0)) / (W.std(axis=0) + 1e-10)) ** 4, axis=0) - 3.0
    )
    # Weight error proxy: 2-bit uniform quant on 16-element groups over the
    # first nr rows. Higher-variance columns accumulate more error.
    nr = min(W.shape[0], 256)
    dim_mse = np.zeros(nc)
    for g_start in range(0, nc, GS):
        g_end = min(g_start + GS, nc)
        for r in range(nr):
            g = W[r, g_start:g_end]
            gmin = min(g.min(), 0)  # Q2_K clamps min to ≤0
            gmax = g.max()
            gr = gmax - gmin
            if gr < 1e-10:
                continue
            scale = gr / 3.0
            for i, val in enumerate(g):
                norm = (val - gmin) / scale
                idx = max(0, min(3, int(round(norm))))
                recon = gmin + scale * idx
                dim_mse[g_start + i] += (val - recon) ** 2
    dim_rmse = np.sqrt(dim_mse / nr)
    # The key metric: dimension-level contribution to matmul error,
    # matmul_error_contribution[d] ≈ act_rms[d] * weight_rmse[d].
    matmul_contrib = act_rms * dim_rmse
    print(f"\n {name} ({nc} dimensions):")
    print(
        f" act_rms: mean={act_rms.mean():.4f}, CV={act_rms.std() / act_rms.mean():.4f}"
    )
    print(
        f" w_rmse: mean={dim_rmse.mean():.6f}, CV={dim_rmse.std() / (dim_rmse.mean() + 1e-10):.4f}"
    )
    print(
        f" matmul_contrib: mean={matmul_contrib.mean():.6f}, "
        f"std={matmul_contrib.std():.6f}"
    )
    # Correlation between activation power and weight error
    corr = np.corrcoef(act_rms, dim_rmse)[0, 1]
    print(f" CORRELATION act_rms ↔ weight_rmse: {corr:.4f}")
    print(f" (>0 means high-power dims are also hard to quantize — BAD)")
    # Top contributors to matmul error
    top_dims = np.argsort(matmul_contrib)[-20:][::-1]
    print(f" Top-5 error-contributing dimensions:")
    for d in top_dims[:5]:
        print(
            f" dim {d}: act_rms={act_rms[d]:.4f}, w_rmse={dim_rmse[d]:.6f}, "
            f"contrib={matmul_contrib[d]:.6f}, w_std={w_std[d]:.6f}, w_kurt={w_kurt[d]:.2f}"
        )
    # Distribution of matmul contributions across dimension percentiles.
    total_contrib = matmul_contrib.sum()
    sorted_contrib = np.sort(matmul_contrib)[::-1]
    for pct in [0.01, 0.05, 0.10, 0.25]:
        n = max(1, int(nc * pct))
        print(
            f" Top {pct * 100:.0f}% dims: {sorted_contrib[:n].sum() / total_contrib * 100:.1f}% "
            f"of total matmul error"
        )
# ============================================================================
# 7. THE STRUCTURAL ASYMMETRY: COLUMN DIRECTION GROUP ANALYSIS
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 7: STRUCTURAL ASYMMETRY — COLUMN vs ROW GROUPING")
print("=" * 80)
print(" Quantization groups along the ROW (inner dim). For ffn_down,")
print(" each row has 9728 elements (38 groups of 256).")
print(" For ffn_gate, each row has 2560 elements (10 groups of 256).")
print(" More groups = more metadata (scales/offsets) relative to data bits.\n")
for name, wfile, afile in focus:  # afile unused here; focus defined in Section 6
    W = load_f32_tensor(wfile)
    nc = W.shape[1]
    n_groups_per_row = nc // 256  # super-blocks per row
    print(f"\n {name}: {nc} cols → {n_groups_per_row} super-blocks per row")
    print(f" Groups per row: {nc // GS} (16-element groups)")
    print(
        f" With Q2_K (2.625 bpw): {n_groups_per_row * 2} scale+offset bytes per row"
    )
    # How much do group means vary WITHIN a row?
    nr = min(W.shape[0], 64)
    intra_row_mean_var = []
    for r in range(nr):
        group_means = []
        for g_start in range(0, nc, GS):
            group_means.append(W[r, g_start : g_start + GS].mean())
        group_means = np.array(group_means)
        intra_row_mean_var.append(group_means.std())
    print(
        f" Intra-row group mean variability (avg across rows): "
        f"mean={np.mean(intra_row_mean_var):.6f}"
    )
    # How much does the sign of group means vary? Mixed signs mean a single
    # shared offset per row cannot serve all groups.
    pos_frac = 0
    neg_frac = 0
    total_groups = 0
    for r in range(nr):
        for g_start in range(0, nc, GS):
            gm = W[r, g_start : g_start + GS].mean()
            if gm > 0.001:
                pos_frac += 1
            elif gm < -0.001:
                neg_frac += 1
            total_groups += 1
    print(
        f" Group mean sign: {pos_frac / total_groups * 100:.1f}% positive, "
        f"{neg_frac / total_groups * 100:.1f}% negative, "
        f"{(1 - pos_frac / total_groups - neg_frac / total_groups) * 100:.1f}% near-zero"
    )
# ============================================================================
# 8. THE SWIGLU EFFECT: WHY ffn_down INPUT IS SPECIAL
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 8: THE SWIGLU EFFECT — ffn_down ACTIVATION STRUCTURE")
print("=" * 80)
print(" ffn_down's activation is the SwiGLU output: silu(gate) * up")
print(" This creates a specific activation pattern that differs from")
print(" raw FFN input (RMSNorm output).\n")
if "ffn_input (gate/up)" in act_data and "ffn_down_input (swiglu)" in act_data:
    A_in = act_data["ffn_input (gate/up)"]
    A_swiglu = act_data["ffn_down_input (swiglu)"]
    print(f" FFN input (RMSNorm output): {A_in.shape}")
    print(f" SwiGLU output: {A_swiglu.shape}")
    # Per-token analysis (first few tokens only)
    for t in range(min(A_swiglu.shape[0], 3)):
        tok_in = A_in[t]
        tok_sw = A_swiglu[t]
        print(f"\n Token {t}:")
        print(
            f" FFN input: mean={tok_in.mean():.6f}, std={tok_in.std():.6f}, "
            f"|max|={np.abs(tok_in).max():.6f}"
        )
        print(
            f" SwiGLU out: mean={tok_sw.mean():.6f}, std={tok_sw.std():.6f}, "
            f"|max|={np.abs(tok_sw).max():.6f}"
        )
        # SwiGLU creates lots of near-zero values (silu suppresses negatives)
        frac_nearzero_sw = np.mean(np.abs(tok_sw) < 0.01 * tok_sw.std())
        frac_nearzero_in = np.mean(np.abs(tok_in) < 0.01 * tok_in.std())
        print(
            f" Near-zero fraction: FFN input={frac_nearzero_in:.3f}, "
            f"SwiGLU={frac_nearzero_sw:.3f}"
        )
        # Sparsity pattern
        frac_neg = np.mean(tok_sw < 0)
        print(f" SwiGLU negative fraction: {frac_neg:.3f}")
    # Dimension-level analysis of SwiGLU
    print(f"\n Dimension-level SwiGLU properties:")
    dim_mean_sw = A_swiglu.mean(axis=0)
    dim_std_sw = A_swiglu.std(axis=0)
    dim_sparsity = np.mean(A_swiglu < 0, axis=0)  # fraction of tokens negative per dim
    print(f" Dim mean range: [{dim_mean_sw.min():.6f}, {dim_mean_sw.max():.6f}]")
    print(f" Dim std range: [{dim_std_sw.min():.6f}, {dim_std_sw.max():.6f}]")
    print(
        f" Dim negative fraction: mean={dim_sparsity.mean():.3f}, "
        f"range=[{dim_sparsity.min():.3f}, {dim_sparsity.max():.3f}]"
    )
    # Highly sparse dimensions (mostly near-zero after SwiGLU)
    high_sparsity = np.sum(dim_sparsity > 0.7)
    low_sparsity = np.sum(dim_sparsity < 0.3)
    print(f" Dims with >70% negative tokens: {high_sparsity}/{len(dim_sparsity)}")
    print(f" Dims with <30% negative tokens: {low_sparsity}/{len(dim_sparsity)}")
# ============================================================================
# 9. QUANTIZATION NOISE × ACTIVATION POWER: THE MATMUL ERROR DECOMPOSITION
# ============================================================================
print("\n" + "=" * 80)
print("SECTION 9: MATMUL ERROR DECOMPOSITION")
print("=" * 80)
print(
    " matmul_error ≈ sum over groups of (activation_power_in_group × "
    "weight_mse_in_group)"
)
print(
    " If activation power is concentrated in groups with high weight error, "
    "matmul error explodes.\n"
)
# For ffn_down specifically, compare where activation power sits vs weight error
W_down = load_f32_tensor("blk_0_ffn_down_weight.f32bin")
A_swiglu = load_f32_tensor("act_blk0_ffn_down_input.f32bin")
W_gate = load_f32_tensor("blk_0_ffn_gate_weight.f32bin")
A_ffn_in = load_f32_tensor("act_blk0_ffn_input.f32bin")
for label, W, A in [("ffn_down", W_down, A_swiglu), ("ffn_gate", W_gate, A_ffn_in)]:
    nc = W.shape[1]
    nr = min(W.shape[0], 128)  # subsample rows for speed
    # Per-superblock (256 columns) activation power and weight quant error.
    n_sb = nc // 256
    sb_act_power = np.zeros(n_sb)
    sb_weight_mse = np.zeros(n_sb)
    for sb in range(n_sb):
        s = sb * 256
        e = s + 256
        # Activation power: mean squared activation in this region
        sb_act_power[sb] = np.mean(A[:, s:e] ** 2)
        # Weight MSE: Q2_K-style uniform quant error over 16-element groups
        mse = 0
        count = 0
        for r in range(nr):
            for g in range(0, 256, GS):
                gvals = W[r, s + g : s + g + GS]
                gmin = min(gvals.min(), 0)
                gmax = gvals.max()
                gr = gmax - gmin
                if gr < 1e-10:
                    continue
                scale = gr / 3.0
                for v in gvals:
                    norm = (v - gmin) / scale
                    idx = max(0, min(3, int(round(norm))))
                    recon = gmin + scale * idx
                    mse += (v - recon) ** 2
                    count += 1
        sb_weight_mse[sb] = mse / max(count, 1)
    # Correlation between activation power and weight error across super-blocks
    # (computed on RMS scale via sqrt); skipped when too few valid blocks.
    valid = sb_act_power > 1e-10
    if valid.sum() > 10:
        corr = np.corrcoef(np.sqrt(sb_act_power[valid]), np.sqrt(sb_weight_mse[valid]))[
            0, 1
        ]
    else:
        corr = 0
    print(f"\n {label}:")
    print(f" Super-blocks: {n_sb}")
    print(
        f" act_power: mean={sb_act_power.mean():.6f}, "
        f"std={np.sqrt(sb_act_power.var()):.6f}, "
        f"range=[{sb_act_power.min():.6f}, {sb_act_power.max():.6f}]"
    )
    print(
        f" weight_mse: mean={sb_weight_mse.mean():.6f}, "
        f"range=[{sb_weight_mse.min():.6f}, {sb_weight_mse.max():.6f}]"
    )
    print(f" CORRELATION (act_power ↔ weight_mse): {corr:.4f}")
    # Show top-5 super-blocks by contribution to matmul error
    contrib = sb_act_power * sb_weight_mse
    top5 = np.argsort(contrib)[-5:][::-1]
    print(f" Top-5 error-contributing super-blocks (of {n_sb}):")
    for idx in top5:
        print(
            f" SB {idx * 256}-{(idx + 1) * 256 - 1}: act_power={sb_act_power[idx]:.6f}, "
            f"weight_mse={sb_weight_mse[idx]:.6f}, contrib={contrib[idx]:.6f}"
        )
print("\n" + "=" * 80)
print("ANALYSIS COMPLETE")
print("=" * 80)