Loading Parameters (size 2622750720): embedder_input_embedding Loading Parameters (size 10240): final_norm_scale Loading Parameters: (layer=0, size 26214400) griffin_linear_x_w Loading Parameters: (layer=0, size 10240) griffin_linear_x_biases Loading Parameters: (layer=0, size 26214400) griffin_linear_y_w Loading Parameters: (layer=0, size 10240) griffin_linear_y_biases Loading Parameters: (layer=0, size 26214400) griffin_linear_out_w Loading Parameters: (layer=0, size 10240) griffin_linear_out_biases Loading Parameters: (layer=0, size 40960) griffin_conv_w Loading Parameters: (layer=0, size 10240) griffin_conv_biases Loading Parameters: (layer=0, size 5242880) griffin_gate_w Loading Parameters: (layer=0, size 20480) griffin_gate_biases Loading Parameters: (layer=0, size 10240) griffin_a Loading Parameters: (layer=0, size 157286400) gating_einsum_w Loading Parameters: (layer=0, size 78643200) linear_w Loading Parameters: (layer=0, size 10240) pre_attention_norm_scale Loading Parameters: (layer=0, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=0, size 61440) ffw_gating_biases Loading Parameters: (layer=0, size 10240) ffw_output_biases Loading Parameters: (layer=1, size 26214400) griffin_linear_x_w Loading Parameters: (layer=1, size 10240) griffin_linear_x_biases Loading Parameters: (layer=1, size 26214400) griffin_linear_y_w Loading Parameters: (layer=1, size 10240) griffin_linear_y_biases Loading Parameters: (layer=1, size 26214400) griffin_linear_out_w Loading Parameters: (layer=1, size 10240) griffin_linear_out_biases Loading Parameters: (layer=1, size 40960) griffin_conv_w Loading Parameters: (layer=1, size 10240) griffin_conv_biases Loading Parameters: (layer=1, size 5242880) griffin_gate_w Loading Parameters: (layer=1, size 20480) griffin_gate_biases Loading Parameters: (layer=1, size 10240) griffin_a Loading Parameters: (layer=1, size 157286400) gating_einsum_w Loading Parameters: (layer=1, size 78643200) linear_w Loading Parameters: (layer=1, size 10240) pre_attention_norm_scale Loading Parameters: (layer=1, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=1, size 61440) ffw_gating_biases Loading Parameters: (layer=1, size 10240) ffw_output_biases Loading Parameters: (layer=2, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=2, size 78643200) qkv_einsum_w Loading Parameters: (layer=2, size 157286400) gating_einsum_w Loading Parameters: (layer=2, size 78643200) linear_w Loading Parameters: (layer=2, size 10240) pre_attention_norm_scale Loading Parameters: (layer=2, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=2, size 61440) ffw_gating_biases Loading Parameters: (layer=2, size 10240) ffw_output_biases Loading Parameters: (layer=2, size 10240) attention_output_biases Loading Parameters: (layer=3, size 26214400) griffin_linear_x_w Loading Parameters: (layer=3, size 10240) griffin_linear_x_biases Loading Parameters: (layer=3, size 26214400) griffin_linear_y_w Loading Parameters: (layer=3, size 10240) griffin_linear_y_biases Loading Parameters: (layer=3, size 26214400) griffin_linear_out_w Loading Parameters: (layer=3, size 10240) griffin_linear_out_biases Loading Parameters: (layer=3, size 40960) griffin_conv_w Loading Parameters: (layer=3, size 10240) griffin_conv_biases Loading Parameters: (layer=3, size 5242880) griffin_gate_w Loading Parameters: (layer=3, size 20480) griffin_gate_biases Loading Parameters: (layer=3, size 10240) griffin_a Loading Parameters: (layer=3, size 157286400) gating_einsum_w Loading Parameters: (layer=3, size 78643200) linear_w Loading Parameters: (layer=3, size 10240) pre_attention_norm_scale Loading Parameters: (layer=3, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=3, size 61440) ffw_gating_biases Loading Parameters: (layer=3, size 10240) ffw_output_biases Loading Parameters: (layer=4, size 26214400) griffin_linear_x_w Loading Parameters: (layer=4, size 10240) griffin_linear_x_biases Loading Parameters: (layer=4, size 26214400) griffin_linear_y_w Loading Parameters: (layer=4, size 10240) griffin_linear_y_biases Loading Parameters: (layer=4, size 26214400) griffin_linear_out_w Loading Parameters: (layer=4, size 10240) griffin_linear_out_biases Loading Parameters: (layer=4, size 40960) griffin_conv_w Loading Parameters: (layer=4, size 10240) griffin_conv_biases Loading Parameters: (layer=4, size 5242880) griffin_gate_w Loading Parameters: (layer=4, size 20480) griffin_gate_biases Loading Parameters: (layer=4, size 10240) griffin_a Loading Parameters: (layer=4, size 157286400) gating_einsum_w Loading Parameters: (layer=4, size 78643200) linear_w Loading Parameters: (layer=4, size 10240) pre_attention_norm_scale Loading Parameters: (layer=4, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=4, size 61440) ffw_gating_biases Loading Parameters: (layer=4, size 10240) ffw_output_biases Loading Parameters: (layer=5, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=5, size 78643200) qkv_einsum_w Loading Parameters: (layer=5, size 157286400) gating_einsum_w Loading Parameters: (layer=5, size 78643200) linear_w Loading Parameters: (layer=5, size 10240) pre_attention_norm_scale Loading Parameters: (layer=5, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=5, size 61440) ffw_gating_biases Loading Parameters: (layer=5, size 10240) ffw_output_biases Loading Parameters: (layer=5, size 10240) attention_output_biases Loading Parameters: (layer=6, size 26214400) griffin_linear_x_w Loading Parameters: (layer=6, size 10240) griffin_linear_x_biases Loading Parameters: (layer=6, size 26214400) griffin_linear_y_w Loading Parameters: (layer=6, size 10240) griffin_linear_y_biases Loading Parameters: (layer=6, size 26214400) griffin_linear_out_w Loading Parameters: (layer=6, size 10240) griffin_linear_out_biases Loading Parameters: (layer=6, size 40960) griffin_conv_w Loading Parameters: (layer=6, size 10240) griffin_conv_biases Loading Parameters: (layer=6, size 5242880) griffin_gate_w Loading Parameters: (layer=6, size 20480) griffin_gate_biases Loading Parameters: (layer=6, size 10240) griffin_a Loading Parameters: (layer=6, size 157286400) gating_einsum_w Loading Parameters: (layer=6, size 78643200) linear_w Loading Parameters: (layer=6, size 10240) pre_attention_norm_scale Loading Parameters: (layer=6, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=6, size 61440) ffw_gating_biases Loading Parameters: (layer=6, size 10240) ffw_output_biases Loading Parameters: (layer=7, size 26214400) griffin_linear_x_w Loading Parameters: (layer=7, size 10240) griffin_linear_x_biases Loading Parameters: (layer=7, size 26214400) griffin_linear_y_w Loading Parameters: (layer=7, size 10240) griffin_linear_y_biases Loading Parameters: (layer=7, size 26214400) griffin_linear_out_w Loading Parameters: (layer=7, size 10240) griffin_linear_out_biases Loading Parameters: (layer=7, size 40960) griffin_conv_w Loading Parameters: (layer=7, size 10240) griffin_conv_biases Loading Parameters: (layer=7, size 5242880) griffin_gate_w Loading Parameters: (layer=7, size 20480) griffin_gate_biases Loading Parameters: (layer=7, size 10240) griffin_a Loading Parameters: (layer=7, size 157286400) gating_einsum_w Loading Parameters: (layer=7, size 78643200) linear_w Loading Parameters: (layer=7, size 10240) pre_attention_norm_scale Loading Parameters: (layer=7, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=7, size 61440) ffw_gating_biases Loading Parameters: (layer=7, size 10240) ffw_output_biases Loading Parameters: (layer=8, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=8, size 78643200) qkv_einsum_w Loading Parameters: (layer=8, size 157286400) gating_einsum_w Loading Parameters: (layer=8, size 78643200) linear_w Loading Parameters: (layer=8, size 10240) pre_attention_norm_scale Loading Parameters: (layer=8, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=8, size 61440) ffw_gating_biases Loading Parameters: (layer=8, size 10240) ffw_output_biases Loading Parameters: (layer=8, size 10240) attention_output_biases Loading Parameters: (layer=9, size 26214400) griffin_linear_x_w Loading Parameters: (layer=9, size 10240) griffin_linear_x_biases Loading Parameters: (layer=9, size 26214400) griffin_linear_y_w Loading Parameters: (layer=9, size 10240) griffin_linear_y_biases Loading Parameters: (layer=9, size 26214400) griffin_linear_out_w Loading Parameters: (layer=9, size 10240) griffin_linear_out_biases Loading Parameters: (layer=9, size 40960) griffin_conv_w Loading Parameters: (layer=9, size 10240) griffin_conv_biases Loading Parameters: (layer=9, size 5242880) griffin_gate_w Loading Parameters: (layer=9, size 20480) griffin_gate_biases Loading Parameters: (layer=9, size 10240) griffin_a Loading Parameters: (layer=9, size 157286400) gating_einsum_w Loading Parameters: (layer=9, size 78643200) linear_w Loading Parameters: (layer=9, size 10240) pre_attention_norm_scale Loading Parameters: (layer=9, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=9, size 61440) ffw_gating_biases Loading Parameters: (layer=9, size 10240) ffw_output_biases Loading Parameters: (layer=10, size 26214400) griffin_linear_x_w Loading Parameters: (layer=10, size 10240) griffin_linear_x_biases Loading Parameters: (layer=10, size 26214400) griffin_linear_y_w Loading Parameters: (layer=10, size 10240) griffin_linear_y_biases Loading Parameters: (layer=10, size 26214400) griffin_linear_out_w Loading Parameters: (layer=10, size 10240) griffin_linear_out_biases Loading Parameters: (layer=10, size 40960) griffin_conv_w Loading Parameters: (layer=10, size 10240) griffin_conv_biases Loading Parameters: (layer=10, size 5242880) griffin_gate_w Loading Parameters: (layer=10, size 20480) griffin_gate_biases Loading Parameters: (layer=10, size 10240) griffin_a Loading Parameters: (layer=10, size 157286400) gating_einsum_w Loading Parameters: (layer=10, size 78643200) linear_w Loading Parameters: (layer=10, size 10240) pre_attention_norm_scale Loading Parameters: (layer=10, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=10, size 61440) ffw_gating_biases Loading Parameters: (layer=10, size 10240) ffw_output_biases Loading Parameters: (layer=11, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=11, size 78643200) qkv_einsum_w Loading Parameters: (layer=11, size 157286400) gating_einsum_w Loading Parameters: (layer=11, size 78643200) linear_w Loading Parameters: (layer=11, size 10240) pre_attention_norm_scale Loading Parameters: (layer=11, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=11, size 61440) ffw_gating_biases Loading Parameters: (layer=11, size 10240) ffw_output_biases Loading Parameters: (layer=11, size 10240) attention_output_biases Loading Parameters: (layer=12, size 26214400) griffin_linear_x_w Loading Parameters: (layer=12, size 10240) griffin_linear_x_biases Loading Parameters: (layer=12, size 26214400) griffin_linear_y_w Loading Parameters: (layer=12, size 10240) griffin_linear_y_biases Loading Parameters: (layer=12, size 26214400) griffin_linear_out_w Loading Parameters: (layer=12, size 10240) griffin_linear_out_biases Loading Parameters: (layer=12, size 40960) griffin_conv_w Loading Parameters: (layer=12, size 10240) griffin_conv_biases Loading Parameters: (layer=12, size 5242880) griffin_gate_w Loading Parameters: (layer=12, size 20480) griffin_gate_biases Loading Parameters: (layer=12, size 10240) griffin_a Loading Parameters: (layer=12, size 157286400) gating_einsum_w Loading Parameters: (layer=12, size 78643200) linear_w Loading Parameters: (layer=12, size 10240) pre_attention_norm_scale Loading Parameters: (layer=12, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=12, size 61440) ffw_gating_biases Loading Parameters: (layer=12, size 10240) ffw_output_biases Loading Parameters: (layer=13, size 26214400) griffin_linear_x_w Loading Parameters: (layer=13, size 10240) griffin_linear_x_biases Loading Parameters: (layer=13, size 26214400) griffin_linear_y_w Loading Parameters: (layer=13, size 10240) griffin_linear_y_biases Loading Parameters: (layer=13, size 26214400) griffin_linear_out_w Loading Parameters: (layer=13, size 10240) griffin_linear_out_biases Loading Parameters: (layer=13, size 40960) griffin_conv_w Loading Parameters: (layer=13, size 10240) griffin_conv_biases Loading Parameters: (layer=13, size 5242880) griffin_gate_w Loading Parameters: (layer=13, size 20480) griffin_gate_biases Loading Parameters: (layer=13, size 10240) griffin_a Loading Parameters: (layer=13, size 157286400) gating_einsum_w Loading Parameters: (layer=13, size 78643200) linear_w Loading Parameters: (layer=13, size 10240) pre_attention_norm_scale Loading Parameters: (layer=13, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=13, size 61440) ffw_gating_biases Loading Parameters: (layer=13, size 10240) ffw_output_biases Loading Parameters: (layer=14, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=14, size 78643200) qkv_einsum_w Loading Parameters: (layer=14, size 157286400) gating_einsum_w Loading Parameters: (layer=14, size 78643200) linear_w Loading Parameters: (layer=14, size 10240) pre_attention_norm_scale Loading Parameters: (layer=14, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=14, size 61440) ffw_gating_biases Loading Parameters: (layer=14, size 10240) ffw_output_biases Loading Parameters: (layer=14, size 10240) attention_output_biases Loading Parameters: (layer=15, size 26214400) griffin_linear_x_w Loading Parameters: (layer=15, size 10240) griffin_linear_x_biases Loading Parameters: (layer=15, size 26214400) griffin_linear_y_w Loading Parameters: (layer=15, size 10240) griffin_linear_y_biases Loading Parameters: (layer=15, size 26214400) griffin_linear_out_w Loading Parameters: (layer=15, size 10240) griffin_linear_out_biases Loading Parameters: (layer=15, size 40960) griffin_conv_w Loading Parameters: (layer=15, size 10240) griffin_conv_biases Loading Parameters: (layer=15, size 5242880) griffin_gate_w Loading Parameters: (layer=15, size 20480) griffin_gate_biases Loading Parameters: (layer=15, size 10240) griffin_a Loading Parameters: (layer=15, size 157286400) gating_einsum_w Loading Parameters: (layer=15, size 78643200) linear_w Loading Parameters: (layer=15, size 10240) pre_attention_norm_scale Loading Parameters: (layer=15, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=15, size 61440) ffw_gating_biases Loading Parameters: (layer=15, size 10240) ffw_output_biases Loading Parameters: (layer=16, size 26214400) griffin_linear_x_w Loading Parameters: (layer=16, size 10240) griffin_linear_x_biases Loading Parameters: (layer=16, size 26214400) griffin_linear_y_w Loading Parameters: (layer=16, size 10240) griffin_linear_y_biases Loading Parameters: (layer=16, size 26214400) griffin_linear_out_w Loading Parameters: (layer=16, size 10240) griffin_linear_out_biases Loading Parameters: (layer=16, size 40960) griffin_conv_w Loading Parameters: (layer=16, size 10240) griffin_conv_biases Loading Parameters: (layer=16, size 5242880) griffin_gate_w Loading Parameters: (layer=16, size 20480) griffin_gate_biases Loading Parameters: (layer=16, size 10240) griffin_a Loading Parameters: (layer=16, size 157286400) gating_einsum_w Loading Parameters: (layer=16, size 78643200) linear_w Loading Parameters: (layer=16, size 10240) pre_attention_norm_scale Loading Parameters: (layer=16, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=16, size 61440) ffw_gating_biases Loading Parameters: (layer=16, size 10240) ffw_output_biases Loading Parameters: (layer=17, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=17, size 78643200) qkv_einsum_w Loading Parameters: (layer=17, size 157286400) gating_einsum_w Loading Parameters: (layer=17, size 78643200) linear_w Loading Parameters: (layer=17, size 10240) pre_attention_norm_scale Loading Parameters: (layer=17, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=17, size 61440) ffw_gating_biases Loading Parameters: (layer=17, size 10240) ffw_output_biases Loading Parameters: (layer=17, size 10240) attention_output_biases Loading Parameters: (layer=18, size 26214400) griffin_linear_x_w Loading Parameters: (layer=18, size 10240) griffin_linear_x_biases Loading Parameters: (layer=18, size 26214400) griffin_linear_y_w Loading Parameters: (layer=18, size 10240) griffin_linear_y_biases Loading Parameters: (layer=18, size 26214400) griffin_linear_out_w Loading Parameters: (layer=18, size 10240) griffin_linear_out_biases Loading Parameters: (layer=18, size 40960) griffin_conv_w Loading Parameters: (layer=18, size 10240) griffin_conv_biases Loading Parameters: (layer=18, size 5242880) griffin_gate_w Loading Parameters: (layer=18, size 20480) griffin_gate_biases Loading Parameters: (layer=18, size 10240) griffin_a Loading Parameters: (layer=18, size 157286400) gating_einsum_w Loading Parameters: (layer=18, size 78643200) linear_w Loading Parameters: (layer=18, size 10240) pre_attention_norm_scale Loading Parameters: (layer=18, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=18, size 61440) ffw_gating_biases Loading Parameters: (layer=18, size 10240) ffw_output_biases Loading Parameters: (layer=19, size 26214400) griffin_linear_x_w Loading Parameters: (layer=19, size 10240) griffin_linear_x_biases Loading Parameters: (layer=19, size 26214400) griffin_linear_y_w Loading Parameters: (layer=19, size 10240) griffin_linear_y_biases Loading Parameters: (layer=19, size 26214400) griffin_linear_out_w Loading Parameters: (layer=19, size 10240) griffin_linear_out_biases Loading Parameters: (layer=19, size 40960) griffin_conv_w Loading Parameters: (layer=19, size 10240) griffin_conv_biases Loading Parameters: (layer=19, size 5242880) griffin_gate_w Loading Parameters: (layer=19, size 20480) griffin_gate_biases Loading Parameters: (layer=19, size 10240) griffin_a Loading Parameters: (layer=19, size 157286400) gating_einsum_w Loading Parameters: (layer=19, size 78643200) linear_w Loading Parameters: (layer=19, size 10240) pre_attention_norm_scale Loading Parameters: (layer=19, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=19, size 61440) ffw_gating_biases Loading Parameters: (layer=19, size 10240) ffw_output_biases Loading Parameters: (layer=20, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=20, size 78643200) qkv_einsum_w Loading Parameters: (layer=20, size 157286400) gating_einsum_w Loading Parameters: (layer=20, size 78643200) linear_w Loading Parameters: (layer=20, size 10240) pre_attention_norm_scale Loading Parameters: (layer=20, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=20, size 61440) ffw_gating_biases Loading Parameters: (layer=20, size 10240) ffw_output_biases Loading Parameters: (layer=20, size 10240) attention_output_biases Loading Parameters: (layer=21, size 26214400) griffin_linear_x_w Loading Parameters: (layer=21, size 10240) griffin_linear_x_biases Loading Parameters: (layer=21, size 26214400) griffin_linear_y_w Loading Parameters: (layer=21, size 10240) griffin_linear_y_biases Loading Parameters: (layer=21, size 26214400) griffin_linear_out_w Loading Parameters: (layer=21, size 10240) griffin_linear_out_biases Loading Parameters: (layer=21, size 40960) griffin_conv_w Loading Parameters: (layer=21, size 10240) griffin_conv_biases Loading Parameters: (layer=21, size 5242880) griffin_gate_w Loading Parameters: (layer=21, size 20480) griffin_gate_biases Loading Parameters: (layer=21, size 10240) griffin_a Loading Parameters: (layer=21, size 157286400) gating_einsum_w Loading Parameters: (layer=21, size 78643200) linear_w Loading Parameters: (layer=21, size 10240) pre_attention_norm_scale Loading Parameters: (layer=21, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=21, size 61440) ffw_gating_biases Loading Parameters: (layer=21, size 10240) ffw_output_biases Loading Parameters: (layer=22, size 26214400) griffin_linear_x_w Loading Parameters: (layer=22, size 10240) griffin_linear_x_biases Loading Parameters: (layer=22, size 26214400) griffin_linear_y_w Loading Parameters: (layer=22, size 10240) griffin_linear_y_biases Loading Parameters: (layer=22, size 26214400) griffin_linear_out_w Loading Parameters: (layer=22, size 10240) griffin_linear_out_biases Loading Parameters: (layer=22, size 40960) griffin_conv_w Loading Parameters: (layer=22, size 10240) griffin_conv_biases Loading Parameters: (layer=22, size 5242880) griffin_gate_w Loading Parameters: (layer=22, size 20480) griffin_gate_biases Loading Parameters: (layer=22, size 10240) griffin_a Loading Parameters: (layer=22, size 157286400) gating_einsum_w Loading Parameters: (layer=22, size 78643200) linear_w Loading Parameters: (layer=22, size 10240) pre_attention_norm_scale Loading Parameters: (layer=22, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=22, size 61440) ffw_gating_biases Loading Parameters: (layer=22, size 10240) ffw_output_biases Loading Parameters: (layer=23, size 26214400) attn_vec_einsum_w Loading Parameters: (layer=23, size 78643200) qkv_einsum_w Loading Parameters: (layer=23, size 157286400) gating_einsum_w Loading Parameters: (layer=23, size 78643200) linear_w Loading Parameters: (layer=23, size 10240) pre_attention_norm_scale Loading Parameters: (layer=23, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=23, size 61440) ffw_gating_biases Loading Parameters: (layer=23, size 10240) ffw_output_biases Loading Parameters: (layer=23, size 10240) attention_output_biases Loading Parameters: (layer=24, size 26214400) griffin_linear_x_w Loading Parameters: (layer=24, size 10240) griffin_linear_x_biases Loading Parameters: (layer=24, size 26214400) griffin_linear_y_w Loading Parameters: (layer=24, size 10240) griffin_linear_y_biases Loading Parameters: (layer=24, size 26214400) griffin_linear_out_w Loading Parameters: (layer=24, size 10240) griffin_linear_out_biases Loading Parameters: (layer=24, size 40960) griffin_conv_w Loading Parameters: (layer=24, size 10240) griffin_conv_biases Loading Parameters: (layer=24, size 5242880) griffin_gate_w Loading Parameters: (layer=24, size 20480) griffin_gate_biases Loading Parameters: (layer=24, size 10240) griffin_a Loading Parameters: (layer=24, size 157286400) gating_einsum_w Loading Parameters: (layer=24, size 78643200) linear_w Loading Parameters: (layer=24, size 10240) pre_attention_norm_scale Loading Parameters: (layer=24, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=24, size 61440) ffw_gating_biases Loading Parameters: (layer=24, size 10240) ffw_output_biases Loading Parameters: (layer=25, size 26214400) griffin_linear_x_w Loading Parameters: (layer=25, size 10240) griffin_linear_x_biases Loading Parameters: (layer=25, size 26214400) griffin_linear_y_w Loading Parameters: (layer=25, size 10240) griffin_linear_y_biases Loading Parameters: (layer=25, size 26214400) griffin_linear_out_w Loading Parameters: (layer=25, size 10240) griffin_linear_out_biases Loading Parameters: (layer=25, size 40960) griffin_conv_w Loading Parameters: (layer=25, size 10240) griffin_conv_biases Loading Parameters: (layer=25, size 5242880) griffin_gate_w Loading Parameters: (layer=25, size 20480) griffin_gate_biases Loading Parameters: (layer=25, size 10240) griffin_a Loading Parameters: (layer=25, size 157286400) gating_einsum_w Loading Parameters: (layer=25, size 78643200) linear_w Loading Parameters: (layer=25, size 10240) pre_attention_norm_scale Loading Parameters: (layer=25, size 10240) pre_ffw_norm_scale Loading Parameters: (layer=25, size 61440) ffw_gating_biases Loading Parameters: (layer=25, size 10240) ffw_output_biases