model : fix wavtokenizer embedding notions (#19479)
This commit is contained in:
parent
89181c0b6d
commit
6d95707827
|
|
@@ -42,7 +42,6 @@ struct llama_hparams {
|
||||||
|
|
||||||
uint32_t n_ctx_train; // context size the model was trained on
|
uint32_t n_ctx_train; // context size the model was trained on
|
||||||
uint32_t n_embd;
|
uint32_t n_embd;
|
||||||
uint32_t n_embd_features = 0;
|
|
||||||
uint32_t n_layer;
|
uint32_t n_layer;
|
||||||
int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
|
int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
|
||||||
uint32_t n_rot;
|
uint32_t n_rot;
|
||||||
|
|
|
||||||
|
|
@@ -523,7 +523,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||||
ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used, false);
|
ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used, false);
|
||||||
|
|
||||||
if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
|
if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
|
||||||
ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features);
|
ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd);
|
||||||
|
ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd_out_impl);
|
||||||
|
|
||||||
ml.get_key(LLM_KV_POSNET_EMBEDDING_LENGTH, hparams.posnet.n_embd);
|
ml.get_key(LLM_KV_POSNET_EMBEDDING_LENGTH, hparams.posnet.n_embd);
|
||||||
ml.get_key(LLM_KV_POSNET_BLOCK_COUNT, hparams.posnet.n_layer);
|
ml.get_key(LLM_KV_POSNET_BLOCK_COUNT, hparams.posnet.n_layer);
|
||||||
|
|
@@ -6046,9 +6047,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_WAVTOKENIZER_DEC:
|
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||||
{
|
{
|
||||||
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hparams.n_embd_features, n_vocab}, 0);
|
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hparams.n_embd, n_vocab}, 0);
|
||||||
|
|
||||||
conv1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, hparams.n_embd_features, hparams.posnet.n_embd}, 0);
|
conv1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, hparams.n_embd, hparams.posnet.n_embd}, 0);
|
||||||
conv1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {1, hparams.posnet.n_embd}, 0);
|
conv1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {1, hparams.posnet.n_embd}, 0);
|
||||||
|
|
||||||
// posnet
|
// posnet
|
||||||
|
|
@@ -6144,8 +6145,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||||
output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, 0);
|
output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hparams.convnext.n_embd, n_embd}, 0);
|
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hparams.convnext.n_embd, hparams.n_embd_out()}, 0);
|
||||||
output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"), {n_embd}, 0);
|
output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"), {hparams.n_embd_out()}, 0);
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_BAILINGMOE:
|
case LLM_ARCH_BAILINGMOE:
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@@ -1036,7 +1036,7 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
// spectral operations
|
// spectral operations
|
||||||
const int n_embd = llama_model_n_embd(model_cts);
|
const int n_embd = llama_model_n_embd_out(model_cts);
|
||||||
const float * embd = llama_get_embeddings(ctx_cts);
|
const float * embd = llama_get_embeddings(ctx_cts);
|
||||||
|
|
||||||
auto audio = embd_to_audio(embd, n_codes, n_embd, params.cpuparams.n_threads);
|
auto audio = embd_to_audio(embd, n_codes, n_embd, params.cpuparams.n_threads);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue