arch: refactor LLM_TENSOR_NAMES (#18051)

* arch: refactor LLM_TENSOR_NAMES

* update docs

* typo

* fix LLM_ARCH_NEMOTRON_H_MOE

* show more meaningful error message on missing tensor

* fix and test LLM_ARCH_NEMOTRON_H_MOE
Author: Xuan-Son Nguyen (committed 2025-12-16 13:22:30 +01:00 via GitHub)
Commit: 7f2b2f3c77 (parent: 7b1db3d3b7)
3 changed files with 1897 additions and 2284 deletions

docs/development/HOWTO-add-model.md:

@@ -97,7 +97,7 @@ The model params and tensors layout must be defined in `llama.cpp` source files:
 1. Define a new `llm_arch` enum value in `src/llama-arch.h`.
 2. In `src/llama-arch.cpp`:
     - Add the architecture name to the `LLM_ARCH_NAMES` map.
-    - Add the tensor mappings to the `LLM_TENSOR_NAMES` map.
+    - Add the list of model tensors to `llm_get_tensor_names` (you may also need to update `LLM_TENSOR_NAMES`)
 3. Add any non-standard metadata loading in the `llama_model_loader` constructor in `src/llama-model-loader.cpp`.
 4. If the model has a RoPE operation, add a case for the architecture in `llama_model_rope_type` function in `src/llama-model.cpp`.
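The implementation of `llm_get_tensor_names` lives in the `src/llama-arch.cpp` diff, which is suppressed below, so its exact shape is not visible here. As a minimal sketch only, assuming it returns the set of tensors an architecture declares (consistent with the new `std::set<llm_tensor> model_tensors` member shown further down), step 2 might look roughly like this:

```cpp
// Sketch only: the real definition is in the suppressed src/llama-arch.cpp
// diff. Assumes llm_get_tensor_names returns the set of tensors that a
// given architecture defines.
static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
    switch (arch) {
        case LLM_ARCH_LLAMA:
            return {
                LLM_TENSOR_TOKEN_EMBD,
                LLM_TENSOR_OUTPUT_NORM,
                LLM_TENSOR_OUTPUT,
                LLM_TENSOR_ATTN_NORM,
                LLM_TENSOR_ATTN_Q,
                LLM_TENSOR_ATTN_K,
                LLM_TENSOR_ATTN_V,
                LLM_TENSOR_ATTN_OUT,
                // ... remaining per-layer tensors
            };
        // ... other architectures
        default:
            return {};
    }
}
```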

src/llama-arch.cpp: diff suppressed because it is too large.

src/llama-arch.h:

@@ -3,6 +3,7 @@
 #include "ggml.h" // ggml_op

 #include <string>
+#include <set>

 //
 // gguf constants (sync with gguf.py)
@@ -316,6 +317,7 @@ enum llm_tensor {
     LLM_TENSOR_DENSE_3_OUT,
     LLM_TENSOR_OUTPUT,
     LLM_TENSOR_OUTPUT_NORM,
+    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
     LLM_TENSOR_ROPE_FREQS,
     LLM_TENSOR_ROPE_FACTORS_LONG,
     LLM_TENSOR_ROPE_FACTORS_SHORT,
@@ -526,6 +528,10 @@ struct LLM_TN_IMPL {
     const int bid;
     const int xid;

+    const std::set<llm_tensor> model_tensors;
+
+    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
+
     std::string str() const;

     operator std::string() const {
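Only the declarations appear in this hunk; the definitions are in the suppressed `src/llama-arch.cpp` diff. Given the commit's stated goal of a more meaningful error message on a missing tensor, a plausible sketch of the constructor and `str()` follows; the error text, the `llm_get_tensor_names` call, and the assumption that `LLM_TENSOR_NAMES` is now keyed by tensor alone are all guesses, not the actual implementation:

```cpp
// Hypothetical sketch (needs <stdexcept>): assumes the constructor caches
// the arch's declared tensors so str() can fail loudly when asked for a
// tensor the architecture never registered.
LLM_TN_IMPL::LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid)
    : arch(arch), tensor(tensor), suffix(suffix), bid(bid), xid(xid),
      model_tensors(llm_get_tensor_names(arch)) {}

std::string LLM_TN_IMPL::str() const {
    if (model_tensors.find(tensor) == model_tensors.end()) {
        // more meaningful than silently producing a placeholder name
        throw std::runtime_error("tensor not defined for this architecture");
    }
    // substitute bid/xid into the printf-style name pattern
    // (format() is llama.cpp's printf-style string helper)
    std::string name = ::format(LLM_TENSOR_NAMES.at(tensor), bid, xid);
    if (suffix != nullptr) {
        name += ".";
        name += suffix;
    }
    return name;
}
```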
@@ -547,11 +553,11 @@ struct LLM_TN {
     llm_arch arch;

     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
-        return { arch, tensor, suffix, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
     }

     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
-        return { arch, tensor, nullptr, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
     }
 };
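Call sites are unaffected by the switch from aggregate initialization to the explicit constructor. For reference, a typical use of `LLM_TN` (the resulting names follow the GGUF convention, e.g. `blk.<bid>.<tensor>.<suffix>`):

```cpp
// Usage sketch: building canonical GGUF tensor names. The implicit
// operator std::string() shown above converts the LLM_TN_IMPL result.
LLM_TN tn(LLM_ARCH_LLAMA);

std::string tok_embd = tn(LLM_TENSOR_TOKEN_EMBD, "weight"); // "token_embd.weight"
std::string attn_q   = tn(LLM_TENSOR_ATTN_Q, "weight", 0);  // "blk.0.attn_q.weight"
```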