arch: refactor LLM_TENSOR_NAMES (#18051)
* arch: refactor LLM_TENSOR_NAMES
* update docs
* typo
* fix LLM_ARCH_NEMOTRON_H_MOE
* show more meaningful error message on missing tensor
* fix and tested LLM_ARCH_NEMOTRON_H_MOE
parent: 7b1db3d3b7
commit: 7f2b2f3c77
docs/development/HOWTO-add-model.md

@@ -97,7 +97,7 @@ The model params and tensors layout must be defined in `llama.cpp` source files:
 1. Define a new `llm_arch` enum value in `src/llama-arch.h`.
 2. In `src/llama-arch.cpp`:
     - Add the architecture name to the `LLM_ARCH_NAMES` map.
-    - Add the tensor mappings to the `LLM_TENSOR_NAMES` map.
+    - Add the list of model tensors to `llm_get_tensor_names` (you may also need to update `LLM_TENSOR_NAMES`)
 3. Add any non-standard metadata loading in the `llama_model_loader` constructor in `src/llama-model-loader.cpp`.
 4. If the model has a RoPE operation, add a case for the architecture in `llama_model_rope_type` function in `src/llama-model.cpp`.
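For orientation, a minimal sketch of what steps 1 and 2 amount to. `LLM_ARCH_MYMODEL` and the `"mymodel"` string are hypothetical placeholders, and the map layout follows the long-standing `LLM_ARCH_NAMES` convention rather than anything this commit changes; the real declarations live in `src/llama-arch.h` and `src/llama-arch.cpp`:

```cpp
#include <map>

// src/llama-arch.h (sketch): declare the new architecture enum value.
enum llm_arch {
    // ... existing architectures ...
    LLM_ARCH_MYMODEL,   // hypothetical new architecture
    LLM_ARCH_UNKNOWN,
};

// src/llama-arch.cpp (sketch): register the GGUF architecture name.
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    // ... existing entries ...
    { LLM_ARCH_MYMODEL, "mymodel" },
};
```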
src/llama-arch.cpp: 3803 lines changed (file diff suppressed because it is too large)

src/llama-arch.h
@@ -3,6 +3,7 @@
 #include "ggml.h" // ggml_op

 #include <string>
+#include <set>

 //
 // gguf constants (sync with gguf.py)
@@ -316,6 +317,7 @@ enum llm_tensor {
     LLM_TENSOR_DENSE_3_OUT,
     LLM_TENSOR_OUTPUT,
     LLM_TENSOR_OUTPUT_NORM,
+    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
     LLM_TENSOR_ROPE_FREQS,
     LLM_TENSOR_ROPE_FACTORS_LONG,
     LLM_TENSOR_ROPE_FACTORS_SHORT,
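The in-diff comment marks this as a fix for a wrong tensor name: giving LFM2's output norm its own enum value lets it map to a different GGUF name string than the generic `LLM_TENSOR_OUTPUT_NORM`. A hedged sketch of how such per-tensor name entries look; the entry layout follows the pre-existing `LLM_TENSOR_NAMES` convention, and the LFM2 string below is a placeholder, not the value from this commit:

```cpp
#include <map>

// Sketch only: enum values map to GGUF base-name patterns, to which a suffix
// such as "weight" or "bias" is later appended by the tensor-name formatter.
static const std::map<llm_tensor, const char *> TENSOR_NAMES_SKETCH = {
    { LLM_TENSOR_OUTPUT,           "output"      },
    { LLM_TENSOR_OUTPUT_NORM,      "output_norm" },
    { LLM_TENSOR_OUTPUT_NORM_LFM2, "lfm2_norm"   }, // placeholder string
};
```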
@@ -526,6 +528,10 @@ struct LLM_TN_IMPL {
     const int bid;
     const int xid;

+    const std::set<llm_tensor> model_tensors;
+
+    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
+
     std::string str() const;

     operator std::string() const {
@@ -547,11 +553,11 @@ struct LLM_TN {
     llm_arch arch;

     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
-        return { arch, tensor, suffix, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
     }

     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
-        return { arch, tensor, nullptr, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
     }
 };
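Two observations on the `LLM_TN` changes above. Because `LLM_TN_IMPL` now has a user-declared constructor it is no longer an aggregate, so the braced returns would resolve to that constructor anyway; spelling the call out makes it explicit that construction now runs logic (such as populating `model_tensors`) rather than plain member-wise initialization. The `model_tensors` set is also what backs the commit's "show more meaningful error message on missing tensor" item: a name lookup can first check whether the requested tensor is defined for the architecture at all. A self-contained sketch of that idea, with hypothetical names rather than the actual implementation:

```cpp
#include <set>
#include <stdexcept>
#include <string>

// Hypothetical demo of the membership check enabled by model_tensors:
// asking for a tensor the architecture does not define fails loudly,
// instead of producing a name that silently matches nothing in the GGUF file.
enum demo_tensor { DEMO_OUTPUT, DEMO_OUTPUT_NORM, DEMO_ROPE_FREQS };

std::string demo_tensor_name(demo_tensor t, const std::set<demo_tensor> & model_tensors) {
    if (model_tensors.count(t) == 0) {
        throw std::runtime_error("missing tensor: not defined for this architecture");
    }
    switch (t) {
        case DEMO_OUTPUT:      return "output";
        case DEMO_OUTPUT_NORM: return "output_norm";
        case DEMO_ROPE_FREQS:  return "rope_freqs";
    }
    return "";
}
```

Call sites keep the same shape either way: `tn(LLM_TENSOR_OUTPUT, "weight")` still yields an `LLM_TN_IMPL` that converts to `std::string` via the `operator std::string()` shown in the diff.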