arch: refactor LLM_TENSOR_NAMES (#18051)
* arch: refactor LLM_TENSOR_NAMES
* update docs
* typo
* fix LLM_ARCH_NEMOTRON_H_MOE
* show more meaningful error message on missing tensor
* fix and tested LLM_ARCH_NEMOTRON_H_MOE
parent: 7b1db3d3b7
commit: 7f2b2f3c77
docs/development/HOWTO-add-model.md

@@ -97,7 +97,7 @@ The model params and tensors layout must be defined in `llama.cpp` source files:
 1. Define a new `llm_arch` enum value in `src/llama-arch.h`.
 2. In `src/llama-arch.cpp`:
     - Add the architecture name to the `LLM_ARCH_NAMES` map.
-    - Add the tensor mappings to the `LLM_TENSOR_NAMES` map.
+    - Add the list of model tensors to `llm_get_tensor_names` (you may also need to update `LLM_TENSOR_NAMES`)
 3. Add any non-standard metadata loading in the `llama_model_loader` constructor in `src/llama-model-loader.cpp`.
 4. If the model has a RoPE operation, add a case for the architecture in `llama_model_rope_type` function in `src/llama-model.cpp`.
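For orientation, a minimal sketch of what steps 1 and 2 amount to. `LLM_ARCH_MYMODEL` and the `"mymodel"` string are hypothetical placeholders, and the map layout follows the long-standing `LLM_ARCH_NAMES` convention rather than anything this commit changes; the real declarations live in `src/llama-arch.h` and `src/llama-arch.cpp`:

```cpp
#include <map>

// src/llama-arch.h (sketch): declare the new architecture enum value.
enum llm_arch {
    // ... existing architectures ...
    LLM_ARCH_MYMODEL,   // hypothetical new architecture
    LLM_ARCH_UNKNOWN,
};

// src/llama-arch.cpp (sketch): register the GGUF architecture name.
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    // ... existing entries ...
    { LLM_ARCH_MYMODEL, "mymodel" },
};
```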
src/llama-arch.cpp: 3803 lines changed (file diff suppressed because it is too large)

src/llama-arch.h
@@ -3,6 +3,7 @@
 #include "ggml.h" // ggml_op

 #include <string>
+#include <set>

 //
 // gguf constants (sync with gguf.py)
@@ -316,6 +317,7 @@ enum llm_tensor {
     LLM_TENSOR_DENSE_3_OUT,
     LLM_TENSOR_OUTPUT,
     LLM_TENSOR_OUTPUT_NORM,
+    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
     LLM_TENSOR_ROPE_FREQS,
     LLM_TENSOR_ROPE_FACTORS_LONG,
     LLM_TENSOR_ROPE_FACTORS_SHORT,
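The in-diff comment marks this as a fix for a wrong tensor name: giving LFM2's output norm its own enum value lets it map to a different GGUF name string than the generic `LLM_TENSOR_OUTPUT_NORM`. A hedged sketch of how such per-tensor name entries look; the entry layout follows the pre-existing `LLM_TENSOR_NAMES` convention, and the LFM2 string below is a placeholder, not the value from this commit:

```cpp
#include <map>

// Sketch only: enum values map to GGUF base-name patterns, to which a suffix
// such as "weight" or "bias" is later appended by the tensor-name formatter.
static const std::map<llm_tensor, const char *> TENSOR_NAMES_SKETCH = {
    { LLM_TENSOR_OUTPUT,           "output"      },
    { LLM_TENSOR_OUTPUT_NORM,      "output_norm" },
    { LLM_TENSOR_OUTPUT_NORM_LFM2, "lfm2_norm"   }, // placeholder string
};
```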
@@ -526,6 +528,10 @@ struct LLM_TN_IMPL {
     const int bid;
     const int xid;

+    const std::set<llm_tensor> model_tensors;
+
+    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
+
     std::string str() const;

     operator std::string() const {
@@ -547,11 +553,11 @@ struct LLM_TN {
     llm_arch arch;

     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
-        return { arch, tensor, suffix, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
     }

     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
-        return { arch, tensor, nullptr, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
     }
 };
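Two observations on the `LLM_TN` changes above. Because `LLM_TN_IMPL` now has a user-declared constructor it is no longer an aggregate, so the braced returns would resolve to that constructor anyway; spelling the call out makes it explicit that construction now runs logic (such as populating `model_tensors`) rather than plain member-wise initialization. The `model_tensors` set is also what backs the commit's "show more meaningful error message on missing tensor" item: a name lookup can first check whether the requested tensor is defined for the architecture at all. A self-contained sketch of that idea, with hypothetical names rather than the actual implementation:

```cpp
#include <set>
#include <stdexcept>
#include <string>

// Hypothetical demo of the membership check enabled by model_tensors:
// asking for a tensor the architecture does not define fails loudly,
// instead of producing a name that silently matches nothing in the GGUF file.
enum demo_tensor { DEMO_OUTPUT, DEMO_OUTPUT_NORM, DEMO_ROPE_FREQS };

std::string demo_tensor_name(demo_tensor t, const std::set<demo_tensor> & model_tensors) {
    if (model_tensors.count(t) == 0) {
        throw std::runtime_error("missing tensor: not defined for this architecture");
    }
    switch (t) {
        case DEMO_OUTPUT:      return "output";
        case DEMO_OUTPUT_NORM: return "output_norm";
        case DEMO_ROPE_FREQS:  return "rope_freqs";
    }
    return "";
}
```

Call sites keep the same shape either way: `tn(LLM_TENSOR_OUTPUT, "weight")` still yields an `LLM_TN_IMPL` that converts to `std::string` via the `operator std::string()` shown in the diff.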