arch : add description about LLM_TENSOR_INFOS (#17550)

2025-11-27 16:34:13 +02:00 · 2025-11-27 16:34:13 +02:00 · c386114922
parent 6783b11fb0
commit c386114922
1 changed files with 10 additions and 0 deletions
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@ -2487,6 +2487,16 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
    },
 };
 // declare information about the model weight tensors:
 // - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
 // - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
 //
 // for example, input layers are usually assigned to CPU/host buffer types
 //
 // a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to a sub-optimal
 //   assignment of buffer types and to extra overhead during computation
 // example of such a mismatch: https://github.com/ggml-org/llama.cpp/pull/17548
 //
 static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
    {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},