arch : add description about LLM_TENSOR_INFOS (#17550)
This commit is contained in:
parent
6783b11fb0
commit
c386114922
|
|
@ -2487,6 +2487,16 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// declare information about the model weight tensors:
|
||||||
|
// - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
|
||||||
|
// - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
|
||||||
|
//
|
||||||
|
// for example, input layers are usually assigned to CPU/host buffer types
|
||||||
|
//
|
||||||
|
// a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to sub-optimal
|
||||||
|
// assignment of the buffer types and extra overhead during computation
|
||||||
|
// example of such a mismatch: https://github.com/ggml-org/llama.cpp/pull/17548
|
||||||
|
//
|
||||||
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
||||||
{LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
|
{LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
|
||||||
{LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
|
{LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue