fix: correct misspellings in code comments (#21217)
- emdeddings → embeddings (gemma3.cpp, gemma3n-iswa.cpp, gemma-embedding.cpp)
- imlpemented → implemented (llama-adapter.cpp)
- interere → interfere (llama-graph.cpp)
- overridde → overridden (chat.cpp)
- stastistics → statistics (ngram-map.h)
- layed → laid (llama-kv-cache.h)
- worster → worst (llama-context.cpp)
- sequantial → sequential (llama-batch.h)
parent eec6f85d7b
commit 0b6ff47996
chat.cpp
@@ -221,7 +221,7 @@ using chat_template_caps = jinja::caps;
 struct common_chat_templates {
     bool add_bos;
     bool add_eos;
-    bool has_explicit_template; // Model had builtin template or template overridde was specified.
+    bool has_explicit_template; // Model had builtin template or template overridden was specified.
     std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
     std::unique_ptr<common_chat_template> template_tool_use;
 };
ngram-map.h
@@ -51,7 +51,7 @@ struct common_ngram_map_value {
 // statistics of a n-gram
 struct common_ngram_map_key {
     size_t key_idx; // index of key n-gram in token-history
-    size_t stat_idx; // index of last token of stastistics computation (key_num, values)
+    size_t stat_idx; // index of last token of statistics computation (key_num, values)

     uint16_t key_num; // number of occurrences of this key n-gram in token-history
     common_ngram_map_value values[COMMON_NGRAM_MAX_VALUES]; // some known values after the key
llama-adapter.cpp
@@ -294,7 +294,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     }

     // get extra buffer types of the CPU
-    // TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
+    // TODO: a more general solution for non-CPU extra buft should be implemented in the future
     // ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
     std::vector<ggml_backend_buffer_type_t> buft_extra;
     {
llama-batch.h
@@ -18,7 +18,7 @@ struct llama_ubatch {
     }

     // typical for M-RoPE cases:
-    // 0 - sequantial position of the tokens/embeddings in the sequence
+    // 0 - sequential position of the tokens/embeddings in the sequence
     // 1 - y position in the image
     // 2 - x position in the image
     // 3 - other
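Illustration only, not part of the diff: a minimal standalone sketch of how the four position rows described in the comment above could be filled for a few text tokens followed by a 2x2 grid of image patches. The buffer name, the row-major layout, and the choice to give all patches of one image the same sequential slot are assumptions for illustration, not llama.cpp's actual buffers.

#include <cstdint>
#include <vector>

int main() {
    const int n_text = 3;              // text tokens before the image
    const int img_h = 2, img_w = 2;    // image patch grid
    const int n_tok  = n_text + img_h * img_w;
    const int n_dims = 4;              // rows: 0 = sequential, 1 = y, 2 = x, 3 = other

    // hypothetical layout: pos[d * n_tok + i] holds dimension d of token i
    std::vector<int64_t> pos(n_dims * n_tok, 0);

    for (int i = 0; i < n_text; ++i) {
        pos[0 * n_tok + i] = i;            // text tokens use only the sequential row
    }
    for (int p = 0; p < img_h * img_w; ++p) {
        const int i = n_text + p;
        pos[0 * n_tok + i] = n_text;       // assumed: patches of one image share a sequential slot
        pos[1 * n_tok + i] = p / img_w;    // y position in the image
        pos[2 * n_tok + i] = p % img_w;    // x position in the image
    }
    return 0;
}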
llama-context.cpp
@@ -586,7 +586,7 @@ void llama_context::sched_reserve() {

     // reserve again with pp graph to avoid ggml-alloc reallocations during inference
     {
-        // TODO: not sure if the following graph would be worster case for multi-stream KV caches:
+        // TODO: not sure if the following graph would be worst case for multi-stream KV caches:
         //
         // auto * gf = graph_reserve(n_tokens, 1, n_tokens, mctx.get());
         //
llama-graph.cpp
@@ -1665,7 +1665,7 @@ ggml_tensor * llm_graph_context::build_inp_attn_scale() const {

 ggml_tensor * llm_graph_context::build_inp_out_ids() const {
     // note: when all tokens are output, we could skip this optimization to spare the ggml_get_rows() calls,
-    // but this would make the graph topology depend on the number of output tokens, which can interere with
+    // but this would make the graph topology depend on the number of output tokens, which can interfere with
     // features that require constant topology such as pipeline parallelism
     // ref: https://github.com/ggml-org/llama.cpp/pull/14275#issuecomment-2987424471
     //if (n_outputs < n_tokens) {
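Illustration only, not part of the diff: the row-selection idea behind the ggml_get_rows() call mentioned in this comment, written as a plain C++ sketch with assumed names rather than the actual graph code. Keeping only the rows of tokens marked as outputs lets the final projections run over n_outputs rows instead of n_tokens.

#include <cstdint>
#include <vector>

// gather the embedding rows listed in out_ids from a row-major [n_tokens, n_embd] matrix
std::vector<float> select_output_rows(const std::vector<float> & x, int n_embd,
                                      const std::vector<int32_t> & out_ids) {
    std::vector<float> y(out_ids.size() * (size_t) n_embd);
    for (size_t r = 0; r < out_ids.size(); ++r) {
        for (int c = 0; c < n_embd; ++c) {
            y[r * n_embd + c] = x[(size_t) out_ids[r] * n_embd + c];
        }
    }
    return y;
}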
llama-kv-cache.h
@@ -333,7 +333,7 @@ public:
     ggml_tensor * get_v(ggml_context * ctx, int32_t il) const;

     // store k_cur and v_cur in the cache based on the provided head location
-    // note: the heads in k_cur and v_cur should be layed out contiguously in memory
+    // note: the heads in k_cur and v_cur should be laid out contiguously in memory
     // - k_cur  [n_embd_head_k, n_head_k, n_tokens]
     // - k_idxs [n_tokens]
     // - v_cur  [n_embd_head_v, n_head_v, n_tokens]
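Illustration only, not part of the diff: what "laid out contiguously" means for the shapes listed above, as a hypothetical helper rather than anything in the llama.cpp API. With the first dimension varying fastest, element (e, h, t) of a [n_embd_head_k, n_head_k, n_tokens] buffer sits at a single flat offset, so all heads of one token form one contiguous block of n_embd_head_k * n_head_k values.

#include <cstddef>

// flat offset of element (e, h, t) in a contiguous [n_embd_head_k, n_head_k, n_tokens] buffer
size_t k_cur_offset(size_t e, size_t h, size_t t,
                    size_t n_embd_head_k, size_t n_head_k) {
    return e + n_embd_head_k * (h + n_head_k * t);
}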
gemma-embedding.cpp
@@ -9,7 +9,7 @@ llm_build_gemma_embedding::llm_build_gemma_embedding(const llama_model & model,

     inpL = build_inp_embd(model.tok_embd);

-    // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
+    // important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
     inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
     cb(inpL, "inp_scaled", -1);

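Illustration only, not part of the diff: the scaling rule the comment above describes, as a standalone sketch with an assumed function name rather than the ggml graph call. Token embeddings are multiplied by sqrt(n_embd); raw, already-encoded image embeddings pass through with a factor of 1.

#include <cmath>
#include <vector>

void scale_input_embeddings(std::vector<float> & embd, int n_embd, bool is_token_input) {
    const float scale = is_token_input ? std::sqrt((float) n_embd) : 1.0f;
    for (float & x : embd) {
        x *= scale;
    }
}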
gemma3.cpp
@@ -9,7 +9,7 @@ llm_build_gemma3<iswa>::llm_build_gemma3(const llama_model & model, const llm_gr

     inpL = build_inp_embd(model.tok_embd);

-    // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
+    // important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
     inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
     cb(inpL, "inp_scaled", -1);

gemma3n-iswa.cpp
@@ -12,7 +12,7 @@ llm_build_gemma3n_iswa::llm_build_gemma3n_iswa(const llama_model & model, const

     inpL = build_inp_embd(model.tok_embd);

-    // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings)
+    // important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
     inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
     cb(inpL, "inp_scaled", -1);
