diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 2a57ef5375..341aac0610 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -2207,6 +2207,7 @@ ggml_tensor * llm_graph_context::build_rs( ggml_tensor * output_states = get_state_rows(ctx0, states, state_copy_main); ggml_build_forward_expand(gf, output_states); + // copy extra states which won't be changed further (between n_seqs and n_rs) ggml_tensor * states_extra = ggml_get_rows(ctx0, states, state_copy_extra); ggml_build_forward_expand(gf, ggml_cpy(ctx0, diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 7afad1b66e..2ae023aaca 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2233,7 +2233,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|end_of_text|>" // granite || t.first == "" || t.first == "_" - || t.first == "_" || t.first == "[EOT]" // Kimi-K2 || t.first == "<|end▁of▁sentence|>" // DeepSeek || t.first == "" // smoldocling