diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 1512869ec6..9ca2e579d7 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -644,8 +644,6 @@ ggml_tensor * llm_graph_context::build_ffn( llm_ffn_op_type type_op, llm_ffn_gate_type type_gate, int il) const { - - ggml_tensor * tmp = up ? build_lora_mm(up, cur) : cur; cb(tmp, "ffn_up", il); @@ -1377,9 +1375,9 @@ ggml_tensor * llm_graph_context::build_attn( // [TAG_NO_CACHE_PAD] // TODO: if ubatch.equal_seqs() == true, we can split the three tensors below into ubatch.n_seqs_unq streams - LLAMA_LOG_INFO("ubatch.equal_seqs() = %d, n_seqs = %d\n", ubatch.equal_seqs(), ubatch.n_seqs); - - // sassert(!ubatch.equal_seqs()); + if (ubatch.n_seqs > 1) { + assert(!ubatch.equal_seqs()); + } ggml_tensor * q = q_cur; ggml_tensor * k = k_cur; diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 860e558595..88784ddadd 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -7589,7 +7589,6 @@ struct llm_build_modern_bert : public llm_graph_context { LLM_NORM, il); cb(x_attn_in, "attn_pre_norm", il); } else { - LLAMA_LOG_INFO("Identity Tensor\n"); cb(x_attn_in, "attn_pre_norm_identity", il); }