diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 55a76f8248..2505489b1e 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3015,6 +3015,10 @@ struct ggml_tensor * ggml_mul_mat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { + + printf("Up: {%lld, %lld}\n", a->ne[0], a->ne[1]); + printf("Cur: {%lld, %lld}\n", b->ne[0], b->ne[1]); + GGML_ASSERT(ggml_can_mul_mat(a, b)); GGML_ASSERT(!ggml_is_transposed(a)); diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index a05de6e585..ae8b150d28 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -644,6 +644,8 @@ ggml_tensor * llm_graph_context::build_ffn( llm_ffn_op_type type_op, llm_ffn_gate_type type_gate, int il) const { + + ggml_tensor * tmp = up ? build_lora_mm(up, cur) : cur; cb(tmp, "ffn_up", il); @@ -1377,7 +1379,7 @@ ggml_tensor * llm_graph_context::build_attn( // TODO: if ubatch.equal_seqs() == true, we can split the three tensors below into ubatch.n_seqs_unq streams LLAMA_LOG_INFO("ubatch.equal_seqs() = %d, n_seqs = %d\n", ubatch.equal_seqs(), ubatch.n_seqs); - assert(!ubatch.equal_seqs()); + // sassert(!ubatch.equal_seqs()); ggml_tensor * q = q_cur; ggml_tensor * k = k_cur;