diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 43869ec228..0d612c1819 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -38,6 +38,10 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap
         printed = true;
     }
 
+    if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
+        dump_cgraph(m_cgraph);
+    }
+
     set_max_token_len();
     for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
         auto* cur_node = m_cgraph->nodes[node_n];
@@ -47,10 +51,6 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap
         m_model_weights = model_weights;
         add_extra_inputs();
-
-        if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
-            dump_cgraph(m_cgraph);
-        }
     }
 }
@@ -142,17 +142,40 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node,
     if (m_node) {
         switch (node->op) {
+        case GGML_OP_RESHAPE: {
+            if (node->ne[0] * node->ne[1] == node->src[0]->ne[0]) {
+                m_op_case = 1;
+            } else if (node->src[0]->ne[0] * node->src[0]->ne[1] == node->ne[0]) {
+                m_op_case = 2;
+            }
+            break;
+        }
         case GGML_OP_CONT: {
-            // Currently only two cases, either the input comes from a VIEW which is subtensor or from a PERMUTE
-            m_continuous = ggml_nelements(node->src[0]) == ggml_nelements(node->src[0]->view_src);
+            if (ggml_nelements(node->src[0]) == ggml_nelements(node->src[0]->view_src)) {
+                // The input comes from a PERMUTE
+                m_op_case = 1;
+            } else {
+                // The input comes from a VIEW which is subtensor
+                m_op_case = 2;
+            }
             break;
         }
         case GGML_OP_CPY: {
-            m_continuous = ggml_is_contiguous(node);
+            if (ggml_is_contiguous(node)) {
+                // Write K to cache_k
+                m_op_case = 1;
+            } else {
+                // Write V to cache_v
+                m_op_case = 2;
+            }
             break;
         }
         case GGML_OP_MUL_MAT: {
-            m_continuous = node->src[0]->view_src == nullptr;
+            if (node->src[0]->view_src == nullptr) {
+                m_op_case = 1;
+            } else {
+                m_op_case = 2;
+            }
             break;
         }
         default:
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index 959e00b65d..b8cc4c4cdf 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -69,8 +69,8 @@ public:
         return m_outputs.at(name);
     }
 
-    virtual bool check_if_continuous() const override {
-        return m_continuous;
+    virtual int get_op_case() const override {
+        return m_op_case;
     }
 
     virtual const std::map>& get_model_inputs() const override {
@@ -110,7 +110,7 @@ private:
     std::vector m_nodes;
     std::string m_op_name;
     mutable std::string m_name;
-    bool m_continuous;
+    int m_op_case;
     std::vector> m_op_node_name;
     std::map> m_model_inputs;
     std::map> m_model_extra_inputs;
@@ -119,4 +119,4 @@
     std::vector m_model_output_names;
 };
 
-void print_tensor_address_map(const struct ggml_cgraph* cgraph);
\ No newline at end of file
+void print_tensor_address_map(const struct ggml_cgraph* cgraph);
diff --git a/ggml/src/ggml-openvino/openvino/decoder.hpp b/ggml/src/ggml-openvino/openvino/decoder.hpp
index 3987760a29..b3cf75817f 100644
--- a/ggml/src/ggml-openvino/openvino/decoder.hpp
+++ b/ggml/src/ggml-openvino/openvino/decoder.hpp
@@ -49,7 +49,7 @@ public:
     virtual void visit_subgraph(std::function)> node_visitor) const = 0;
 
-    virtual bool check_if_continuous() const = 0;
+    virtual int get_op_case() const = 0;
 
     virtual const std::map>& get_model_inputs() const = 0;
     virtual const std::map>& get_model_extra_inputs() const = 0;
@@ -59,4 +59,4 @@
 
 } // namespace ggml
 } // namespace frontend
-} // namespace ov
\ No newline at end of file
+} // namespace ov
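Aside, not part of the patch: the classification that the new m_op_case field records can be read as a single free function over the ggml node. A minimal sketch mirroring the switch above, assuming only the public ggml API (classify_op_case is a hypothetical name):

#include "ggml.h"

// Mirror of the logic in GgmlOvDecoder::set_input_output(): map a ggml node to
// the op case the OpenVINO translators dispatch on (0 = unclassified).
static int classify_op_case(const struct ggml_tensor * node) {
    switch (node->op) {
        case GGML_OP_RESHAPE:
            // case 1: the source's fastest dim is split in two; case 2: two dims are merged
            if (node->ne[0] * node->ne[1] == node->src[0]->ne[0]) return 1;
            if (node->src[0]->ne[0] * node->src[0]->ne[1] == node->ne[0]) return 2;
            return 0;
        case GGML_OP_CONT:
            // case 1: input comes from a PERMUTE; case 2: from a VIEW (subtensor)
            // (assumes src[0]->view_src is non-NULL, as the decoder does)
            return ggml_nelements(node->src[0]) == ggml_nelements(node->src[0]->view_src) ? 1 : 2;
        case GGML_OP_CPY:
            // case 1: write K to cache_k; case 2: write V to cache_v
            return ggml_is_contiguous(node) ? 1 : 2;
        case GGML_OP_MUL_MAT:
            return node->src[0]->view_src == nullptr ? 1 : 2;
        default:
            return 0;
    }
}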
diff --git a/ggml/src/ggml-openvino/openvino/node_context.hpp b/ggml/src/ggml-openvino/openvino/node_context.hpp
index e934e2ac36..44f55222e3 100644
--- a/ggml/src/ggml-openvino/openvino/node_context.hpp
+++ b/ggml/src/ggml-openvino/openvino/node_context.hpp
@@ -81,8 +81,8 @@ public:
         return m_decoder->get_attribute(name);
     }
 
-    bool check_if_continuous() const {
-        return m_decoder->check_if_continuous();
+    int get_op_case() const {
+        return m_decoder->get_op_case();
     }
 
 private:
diff --git a/ggml/src/ggml-openvino/openvino/op/cont.cpp b/ggml/src/ggml-openvino/openvino/op/cont.cpp
index e8e9bf0a4e..a052bf06ca 100644
--- a/ggml/src/ggml-openvino/openvino/op/cont.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/cont.cpp
@@ -17,11 +17,13 @@ namespace op {
 OutputVector translate_cont(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
 
+    int op_case = context.get_op_case();
+    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported CONT case");
+
     auto src_shape = context.get_input_shape(0).to_shape();
     auto dst_shape = context.get_output_shape(0).to_shape();
 
-    bool continuous = context.check_if_continuous();
-    if (continuous) {
+    if (op_case == 1) {
         // The input comes from a PERMUTE
         dst_shape[1] = -1;
         auto result = std::make_shared(
diff --git a/ggml/src/ggml-openvino/openvino/op/cpy.cpp b/ggml/src/ggml-openvino/openvino/op/cpy.cpp
index 2808d3ee91..4ab1502f81 100644
--- a/ggml/src/ggml-openvino/openvino/op/cpy.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/cpy.cpp
@@ -22,13 +22,16 @@ namespace op {
 OutputVector translate_cpy(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
+
+    int op_case = context.get_op_case();
+    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported CPY case");
+
     auto src0 = context.get_input(0);
     auto src1 = context.get_input(1);
     auto past_token_len = context.get_input("past_token_len");
 
     auto src0_shape = context.get_input_shape(0).to_shape();
     auto output_shape = context.get_output_shape(0).to_shape();
-    bool continuous = context.check_if_continuous();
 
     std::vector input0_strides = context.get_input_stride(0);
     std::vector output_strides = context.get_output_stride(0);
@@ -36,7 +39,7 @@ OutputVector translate_cpy(const NodeContext& context) {
     auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {1});
     src0 = std::make_shared(src0, src1);
 
-    if (continuous) {
+    if (op_case == 1) {
         // Write K to cache_k
         int64_t head_size = src0_shape[2];
         int64_t num_heads = src0_shape[1];
diff --git a/ggml/src/ggml-openvino/openvino/op/mulmat.cpp b/ggml/src/ggml-openvino/openvino/op/mulmat.cpp
index 3e9c5c5083..5673551f70 100644
--- a/ggml/src/ggml-openvino/openvino/op/mulmat.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/mulmat.cpp
@@ -22,8 +22,10 @@ namespace op {
 OutputVector translate_mulmat(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
 
-    bool continuous = context.check_if_continuous();
-    if (continuous) {
+    int op_case = context.get_op_case();
+    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported MULMAT case");
+
+    if (op_case == 1) {
         auto src0 = context.get_input(0);
         auto src1 = std::make_shared(context.get_input(1), context.get_input_type(0));
         auto result_lp = std::make_shared(src1, src0, false, true);
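Aside, not part of the patch: the converters above all consume the new hook the same way. A sketch of that shared pattern, assuming the same namespace and headers as the existing op translators (translate_example and the "EXAMPLE" op are hypothetical placeholders):

// Inside namespace ov::frontend::ggml::op, with the same includes as cont.cpp / cpy.cpp.
OutputVector translate_example(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    // The decoder has already classified the node; reject anything it did not
    // recognize instead of silently emitting a wrong subgraph.
    int op_case = context.get_op_case();
    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported EXAMPLE case");

    if (op_case == 1) {
        // layout-preserving variant
        return {context.get_input(0)};
    }
    // op_case == 2: layout-changing variant would be handled here
    return {context.get_input(0)};
}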
diff --git a/ggml/src/ggml-openvino/openvino/op/reshape.cpp b/ggml/src/ggml-openvino/openvino/op/reshape.cpp
index 06b2bd339e..f6586d674c 100644
--- a/ggml/src/ggml-openvino/openvino/op/reshape.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/reshape.cpp
@@ -1,6 +1,8 @@
 #include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -19,11 +21,22 @@ OutputVector translate_reshape(const NodeContext& context) {
         return {context.get_input(0)};
     }
 
+    int op_case = context.get_op_case();
+    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported RESHAPE case");
+
     auto output_shape = context.get_output_shape(0).to_shape();
-    auto new_shape_node =
-        ov::op::v0::Constant::create(ov::element::i64,
-                                     {3},
-                                     std::vector{-1, (int64_t)output_shape[1], (int64_t)output_shape[2]});
+    std::shared_ptr new_shape_node;
+    if (op_case == 1) {
+        new_shape_node =
+            ov::op::v0::Constant::create(ov::element::i64,
+                                         {3},
+                                         std::vector{-1, (int64_t)output_shape[1], (int64_t)output_shape[2]});
+    } else {
+        new_shape_node =
+            ov::op::v0::Constant::create(ov::element::i64,
+                                         {3},
+                                         std::vector{(int64_t)output_shape[0], -1, (int64_t)output_shape[2]});
+    }
     Output res = std::make_shared(context.get_input(0), new_shape_node, false);
     return {res};
 }
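To make the two RESHAPE cases concrete, a worked example with hypothetical sizes (the 128/32/7 numbers are illustrative only; "ne" is ggml order, fastest-varying dimension first):

// case 1 (split): src->ne = {4096, 7, 1, 1}, dst->ne = {128, 32, 7, 1}
//   dst->ne[0] * dst->ne[1] == src->ne[0]   (128 * 32 == 4096)
//   -> the translator emits the target shape {-1, output_shape[1], output_shape[2]}
//
// case 2 (merge): src->ne = {128, 32, 7, 1}, dst->ne = {4096, 7, 1, 1}
//   src->ne[0] * src->ne[1] == dst->ne[0]   (128 * 32 == 4096)
//   -> the translator emits the target shape {output_shape[0], -1, output_shape[2]}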
diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp
index 012e9178c6..910a0d8336 100644
--- a/ggml/src/ggml-openvino/openvino/translate_session.cpp
+++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp
@@ -31,10 +31,6 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo
     const auto& ggml_model = std::dynamic_pointer_cast(input_model);
     std::shared_ptr ggml_model_decoder = ggml_model->get_model_decoder();
 
-    FRONT_END_GENERAL_CHECK(ggml_model, "nullptr for InputModel is given for translation into OV Model");
-    const auto& model_inputs = ggml_model->get_inputs();
-    const auto& model_outputs = ggml_model->get_outputs();
-
     for (const auto& it : ggml_model_decoder->get_model_inputs()) {
         params.push_back(std::dynamic_pointer_cast(it.second));
         (*tensor_map)[it.first] = it.second;
diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 944c7e53bd..d4a25ab59b 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1275,7 +1275,7 @@ ggml_tensor * llm_graph_context::build_inp_embd(ggml_tensor * tok_embd) const {
     if (ubatch.token) {
         inp->tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ubatch.n_tokens);
-        //cb(inp->tokens, "inp_tokens", -1);
+        cb(inp->tokens, "inp_tokens", -1);
         ggml_set_input(inp->tokens);
 
         res->t_tokens = inp->tokens;
@@ -1327,6 +1327,7 @@ ggml_tensor * llm_graph_context::build_inp_pos() const {
     auto & cur = inp->pos;
 
     cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, (int64_t)n_tokens*hparams.n_pos_per_embd());
+    cb(cur, "inp_pos", -1);
     ggml_set_input(cur);
 
     res->add_input(std::move(inp));
@@ -1362,6 +1363,7 @@ ggml_tensor * llm_graph_context::build_inp_out_ids() const {
     auto & cur = inp->out_ids;
 
     cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_outputs);
+    cb(cur, "inp_out_ids", -1);
     ggml_set_input(cur);
 
     res->add_input(std::move(inp));
@@ -1603,6 +1605,7 @@ llm_graph_input_attn_no_cache * llm_graph_context::build_attn_inp_no_cache() con
     // note: there is no KV cache, so the number of KV values is equal to the number of tokens in the batch
     inp->self_kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_tokens, n_tokens, 1, 1);
+    cb(inp->self_kq_mask, "KQ_mask", -1);
     ggml_set_input(inp->self_kq_mask);
 
     inp->self_kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask;
@@ -1661,7 +1664,7 @@ ggml_tensor * llm_graph_context::build_attn(
     }
 
     if (wo_b) {
-        //cb(cur, "kqv_wo", il);
+        cb(cur, "kqv_wo", il);
     }
 
     if (wo_b) {
@@ -1691,6 +1694,7 @@ static std::unique_ptr build_attn_inp_kv_impl(
     inp->self_v_idxs = mctx_cur->build_input_v_idxs(ctx0, ubatch);
 
     inp->self_kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_kv, n_tokens/n_stream, 1, n_stream);
+    cb(inp->self_kq_mask, "KQ_mask", -1);
     ggml_set_input(inp->self_kq_mask);
 
     inp->self_kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask;
@@ -1818,7 +1822,7 @@ ggml_tensor * llm_graph_context::build_attn(
     }
 
     if (wo_b) {
-        //cb(cur, "kqv_wo", il);
+        cb(cur, "kqv_wo", il);
     }
 
     if (wo_b) {
@@ -1873,7 +1877,7 @@ ggml_tensor * llm_graph_context::build_attn(
     }
 
     if (wo_b) {
-        //cb(cur, "kqv_wo", il);
+        cb(cur, "kqv_wo", il);
     }
 
     if (wo_b) {
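Aside, not part of the patch: re-enabling these cb() calls gives the graph inputs stable tensor names ("inp_tokens", "inp_pos", "inp_out_ids", "KQ_mask", plus per-layer names for "kqv_wo"); the assumption here is that the OpenVINO decoder locates these tensors by name. A minimal sketch using the existing ggml lookup API (find_kq_mask is a hypothetical helper):

#include "ggml.h"

// Find the attention mask that build_attn_inp_no_cache() / build_attn_inp_kv_impl()
// now label "KQ_mask"; returns NULL if the graph has no tensor with that name.
static struct ggml_tensor * find_kq_mask(struct ggml_cgraph * cgraph) {
    return ggml_graph_get_tensor(cgraph, "KQ_mask");
}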