diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp
index d2a21511dd..fd24356412 100644
--- a/ggml/src/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino.cpp
@@ -647,168 +647,6 @@ void ggml_backend_openvino_reshape(ggml_tensor *dst) {
 }
 
 void ggml_backend_openvino_view(ggml_tensor *dst) {
-
-    /*
-    // Case 1: Set the output tensor shape as the same shape of the input tensor [1, 7, 9216], for next CONT node operator
-    if (dst->ne[0] > dst->ne[1] && (dst->ne[0] * dst->nb[0] != dst->nb[1]) && dst->ne[2] == 1) {
-        // if (dst->view_offs == 0) {
-        //     return;
-        // }
-        ov::Core core;
-        ov::Shape input_shape{ static_cast<size_t>(dst->src[0]->ne[2]), static_cast<size_t>(dst->src[0]->ne[1]), static_cast<size_t>(dst->src[0]->ne[0])};
-        ov::Shape out_shape{ static_cast<size_t>(dst->ne[2]), static_cast<size_t>(dst->ne[1]), static_cast<size_t>(dst->ne[0])};
-
-        auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
-
-        // auto new_shape_node = ov::op::v0::Constant::create(ov::element::i64,
-        //     ov::Shape{input_shape.size()},
-        //     std::vector<int64_t>(input_shape.begin(), input_shape.end()));
-        // auto res = std::make_shared<ov::op::v1::Reshape>(input_param, new_shape_node, false);
-
-        int64_t split_addr = dst->view_offs / dst->nb[0];
-        std::vector<int64_t> begin = { 0, 0, split_addr };
-        std::vector<int64_t> end   = { static_cast<int64_t>(dst->src[0]->ne[2]),
-                                        static_cast<int64_t>(dst->src[0]->ne[1]),
-                                        split_addr + static_cast<int64_t>(dst->ne[0]) };
-        std::vector<int64_t> strides = { 1, 1, 1 };
-
-        auto begin_const = ov::op::v0::Constant::create(ov::element::i64, { begin.size() }, begin);
-        auto end_const   = ov::op::v0::Constant::create(ov::element::i64, { end.size() }, end);
-        auto strides_const = ov::op::v0::Constant::create(ov::element::i64, { strides.size() }, strides);
-
-        std::vector<int64_t> begin_mask = {0, 0, 0};
-        std::vector<int64_t> end_mask   = {0, 0, 0};
-        auto slice = std::make_shared<ov::op::v1::StridedSlice>(
-            input_param, 
-            begin_const, 
-            end_const, 
-            strides_const, 
-            begin_mask, 
-            end_mask
-        );
-
-        auto model = std::make_shared<ov::Model>(ov::OutputVector{ slice },
-                                                 ov::ParameterVector{ input_param });
-
-        auto compiled_model = core.compile_model(model, "CPU");
-
-        ov::InferRequest infer_request = compiled_model.create_infer_request();
-
-        ov::Tensor input_tensor(ov::element::f32, input_shape, dst->src[0]->data);
-        infer_request.set_input_tensor(0, input_tensor);
-
-        ov::Tensor output_tensor(ov::element::f32, out_shape, dst->data);
-        infer_request.set_output_tensor(0, output_tensor);
-
-        infer_request.infer();
-    }
-    */
-
-
-    /*
-    // Case 2: Slice contiguous input tensor [98304, 1, 1] to contiguout output tensor [ 21504, 1, 1]
-    if (ggml_is_contiguous(dst) && dst->ne[1] == 1 && (dst->ne[0] * dst->nb[0] == dst->nb[1])) {
-        ov::Core core;
-        ov::Shape input_shape = { static_cast<size_t>(dst->src[0]->ne[2]),
-                                    static_cast<size_t>(dst->src[0]->ne[1]),
-                                    static_cast<size_t>(dst->src[0]->ne[0])};
-        ov::Shape output_shape = { static_cast<size_t>(dst->ne[2]),
-                                    static_cast<size_t>(dst->ne[1]),
-                                    static_cast<size_t>(dst->ne[0])};
-        auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, input_shape);
-
-
-        std::vector<int64_t> begin = { 0, 0, 0 };
-        std::vector<int64_t> end   = { static_cast<int64_t>(dst->ne[2]),
-                                        static_cast<int64_t>(dst->ne[1]),
-                                        static_cast<int64_t>(dst->ne[0]) };
-        std::vector<int64_t> strides = { 1, 1, 1 };
-
-        auto begin_const = ov::op::v0::Constant::create(ov::element::i64, { begin.size() }, begin);
-        auto end_const   = ov::op::v0::Constant::create(ov::element::i64, { end.size() }, end);
-        auto strides_const = ov::op::v0::Constant::create(ov::element::i64, { strides.size() }, strides);
-
-        std::vector<int64_t> begin_mask = {0, 0, 0};
-        std::vector<int64_t> end_mask   = {0, 0, 0};
-        auto slice = std::make_shared<ov::op::v1::StridedSlice>(
-            input_param, 
-            begin_const, 
-            end_const, 
-            strides_const, 
-            begin_mask, 
-            end_mask
-        );
-
-        std::shared_ptr<ov::Model> model = std::make_shared<ov::Model>(ov::OutputVector{ slice },
-                                                 ov::ParameterVector{ input_param });
-
-        auto compiled_model = core.compile_model(model, "CPU");
-        ov::InferRequest infer_request = compiled_model.create_infer_request();
-
-        ov::Tensor input_tensor(ov::element::f16, input_shape, dst->src[0]->data);
-        ov::Tensor output_tensor(ov::element::f16, output_shape, dst->data);
-        infer_request.set_input_tensor(0, input_tensor);
-        infer_request.set_output_tensor(0, output_tensor);
-
-        infer_request.infer();
-    }
-    */
-
-    /*
-    // Case 3: Reshape the input tensor [1, 1, 98304] to output tensor [1, 3072, 32](Physical shape)
-    if (dst->ne[0] < dst->ne[1] && dst->ne[2] == 1) {
-        ov::Core core;
-        ov::Shape input_shape = { static_cast<size_t>(dst->src[0]->ne[2]),
-                                    static_cast<size_t>(dst->src[0]->ne[1]),
-                                    static_cast<size_t>(dst->src[0]->ne[0])};
-        ov::Shape output_shape = { static_cast<size_t>(dst->nb[2]),
-                                    static_cast<size_t>(dst->ne[1]),
-                                    static_cast<size_t>(dst->nb[1] / dst->nb[0])};
-        auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, input_shape);
-    
-        auto new_shape_node = ov::op::v0::Constant::create(ov::element::i64,
-                                                        ov::Shape{output_shape.size()},
-                                                        std::vector<int64_t>(output_shape.begin(), output_shape.end()));
-        auto res = std::make_shared<ov::op::v1::Reshape>(input_param, new_shape_node, false);
-
-        std::shared_ptr<ov::Model> model = std::make_shared<ov::Model>(ov::OutputVector{res},
-                                                                        ov::ParameterVector{input_param});
-        auto compiled_model = core.compile_model(model, "CPU");
-        ov::InferRequest infer_request = compiled_model.create_infer_request();
-    
-        ov::Tensor input_tensor(ov::element::f16, input_shape, dst->src[0]->data);
-        ov::Tensor output_tensor(ov::element::f16, output_shape, dst->data);
-        infer_request.set_input_tensor(0, input_tensor);
-        infer_request.set_output_tensor(0, output_tensor);
-    
-        infer_request.infer();
-    }
-    */
-
-    /*
-    // Case 4:
-    if (dst->ne[0] != 1 && dst->ne[1] != 1 && dst->ne[2] !=1) {
-        
-    }
-    */
-
-    ov::Core core;
-    ov::Shape input_shape{static_cast<size_t>(dst->src[0]->ne[2]), static_cast<size_t>(dst->src[0]->ne[1]), static_cast<size_t>(dst->src[0]->ne[0])};
-    // ov::Shape output_shape{static_cast<size_t>(dst->ne[2]), static_cast<size_t>(dst->ne[1]), static_cast<size_t>(dst->ne[0])};
-    auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
-
-    std::shared_ptr<ov::Model> model = std::make_shared<ov::Model>(ov::OutputVector{input_param},
-                                                                    ov::ParameterVector{input_param});
-    auto compiled_model = core.compile_model(model, "CPU");
-    ov::InferRequest infer_request = compiled_model.create_infer_request();
-
-    ov::Tensor input_tensor(ov::element::f32, input_shape, dst->src[0]->data);
-    // ov::Tensor output_tensor(ov::element::f32, input_shape, dst->data);
-    infer_request.set_input_tensor(0, input_tensor);
-    // infer_request.set_output_tensor(0, output_tensor);
-
-    infer_request.infer();
-
     GGML_UNUSED(dst);
 }
 
@@ -823,7 +661,7 @@ void ggml_backend_openvino_dup_bytes(struct ggml_tensor *dst) {
     const size_t element_size = ggml_type_size(src0->type);
 
     // Case 1: Both tensors are contiguous
-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
+    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && (src0->ne[0] * element_size == src0->nb[1])) {
         ov::Shape input_shape = {
             static_cast<size_t>(src0->ne[2]),
             static_cast<size_t>(src0->ne[1]),
@@ -1152,6 +990,7 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
     std::vector<int> permute_indices;
 
     std::vector<int> mul_mat_indices;
+    std::vector<int> add_indices;
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
         if (cgraph->nodes[i]->op == GGML_OP_CONT) {
@@ -1168,6 +1007,8 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
             permute_indices.push_back(i);
         } else if (cgraph->nodes[i]->op == GGML_OP_MUL_MAT) {
             mul_mat_indices.push_back(i);
+        } else if (cgraph->nodes[i]->op == GGML_OP_ADD) {
+            add_indices.push_back(i);
         }
     }
 
@@ -1177,48 +1018,49 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
     bool prompt_process_flag = true;
     if (cgraph->nodes[0]->ne[1] == 1) {
         prompt_process_flag = false;
-    }
-    //     int end_node = cgraph->n_nodes - 1;
-    //     openvino_frontend_compute(backend, cgraph, 0, end_node, prompt_process_flag);
-    // } else {
-
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
-            ggml_backend_openvino_permute(cgraph->nodes[i]);
-        // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
-        //     ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
-        // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
-        //     ggml_backend_openvino_view(cgraph->nodes[i]);
-        // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
-        //     ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
-        } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
-            ggml_backend_openvino_transpose(cgraph->nodes[i]);
-        // } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
-        //     ggml_backend_openvino_reshape(cgraph->nodes[i]);
-        } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
-            ggml_backend_openvino_cpy(cgraph->nodes[i]);
-        } else {
-            // Process a range of nodes with openvino_frontend_compute
-            int start_index = i;
-            while (i < cgraph->n_nodes
-                    && std::find(permute_indices.begin(), permute_indices.end(), i) == permute_indices.end()
-                    // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()
-                    // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
-                    // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
-                    // && std::find(reshape_indices.begin(), reshape_indices.end(), i) == reshape_indices.end()
-                    && std::find(transpose_indices.begin(), transpose_indices.end(), i) == transpose_indices.end()
-                    && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end()
-                    ) {
-                i++;
+        // int end_node = cgraph->n_nodes - 1;
+        // openvino_frontend_compute(backend, cgraph, 0, end_node, prompt_process_flag);
+        for (int i = 0; i < cgraph->n_nodes; i++) {
+            if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
+                ggml_backend_openvino_view(cgraph->nodes[i]);
+            } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
+                ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
+            } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
+                ggml_backend_openvino_reshape(cgraph->nodes[i]);
+            } else {
+                // Collect the run of consecutive nodes not in view/cont/reshape_indices and pass it to openvino_frontend_compute in one call
+                int start_index = i;
+                while (i < cgraph->n_nodes
+                        && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
+                        && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
+                        && std::find(reshape_indices.begin(), reshape_indices.end(), i) == reshape_indices.end()
+                        ) {
+                    i++;
+                }
+                if (start_index < i) {
+                        openvino_frontend_compute(backend, cgraph, start_index, --i, prompt_process_flag);
+                }
             }
-            if (start_index < i) {
-                    openvino_frontend_compute(backend, cgraph, start_index, --i, prompt_process_flag);
+        }
+    } else {
+        for (int i = 0; i < cgraph->n_nodes; i++) {
+            if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
+                ggml_backend_openvino_permute(cgraph->nodes[i]);
+            } else {
+                // Collect the run of consecutive nodes not in permute_indices and pass it to openvino_frontend_compute in one call
+                int start_index = i;
+                while (i < cgraph->n_nodes
+                        && std::find(permute_indices.begin(), permute_indices.end(), i) == permute_indices.end()
+                        ) {
+                    i++;
+                }
+                if (start_index < i) {
+                        openvino_frontend_compute(backend, cgraph, start_index, --i, prompt_process_flag);
+                }
             }
         }
     }
 
-    // }
-
     return GGML_STATUS_SUCCESS;
 
     GGML_UNUSED(backend);
diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 4483241481..d91338127a 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -26,7 +26,9 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
         }
         case GGML_OP_CONT:
         {
-            if (ggml_is_contiguous(node->src[0]) && ggml_is_contiguous(node)) {
+            if (ggml_is_contiguous(node->src[0])
+                && ggml_is_contiguous(node)
+                && (node->src[0]->ne[0] * node->src[0]->nb[0] == node->src[0]->nb[1])) {
                 inputs[src0_name] = node->src[0];
                 outputs[node_name] = node;
                 m_input_names.push_back(src0_name);
@@ -112,22 +114,31 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
                 m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
                 m_output_names.push_back(node_name);
 
-                int src0_elem_size = ggml_type_size(node->src[0]->type);
-                int src1_elem_size = ggml_type_size(node->src[1]->type);
+                // int src0_elem_size = ggml_type_size(node->src[0]->type);
+                // int src1_elem_size = ggml_type_size(node->src[1]->type);
 
-                int src0_logical_rows = node->src[0]->ne[1];
-                int src1_logical_rows = node->src[1]->ne[1];
+                // int src0_logical_rows = node->src[0]->ne[1];
+                // int src1_logical_rows = node->src[1]->ne[1];
 
-                int src0_phys_cols = node->src[0]->nb[0] / src0_elem_size;
-                int src0_phys_rows = src0_logical_rows;
+                // int src0_phys_cols = node->src[0]->nb[0] / src0_elem_size;
+                // int src0_phys_rows = src0_logical_rows;
 
-                int src1_phys_cols = node->src[1]->nb[1] / src1_elem_size;
-                int src1_phys_rows = src1_logical_rows;
-                ov::Shape src0_phys_shape = {1, static_cast<size_t>(src0_phys_rows), static_cast<size_t>(src0_phys_cols) };
-                ov::Shape src1_phys_shape = {1, static_cast<size_t>(src1_phys_rows), static_cast<size_t>(src1_phys_cols) };
-                auto input0_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src0_phys_shape);
-                auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, src1_phys_shape);
+                // int src1_phys_cols = node->src[1]->nb[1] / src1_elem_size;
+                // int src1_phys_rows = src1_logical_rows;
+                // ov::Shape src0_phys_shape = {1, static_cast<size_t>(src0_phys_rows), static_cast<size_t>(src0_phys_cols) };
+                // ov::Shape src1_phys_shape = {1, static_cast<size_t>(src1_phys_rows), static_cast<size_t>(src1_phys_cols) };
+                // auto input0_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src0_phys_shape);
+                // auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, src1_phys_shape);
+                // m_params.push_back(input0_param);
+                // m_params.push_back(input1_param);
+
+                ov::Shape input0_shape = { static_cast<size_t>(node->src[0]->ne[2]),
+                    static_cast<size_t>(node->src[0]->ne[1]),
+                    static_cast<size_t>(node->src[0]->ne[0])};
+                auto input0_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input0_shape);
                 m_params.push_back(input0_param);
+                ov::Shape input1_shape = { 1, 1, static_cast<size_t>(node->src[1]->nb[2] / node->src[1]->nb[0])};
+                auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, input1_shape);
                 m_params.push_back(input1_param);
 
                 m_continuous = false;
@@ -147,7 +158,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
             // ov::Shape input_shape = { static_cast<size_t>(node->src[0]->ne[2]),
             //                             static_cast<size_t>(node->src[0]->ne[1]),
             //                             static_cast<size_t>(node->src[0]->ne[0])};
-            // auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
+            // auto type = get_input_type(src0_name);
+            // auto input_param = std::make_shared<ov::op::v0::Parameter>(type, input_shape);
             // m_params.push_back(input_param);
 
             // if (node->ne[0] > node->ne[1] && (node->ne[0] * node->nb[0] != node->nb[1]) && node->ne[2] == 1) {
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index a0adc917e7..b8315a0013 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -27,12 +27,12 @@ std::vector<std::pair<std::string, ov::Tensor>> get_ggml_graph_input_tensors(std
             printf("Subgraph input %d: %g\n", inp, *(double*)(input_data));
         #endif
         ov::Tensor input_tensor;
-        auto input_shape = ggml_decoder->get_input_shape(name).to_shape();
+        ov::Shape input_shape = ggml_decoder->get_input_shape(name).to_shape();
 
-        if (flag & op_node_name == "CONT" && input_shape[0] == 1 && input_shape[1] != 1) {
-            std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
-            ov::element::Type input_type = ggml_decoder->get_input_type(name);
-            size_t element_size = input_type.size();
+        ov::element::Type input_type = ggml_decoder->get_input_type(name);
+        size_t element_size = input_type.size();
+        std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
+        if (op_node_name == "CONT" && input_shape[0] == 1 && ((input_shape[1] != 1 && flag) || input_shape[2] * element_size != input_stride[1])) {
             const size_t num_rows    = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[1]);
             const size_t dim2        = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[0]);
             size_t phys_stride = static_cast<size_t>(input_stride[1]) / element_size;
@@ -42,14 +42,14 @@ std::vector<std::pair<std::string, ov::Tensor>> get_ggml_graph_input_tensors(std
             std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
             ov::element::Type input_type = ggml_decoder->get_input_type(name);
             size_t element_size = input_type.size();
-            ov::Shape phys_shape;
+            // ov::Shape phys_shape;
             static int iter = 0;
             if (iter++ % 2 == 0) {
-                phys_shape = {1, input_shape[1], input_stride[2] / element_size};
-                input_tensor = ov::Tensor(ov::element::f32, phys_shape, input_data);
+                // phys_shape = {1, input_shape[1], input_stride[2] / element_size};
+                input_tensor = ov::Tensor(ov::element::f32, input_shape, input_data);
             } else {
-                phys_shape = {1, input_shape[1], input_stride[1] / element_size};
-                input_tensor = ov::Tensor(ov::element::f16, phys_shape, input_data);
+                ov::Shape flat_shape = {1, 1, input_stride[0] / element_size};
+                input_tensor = ov::Tensor(ov::element::f16, flat_shape, input_data);
             }
         } else {
             input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data);
@@ -161,6 +161,11 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
         auto output_tensor = infer_request.get_output_tensor(i);
         // output_tensor.get_shape();
         std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size());
+        // std::cout << std::left  << "[ " << std::setw(2) << i << " ]: "
+        //             << "output_names: " << std::setw(20) << output_names[i]
+        //             << " output data: " << std::setw(15) << ((float*)output_tensor.data())[0]
+        //             << std::setw(15) << ((float*)output_tensor.data())[1] << std::right
+        //             << std::endl;
         #ifdef GGML_OPENVINO_DEBUG
             printf("Output %s after: %g\n", output_names[i].c_str(), *(double*)(output_tensor.data()));
         #endif