Move CPY from GGML OV Backend to OV Frontend
This commit is contained in:
parent
2b04bd43be
commit
cb2729bc4a
|
|
@ -815,9 +815,9 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
|
|||
} else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
||||
ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
||||
} else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
||||
ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||
} else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
||||
ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
||||
ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||
// } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
||||
// ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
||||
} else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
|
||||
ggml_backend_openvino_transpose(cgraph->nodes[i]);
|
||||
} else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
||||
|
|
@ -829,7 +829,6 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
|
|||
int start_index = i;
|
||||
while (i < cgraph->n_nodes &&
|
||||
std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() &&
|
||||
std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() &&
|
||||
std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()) {
|
||||
i++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ public:
|
|||
|
||||
// virtual size_t output(size_t index) const = 0;
|
||||
|
||||
// Reports whether the decoder treats its node as contiguous.
// NOTE(review): the exact contract is not visible here; the GgmlOvDecoder override appears to return a flag set while classifying CONT/CPY nodes — confirm against the implementation.
virtual bool check_if_continuous() const = 0;
|
||||
|
||||
};
|
||||
|
||||
} // namespace ggml
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "ggml-decoder.h"
|
||||
#include <ggml.h>
|
||||
#include <ggml-impl.h>
|
||||
#include <ggml-cpu-impl.h>
|
||||
|
||||
void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs) {
|
||||
switch (node->op) {
|
||||
|
|
@ -9,8 +10,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
case GGML_OP_RESHAPE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_CONT:
|
||||
case GGML_OP_CPY:
|
||||
case GGML_OP_RMS_NORM:
|
||||
{
|
||||
inputs[node->src[0]->name] = node->src[0];
|
||||
|
|
@ -19,6 +18,103 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_output_names.push_back(node->name);
|
||||
break;
|
||||
}
|
||||
case GGML_OP_CONT:
|
||||
{
|
||||
if (ggml_is_contiguous(node->src[0]) && ggml_is_contiguous(node)) {
|
||||
inputs[node->src[0]->name] = node->src[0];
|
||||
outputs[node->name] = node;
|
||||
m_input_names.push_back(node->src[0]->name);
|
||||
m_output_names.push_back(node->name);
|
||||
m_continuous = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (node->src[0]->type == node->type && node->src[0]->ne[0] == node->ne[0] &&
|
||||
node->src[0]->nb[0] == ggml_type_size(node->src[0]->type) && node->nb[0] == ggml_type_size(node->src[0]->type)) {
|
||||
|
||||
for (size_t i01 = 0; i01 < node->src[0]->ne[1]; ++i01) {
|
||||
const char *src_row = reinterpret_cast<const char *>(node->src[0]->data) + i01 * node->src[0]->nb[1];
|
||||
char *dst_row = reinterpret_cast<char *>(node->data) + i01 * node->nb[1];
|
||||
std::memcpy(dst_row, src_row, node->src[0]->ne[0] * ggml_type_size(node->src[0]->type));
|
||||
}
|
||||
|
||||
inputs[node->name] = node;
|
||||
outputs[node->name] = node;
|
||||
m_input_names.push_back(node->name);
|
||||
m_output_names.push_back(node->name);
|
||||
m_continuous = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// if (ggml_is_contiguous(node)) {
|
||||
const size_t rs = node->src[0]->ne[0] * ggml_type_size(node->src[0]->type); // Row size in bytes for dst
|
||||
|
||||
// Create OpenVINO tensors for source and destination
|
||||
// The tensors are reshaped to a 2D structure (num_rows x ne00) for easier iteration and compatibility with the simplified loop.
|
||||
ov::Tensor src_tensor(ov::element::f32,
|
||||
ov::Shape{node->src[0]->ne[3] * node->src[0]->ne[2] * node->src[0]->ne[1], node->src[0]->ne[0]},
|
||||
node->src[0]->data);
|
||||
ov::Tensor dst_tensor(ov::element::f32,
|
||||
ov::Shape{node->src[0]->ne[3] * node->src[0]->ne[2] * node->src[0]->ne[1], node->src[0]->ne[0]},
|
||||
node->data);
|
||||
|
||||
// Perform the copy in a single loop
|
||||
const size_t num_rows = node->src[0]->ne[3] * node->src[0]->ne[2] * node->src[0]->ne[1];
|
||||
for (size_t row = 0; row < num_rows; ++row) {
|
||||
// Calculate the source row pointer based on original strides
|
||||
// The source row pointer is calculated based on the combined index row and the strides nb03, nb02, and nb01.
|
||||
const char* src0_ptr = (char*)src_tensor.data() +
|
||||
// Calculates which block of the i03 dimension the current row belongs to
|
||||
(row / (node->src[0]->ne[2] * node->src[0]->ne[1])) * node->src[0]->nb[3] + // 0
|
||||
// Calculates which block of the i02 dimension the current row belongs to within the current i03 block.
|
||||
((row / node->src[0]->ne[1]) % node->src[0]->ne[2]) * node->src[0]->nb[2] + // 0, 0,......, 0,384, 384,......, 384,768,......, 2304
|
||||
// Calculates the position within the current i02 block in terms of the i01 index.
|
||||
(row % node->src[0]->ne[1]) * node->src[0]->nb[1]; // 0,2688,......,83328, 0, 2688,......,83328, 0,......, 83328
|
||||
|
||||
// Destination row pointer is linear
|
||||
// Assumes dst is contiguous (not verified here: the ggml_is_contiguous(node) guard above is commented out), so its rows are addressed linearly with a single stride of rs bytes.
|
||||
char* dst_ptr = (char*)dst_tensor.data() + row * rs;
|
||||
|
||||
// Copy row
|
||||
std::memcpy(dst_ptr, src0_ptr, rs);
|
||||
}
|
||||
|
||||
inputs[node->name] = node;
|
||||
outputs[node->name] = node;
|
||||
m_input_names.push_back(node->name);
|
||||
m_output_names.push_back(node->name);
|
||||
m_continuous = false;
|
||||
break;
|
||||
//}
|
||||
}
|
||||
case GGML_OP_CPY:
|
||||
{
|
||||
if (ggml_is_contiguous(node)) {
|
||||
inputs[node->src[0]->name] = node->src[0];
|
||||
outputs[node->name] = node;
|
||||
m_input_names.push_back(node->src[0]->name);
|
||||
m_output_names.push_back(node->name);
|
||||
m_continuous = true;
|
||||
break;
|
||||
} else {
|
||||
for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) { // ne[1] = 3072
|
||||
for (int64_t i0 = 0; i0 < node->ne[0]; ++i0) { // ne[0] = 7
|
||||
int64_t src_index = i0 * node->src[0]->nb[0] / sizeof(float) + // stride in nb[0]
|
||||
i1 * node->src[0]->nb[1] / sizeof(float); // stride in nb[1]
|
||||
char *dst_ptr = static_cast<char *>(node->data) +
|
||||
i0 * node->nb[0] + i1 * node->nb[1];
|
||||
*(ggml_fp16_t *)dst_ptr = GGML_FP32_TO_FP16(((float*)node->src[0]->data)[src_index]);
|
||||
}
|
||||
}
|
||||
// inputs[node->src[0]->name] = node->src[0];
|
||||
inputs[node->name] = node;
|
||||
outputs[node->name] = node;
|
||||
m_input_names.push_back(node->name);
|
||||
m_output_names.push_back(node->name);
|
||||
m_continuous = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// For view, input is node itself
|
||||
case GGML_OP_VIEW:
|
||||
{
|
||||
|
|
|
|||
|
|
@ -62,6 +62,9 @@ public:
|
|||
return m_outputs.at(name);
|
||||
}
|
||||
|
||||
// Returns the m_continuous flag stored on this decoder.
// NOTE(review): the visible set_input_output() cases for GGML_OP_CONT and GGML_OP_CPY assign m_continuous; whether it is initialized for other ops is not visible here — confirm.
virtual bool check_if_continuous() const override {
|
||||
return m_continuous;
|
||||
}
|
||||
private:
|
||||
void set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs);
|
||||
|
||||
|
|
@ -75,5 +78,6 @@ private:
|
|||
std::vector<std::shared_ptr<GgmlOvDecoder>> m_decoders;
|
||||
const std::string m_op_name;
|
||||
mutable std::string m_name;
|
||||
bool m_continuous;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue