Fix test-backend-ops: Treat quantized tensors as weights
This commit is contained in:
parent
a1ce428004
commit
9900245e0b
|
|
@ -76,13 +76,15 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph,
|
||||||
add_extra_inputs();
|
add_extra_inputs();
|
||||||
}
|
}
|
||||||
|
|
||||||
GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph) {
|
GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph,
|
||||||
|
std::map<std::string, std::shared_ptr<ov::Node>>& model_weights) {
|
||||||
if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
|
if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
|
||||||
std::string filename = "cgraph.txt";
|
std::string filename = "cgraph.txt";
|
||||||
dump_cgraph(cgraph, filename);
|
dump_cgraph(cgraph, filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_cgraph = cgraph;
|
m_cgraph = cgraph;
|
||||||
|
m_model_weights = model_weights;
|
||||||
for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
|
for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
|
||||||
auto* cur_node = cgraph->nodes[node_n];
|
auto* cur_node = cgraph->nodes[node_n];
|
||||||
if (cur_node->op == GGML_OP_NONE) {
|
if (cur_node->op == GGML_OP_NONE) {
|
||||||
|
|
@ -123,10 +125,12 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
|
||||||
|
|
||||||
// Add model inputs and weights constants, if called for the whole graph
|
// Add model inputs and weights constants, if called for the whole graph
|
||||||
if (naive) {
|
if (naive) {
|
||||||
auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(src));
|
if (m_model_weights.find(src_name) == m_model_weights.end()) {
|
||||||
param_node->set_friendly_name(src_name);
|
auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(src));
|
||||||
param_node->output(0).get_tensor().set_names({src_name});
|
param_node->set_friendly_name(src_name);
|
||||||
m_model_inputs[src_name] = param_node;
|
param_node->output(0).get_tensor().set_names({src_name});
|
||||||
|
m_model_inputs[src_name] = param_node;
|
||||||
|
}
|
||||||
|
|
||||||
} else if (!m_node && !src->view_src) {
|
} else if (!m_node && !src->view_src) {
|
||||||
ggml_backend_buffer* buffer = src->buffer;
|
ggml_backend_buffer* buffer = src->buffer;
|
||||||
|
|
@ -381,7 +385,7 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
|
||||||
std::string src_name(src->name);
|
std::string src_name(src->name);
|
||||||
if (!src->view_src) {
|
if (!src->view_src) {
|
||||||
ggml_backend_buffer* buffer = src->buffer;
|
ggml_backend_buffer* buffer = src->buffer;
|
||||||
if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
|
if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS || ggml_is_quantized(src->type)) {
|
||||||
bool should_create = false;
|
bool should_create = false;
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(weights_mutex);
|
std::lock_guard<std::mutex> lock(weights_mutex);
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ public:
|
||||||
int context_size, int num_heads, int num_heads_kv, int head_size);
|
int context_size, int num_heads, int num_heads_kv, int head_size);
|
||||||
|
|
||||||
// Naive graph decoder
|
// Naive graph decoder
|
||||||
GgmlOvDecoder(struct ggml_cgraph* cgraph);
|
GgmlOvDecoder(struct ggml_cgraph* cgraph, std::map<std::string, std::shared_ptr<ov::Node>>& model_weights);
|
||||||
|
|
||||||
virtual ov::Any get_attribute(const std::string& name) const override {
|
virtual ov::Any get_attribute(const std::string& name) const override {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
@ -115,6 +115,8 @@ public:
|
||||||
|
|
||||||
ov::PartialShape get_graph_input_shape(const ggml_tensor* src) const;
|
ov::PartialShape get_graph_input_shape(const ggml_tensor* src) const;
|
||||||
|
|
||||||
|
static void dump_cgraph(const struct ggml_cgraph* cgraph, std::string& filename);
|
||||||
|
|
||||||
static std::shared_ptr<ov::Node> create_weight_node(ggml_tensor* tensor);
|
static std::shared_ptr<ov::Node> create_weight_node(ggml_tensor* tensor);
|
||||||
static std::map<std::string, std::shared_ptr<ov::Node>> create_weight_nodes(struct ggml_cgraph* cgraph);
|
static std::map<std::string, std::shared_ptr<ov::Node>> create_weight_nodes(struct ggml_cgraph* cgraph);
|
||||||
|
|
||||||
|
|
@ -126,7 +128,6 @@ public:
|
||||||
private:
|
private:
|
||||||
void set_input_output(ggml_tensor* node, bool naive = false);
|
void set_input_output(ggml_tensor* node, bool naive = false);
|
||||||
void add_extra_inputs();
|
void add_extra_inputs();
|
||||||
static void dump_cgraph(const struct ggml_cgraph* cgraph, std::string& filename);
|
|
||||||
static std::vector<size_t> get_shape(const ggml_tensor* tensor);
|
static std::vector<size_t> get_shape(const ggml_tensor* tensor);
|
||||||
static std::vector<size_t> get_stride(const ggml_tensor* tensor);
|
static std::vector<size_t> get_stride(const ggml_tensor* tensor);
|
||||||
static ov::element::Type get_ov_type(const ggml_tensor* tensor);
|
static ov::element::Type get_ov_type(const ggml_tensor* tensor);
|
||||||
|
|
|
||||||
|
|
@ -403,14 +403,22 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||||
if (supported_types.find(op->type) == supported_types.end()) {
|
auto* src = op->src[i];
|
||||||
GGML_LOG_WARN("OpenVINO backend does not support tensor type %s\n", ggml_type_name(op->type));
|
if (src == nullptr) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (supported_types.find(src->type) == supported_types.end()) {
|
||||||
|
GGML_LOG_WARN("OpenVINO backend does not support tensor type %s\n", ggml_type_name(src->type));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (op->src[i] != nullptr && op->src[i]->ne[3] != 1) {
|
if (src->ne[3] != 1) {
|
||||||
GGML_LOG_WARN("OpenVINO backend does not support tensors with ne[3] != 1\n");
|
GGML_LOG_WARN("OpenVINO backend does not support tensors with ne[3] != 1\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (ggml_is_quantized(src->type) && src->ne[2] != 1) {
|
||||||
|
GGML_LOG_WARN("OpenVINO backend does not support 3D quantized tensors\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_op_unsupported_case(op)) {
|
if (is_op_unsupported_case(op)) {
|
||||||
|
|
|
||||||
|
|
@ -281,10 +281,14 @@ enum ggml_status naive_compute(struct ggml_cgraph* cgraph,
|
||||||
return GGML_STATUS_FAILED;
|
return GGML_STATUS_FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto decoder = std::make_shared<GgmlOvDecoder>(cgraph);
|
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph);
|
||||||
|
auto decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights);
|
||||||
auto input_model = std::make_shared<ov::frontend::ggml::InputModel>(decoder);
|
auto input_model = std::make_shared<ov::frontend::ggml::InputModel>(decoder);
|
||||||
auto naive = true;
|
auto naive = true;
|
||||||
auto model = ov::frontend::ggml::FrontEnd::convert(input_model, naive);
|
auto model = ov::frontend::ggml::FrontEnd::convert(input_model, naive);
|
||||||
|
if (getenv("GGML_OPENVINO_DUMP_IR")) {
|
||||||
|
ov::serialize(model, "IR_naive.xml");
|
||||||
|
}
|
||||||
auto infer_request = core.compile_model(model, device, config).create_infer_request();
|
auto infer_request = core.compile_model(model, device, config).create_infer_request();
|
||||||
|
|
||||||
auto ov_params = model->get_parameters();
|
auto ov_params = model->get_parameters();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue