wip
This commit is contained in:
parent
15ec91f52c
commit
2884bbcb50
|
|
@ -1,7 +1,3 @@
|
||||||
#include <functional>
|
|
||||||
#include <memory>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "backend-ops.hpp"
|
#include "backend-ops.hpp"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#include "ggml-backend-impl.h"
|
#include "ggml-backend-impl.h"
|
||||||
|
|
@ -10,6 +6,10 @@
|
||||||
#include "tensor.hpp"
|
#include "tensor.hpp"
|
||||||
#include "utils.hpp"
|
#include "utils.hpp"
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
qnn::ggml_backend_qnn_device_context * get_device_context(ggml_backend_dev_t dev) {
|
qnn::ggml_backend_qnn_device_context * get_device_context(ggml_backend_dev_t dev) {
|
||||||
|
|
@ -41,14 +41,20 @@ ggml_status ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, gg
|
||||||
return GGML_STATUS_SUCCESS;
|
return GGML_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data,
|
void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
||||||
size_t offset, size_t size) {
|
ggml_tensor * tensor,
|
||||||
|
const void * data,
|
||||||
|
size_t offset,
|
||||||
|
size_t size) {
|
||||||
GGML_UNUSED(buffer);
|
GGML_UNUSED(buffer);
|
||||||
memcpy((char *) tensor->data + offset, data, size);
|
memcpy((char *) tensor->data + offset, data, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data,
|
void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer,
|
||||||
size_t offset, size_t size) {
|
const ggml_tensor * tensor,
|
||||||
|
void * data,
|
||||||
|
size_t offset,
|
||||||
|
size_t size) {
|
||||||
GGML_UNUSED(buffer);
|
GGML_UNUSED(buffer);
|
||||||
memcpy(data, (const char *) tensor->data + offset, size);
|
memcpy(data, (const char *) tensor->data + offset, size);
|
||||||
}
|
}
|
||||||
|
|
@ -149,8 +155,10 @@ bool ggml_backend_is_qnn(ggml_backend_t backend) {
|
||||||
return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid());
|
return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src,
|
bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src,
|
||||||
ggml_tensor * dst) {
|
ggml_backend_t backend_dst,
|
||||||
|
const ggml_tensor * src,
|
||||||
|
ggml_tensor * dst) {
|
||||||
GGML_UNUSED(backend_src);
|
GGML_UNUSED(backend_src);
|
||||||
GGML_UNUSED(backend_dst);
|
GGML_UNUSED(backend_dst);
|
||||||
GGML_UNUSED(src);
|
GGML_UNUSED(src);
|
||||||
|
|
@ -204,6 +212,7 @@ constexpr const ggml_backend_i ggml_backend_qnn_interface = {
|
||||||
/* .graph_compute = */ ggml_backend_qnn_graph_compute,
|
/* .graph_compute = */ ggml_backend_qnn_graph_compute,
|
||||||
/* .event_record = */ nullptr,
|
/* .event_record = */ nullptr,
|
||||||
/* .event_wait = */ nullptr,
|
/* .event_wait = */ nullptr,
|
||||||
|
/* .graph_optimize = */ nullptr,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -307,8 +316,10 @@ ggml_backend_buffer_type_t ggml_backend_qnn_device_get_buffer_type(ggml_backend_
|
||||||
return ggml_backend_qnn_buffer_type(dev);
|
return ggml_backend_qnn_buffer_type(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size,
|
ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t dev,
|
||||||
size_t max_tensor_size) {
|
void * ptr,
|
||||||
|
size_t size,
|
||||||
|
size_t max_tensor_size) {
|
||||||
// TODO
|
// TODO
|
||||||
GGML_UNUSED(dev);
|
GGML_UNUSED(dev);
|
||||||
GGML_UNUSED(max_tensor_size);
|
GGML_UNUSED(max_tensor_size);
|
||||||
|
|
|
||||||
|
|
@ -58,9 +58,9 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
|
||||||
{
|
{
|
||||||
// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
|
// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
|
||||||
kQnnNpuLibName, GGML_BACKEND_DEVICE_TYPE_ACCEL,
|
kQnnNpuLibName, GGML_BACKEND_DEVICE_TYPE_ACCEL,
|
||||||
|
(1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16) | (1L << GGML_TYPE_I16),
|
||||||
#ifdef GGML_HEXAGON_ENABLE_QUANTIZED_TENSORS
|
#ifdef GGML_HEXAGON_ENABLE_QUANTIZED_TENSORS
|
||||||
(1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16) | (1L << GGML_TYPE_I16),
|
(1L << GGML_TYPE_Q2_K) | (1L << GGML_TYPE_Q3_K) | (1L << GGML_TYPE_Q4_K) | (1L << GGML_TYPE_Q8_K),
|
||||||
(1L << GGML_TYPE_Q2_K) | (1L << GGML_TYPE_Q3_K) | (1L << GGML_TYPE_Q4_K) | (1L << GGML_TYPE_Q8_K),
|
|
||||||
#else
|
#else
|
||||||
(1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16),
|
(1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16),
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue