Merge d058fc4373 into 58062860af
This commit is contained in:
commit
6919ebc677
|
|
@ -50,6 +50,8 @@ struct htp_ops_context {
|
||||||
struct fastdiv_values src1_div21; // fastdiv values for ne2 * ne1
|
struct fastdiv_values src1_div21; // fastdiv values for ne2 * ne1
|
||||||
|
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
|
|
||||||
|
atomic_uint shared_atomic_lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
int op_matmul(struct htp_ops_context * octx);
|
int op_matmul(struct htp_ops_context * octx);
|
||||||
|
|
|
||||||
|
|
@ -1712,6 +1712,7 @@ static void quantize_fp32_q8x4x2(const struct htp_tensor * src,
|
||||||
static void htp_quantize_fp32_q8x4x2(unsigned int n, unsigned int i, void * data) {
|
static void htp_quantize_fp32_q8x4x2(unsigned int n, unsigned int i, void * data) {
|
||||||
struct htp_ops_context * octx = data;
|
struct htp_ops_context * octx = data;
|
||||||
quantize_fp32_q8x4x2(&octx->src1, octx->src1_spad.data, &octx->src0_spad, n, i, octx->src1_nrows_per_thread);
|
quantize_fp32_q8x4x2(&octx->src1, octx->src1_spad.data, &octx->src0_spad, n, i, octx->src1_nrows_per_thread);
|
||||||
|
atomic_fetch_add(&octx->shared_atomic_lock, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ** matmul callbacks for worker_pool
|
// ** matmul callbacks for worker_pool
|
||||||
|
|
@ -2027,6 +2028,8 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||||
octx->src0_nrows_per_thread = (src0_nrows + octx->n_threads - 1) / octx->n_threads;
|
octx->src0_nrows_per_thread = (src0_nrows + octx->n_threads - 1) / octx->n_threads;
|
||||||
octx->src0_nrows_per_thread += (octx->src0_nrows_per_thread & 1); // round up to even
|
octx->src0_nrows_per_thread += (octx->src0_nrows_per_thread & 1); // round up to even
|
||||||
|
|
||||||
|
atomic_store(&octx->shared_atomic_lock, 0);
|
||||||
|
|
||||||
if (need_quant) {
|
if (need_quant) {
|
||||||
// Run quant jobs
|
// Run quant jobs
|
||||||
const uint32_t n_quant_jobs = MIN(src1_nrows, octx->n_threads);
|
const uint32_t n_quant_jobs = MIN(src1_nrows, octx->n_threads);
|
||||||
|
|
@ -2034,6 +2037,8 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||||
worker_pool_run_func(octx->ctx->worker_pool, quant_job_func, octx, n_quant_jobs);
|
worker_pool_run_func(octx->ctx->worker_pool, quant_job_func, octx, n_quant_jobs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FARF(HIGH, "matmul-%s : quant jobs finished! Atomic lock: %u\n", op_type, atomic_load(&octx->shared_atomic_lock));
|
||||||
|
|
||||||
if (!(octx->flags & HTP_OPFLAGS_SKIP_COMPUTE)) {
|
if (!(octx->flags & HTP_OPFLAGS_SKIP_COMPUTE)) {
|
||||||
// Run matmul jobs
|
// Run matmul jobs
|
||||||
const uint32_t n_matmul_jobs = octx->n_threads;
|
const uint32_t n_matmul_jobs = octx->n_threads;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue