#pragma once

#include "common.cuh"

// Parameter bundle describing a 2D convolution problem.
// Every field is a plain value; nothing in this struct owns memory.
typedef struct {
    unsigned int n;    // batch size
    unsigned int c;    // number of channels
    unsigned int h;    // height
    unsigned int w;    // width
    unsigned int k;    // number of filters
    unsigned int r;    // filter height
    unsigned int s;    // filter width
    unsigned int u;    // stride height
    unsigned int v;    // stride width
    unsigned int p;    // padding height
    unsigned int q;    // padding width
    unsigned int d_h;  // dilation height
    unsigned int d_w;  // dilation width
    unsigned int Oh;   // output height
    unsigned int Ow;   // output width
    // NOTE(review): meaning of the layout values is not visible in this header;
    // confirm against the code that fills in and consumes this field.
    unsigned int layout;
    // Precomputed uint3 constants. Judging by the names, these look like
    // fast-division helpers for S*C, OW, C, R*S and S respectively --
    // TODO confirm against the code that initializes them.
    uint3 SC_fastdiv;
    uint3 OW_fastdiv;
    uint3 C_fastdiv;
    uint3 RS_fastdiv;
    uint3 S_fastdiv;
} param_t;

// Block-size constant for the implicit conv2d path (presumably threads per
// block at kernel launch -- verify at the use site).
// The identifier spelling ("IMPLICT") is kept as-is because other translation
// units may reference this exact name.
#define CUDA_CONV2D_IMPLICT_BLOCK_SIZE 256

// Entry point for the implicit conv2d CUDA op; defined elsewhere.
//   ctx: CUDA backend context used for the operation.
//   dst: destination tensor of the op (presumably also carries the source
//        operands and op parameters -- confirm against the implementation).
void ggml_cuda_op_conv2d_implicit(ggml_backend_cuda_context & ctx, ggml_tensor * dst);