// Declarations for the implicit conv2d CUDA op: parameter struct, block-size macro, and entry point.
#pragma once
|
|
#include "common.cuh"
|
|
|
|
// Parameter block describing one 2D convolution problem.
// Member order and types are kept exactly as declared so that the memory
// layout seen by any code sharing this struct does not change.
typedef struct {
    // Input tensor dimensions.
    unsigned int n;    // batch size
    unsigned int c;    // number of channels
    unsigned int h;    // height
    unsigned int w;    // width

    // Filter dimensions.
    unsigned int k;    // number of filters
    unsigned int r;    // filter height
    unsigned int s;    // filter width

    // Convolution geometry.
    unsigned int u;    // stride height
    unsigned int v;    // stride width
    unsigned int p;    // padding height
    unsigned int q;    // padding width
    unsigned int d_h;  // dilation height
    unsigned int d_w;  // dilation width

    // Output tensor dimensions.
    unsigned int Oh;   // output height
    unsigned int Ow;   // output width

    // NOTE(review): meaning of the layout values is not visible in this
    // header; presumably selects the tensor memory layout -- confirm
    // against the code that fills and reads this field.
    unsigned int layout;

    // NOTE(review): presumably precomputed constants for fast integer
    // division/modulo by the quantity named in the prefix (S*C, Ow, C,
    // R*S, S) -- confirm against the code that initializes them.
    uint3 SC_fastdiv;
    uint3 OW_fastdiv;
    uint3 C_fastdiv;
    uint3 RS_fastdiv;
    uint3 S_fastdiv;
} param_t;
|
|
|
|
|
|
// Block-size constant for the implicit conv2d op (256).
// NOTE(review): presumably the number of threads per CUDA block used at
// kernel launch -- confirm at the launch site.
// NOTE(review): "IMPLICT" looks like a misspelling of "IMPLICIT"; the name is
// left unchanged here because other translation units may reference it.
#define CUDA_CONV2D_IMPLICT_BLOCK_SIZE 256
|
|
// Host-side entry point for the implicit conv2d CUDA op (defined elsewhere).
//   ctx: CUDA backend context the op runs with.
//   dst: destination tensor; presumably the op's inputs and parameters are
//        obtained from this tensor -- confirm against the implementation.
void ggml_cuda_op_conv2d_implicit(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|