Merge branch 'ggml-org:master' into power-law-sampler

This commit is contained in:
ddh0 2025-12-26 10:33:56 -06:00 committed by GitHub
commit 51070e0db7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 233 additions and 27 deletions

View File

@ -17,7 +17,7 @@ OpenCL (Open Computing Language) is an open, royalty-free standard for cross-pla
### Llama.cpp + OpenCL
The llama.cpp OpenCL backend is designed to enable llama.cpp on **Qualcomm Adreno GPU** firstly via OpenCL. Thanks to the portabilty of OpenCL, the OpenCL backend can also run on certain Intel GPUs although the performance is not optimal.
The llama.cpp OpenCL backend is designed to enable llama.cpp on **Qualcomm Adreno GPU** firstly via OpenCL. Thanks to the portabilty of OpenCL, the OpenCL backend can also run on certain Intel GPUs such as those that do not have [SYCL](/docs/backend/SYCL.md) support although the performance is not optimal.
## OS

View File

@ -3702,3 +3702,106 @@ void ggml_cann_out_prod(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
break;
}
}
void ggml_cann_ssm_conv(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_tensor * src0 = dst->src[0]; // conv_x
ggml_tensor * src1 = dst->src[1]; // conv1d.weight
// This op is currently defined only for F32 in ggml_cpu
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(dst->type == GGML_TYPE_F32);
// Shapes follow ggml_compute_forward_ssm_conv_f32
const int64_t nc = src1->ne[0]; // d_conv
const int64_t ncs = src0->ne[0]; // d_conv - 1 + n_t
const int64_t nr = src0->ne[1]; // d_inner
const int64_t n_s = src0->ne[2]; // n_seqs
const int64_t n_t = dst->ne[1]; // tokens per sequence
GGML_ASSERT(dst->ne[0] == nr); // dst: {d_inner, n_t, n_s}
GGML_ASSERT(src1->ne[1] == nr); // weight: {d_conv, d_inner}
GGML_ASSERT(ncs == nc - 1 + n_t); // conv_x: {d_conv - 1 + n_t, d_inner, n_s}
GGML_ASSERT(src0->nb[0] == sizeof(float));
GGML_ASSERT(src1->nb[0] == sizeof(float));
// --- Build CANN tensors ---
// 1) Input: conv_x as NCL
//
// src0->ne = { ncs, nr, n_s, 1 } // {L_in, C, N}
// Passing ACL_FORMAT_NCL here means:
// reversed dims -> [N, C, L_in] = [n_s, nr, ncs]
acl_tensor_ptr acl_x = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
// 2) Weights: depthwise conv kernel, view src1 as {K, 1, C}
//
// src1 original: ne = { nc, nr, 1, 1 } // [K, C, 1, 1]
// we want a view: ne_w = { nc, 1, nr } // [K, 1, C]
// so that reversed dims -> [C, 1, K] which matches
// [out_channels, in_channels/groups, kernel_size]
int64_t w_ne[GGML_MAX_DIMS] = { nc, 1, nr, 1 }; // [K, 1 input ch. per group, C groups]
// Layout: src1 data is [K, C] with
// offset(k, c) = k*nb0 + c*nb1
// We want offset_w(k, 0, c) = k*nb0 + c*nb1,
// so we can reuse nb0 and nb1, and set nb2 = nb1.
size_t w_nb[GGML_MAX_DIMS] = { src1->nb[0], src1->nb[1], src1->nb[1], src1->nb[3] }; // same as src1
acl_tensor_ptr acl_w = ggml_cann_create_tensor(
src1->data, ggml_cann_type_mapping(src1->type), ggml_type_size(src1->type), w_ne, w_nb, 3, ACL_FORMAT_NCL);
// 3) Output: dst is { d_inner, n_t, n_s } (CLN)
//
// We need an NCL view of the same buffer:
// desired NCL logical shape: { L_out = n_t, C = nr, N = n_s }
//
// Original CLN layout:
// dst->ne = { nr, n_t, n_s }
// dst->nb[0] = sizeof(float)
// dst->nb[1] = nr * sizeof(float)
// dst->nb[2] = nr * n_t * sizeof(float)
//
// We want offset_new(L, C, N) = offset_orig(C, L, N).
// Choose:
// nb_y[0] = nr * sizeof(float); // step in L
// nb_y[1] = sizeof(float); // step in C
// nb_y[2] = nr * n_t * sizeof(float); // step in N
int64_t y_ne[GGML_MAX_DIMS] = { n_t, nr, n_s, 1 }; // [L_out, C, N]
size_t y_nb[GGML_MAX_DIMS] = { dst->ne[0] * sizeof(float), sizeof(float), dst->ne[0] * dst->ne[1] * sizeof(float), dst->nb[3] }; // [nr, 1, nr * n_t]
acl_tensor_ptr acl_y = ggml_cann_create_tensor(
dst->data, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type), y_ne, y_nb, 3, ACL_FORMAT_NCL);
// --- Conv1d parameters: depthwise, stride 1, no padding ("valid") ---
int64_t strideVal[1] = { 1 };
int64_t paddingVal[1] = { 0 };
int64_t dilationVal[1] = { 1 };
acl_int_array_ptr stride = ggml_cann_create_int_array(strideVal, 1);
acl_int_array_ptr padding = ggml_cann_create_int_array(paddingVal, 1);
acl_int_array_ptr dilation = ggml_cann_create_int_array(dilationVal, 1);
const bool transposed = false;
const int64_t groups = nr; // depthwise: one group per inner dim
int8_t cubeMathType = 0;
#ifdef ASCEND_310P
cubeMathType = 1;
#endif
GGML_CANN_CALL_ACLNN_OP(ctx,
Convolution,
acl_x.get(), // input: N, C, L_in = ncs
acl_w.get(), // weight: [C, 1, K] with groups=nr
nullptr, // bias
stride.get(),
padding.get(),
dilation.get(),
transposed,
padding.get(), // output padding (unused for non-transposed)
groups,
acl_y.get(),
cubeMathType);
}

View File

@ -1033,6 +1033,8 @@ void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTenso
ggml_backend_cann_context & ctx,
ggml_tensor * dst);
void ggml_cann_ssm_conv(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/**
* @brief Applies a gated (GLU-style) unary operation using the CANN backend.
*

View File

@ -1888,6 +1888,8 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
break;
case GGML_OP_OUT_PROD:
ggml_cann_out_prod(ctx, dst);
case GGML_OP_SSM_CONV:
ggml_cann_ssm_conv(ctx, dst);
break;
default:
return false;
@ -2471,6 +2473,8 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
}
return true;
}
case GGML_OP_SSM_CONV:
return true;
default:
return false;
}

View File

@ -651,7 +651,7 @@ struct vk_device_struct {
vk_pipeline pipeline_add_id_f32;
vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32;
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32, pipeline_upscale_bilinear_antialias_f32;
vk_pipeline pipeline_scale_f32;
vk_pipeline pipeline_sqr_f32;
vk_pipeline pipeline_sqrt_f32;
@ -1192,6 +1192,7 @@ struct vk_op_diag_mask_push_constants {
struct vk_op_rope_push_constants {
uint32_t rope_mode;
uint32_t ncols;
uint32_t nrows;
uint32_t n_dims;
float freq_scale;
uint32_t p_delta_rows;
@ -3955,6 +3956,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bicubic_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BICUBIC}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_antialias_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS}, 1);
ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
@ -8432,7 +8434,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
return nullptr;
case GGML_OP_UPSCALE:
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(dst, 0) & 0xFF);
uint32_t mode = (ggml_get_op_params_i32(dst, 0) & (0xFF | GGML_SCALE_FLAG_ANTIALIAS));
switch (mode) {
case GGML_SCALE_MODE_NEAREST:
return ctx->device->pipeline_upscale_nearest_f32;
@ -8440,6 +8442,8 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
return ctx->device->pipeline_upscale_bilinear_f32;
case GGML_SCALE_MODE_BICUBIC:
return ctx->device->pipeline_upscale_bicubic_f32;
case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS:
return ctx->device->pipeline_upscale_bilinear_antialias_f32;
default:
return nullptr;
}
@ -9090,10 +9094,20 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
elements = { num_groups * (uint32_t)src0->ne[3], 1, 1 };
} break;
case GGML_OP_DIAG_MASK_INF:
case GGML_OP_ROPE:
case GGML_OP_ROPE_BACK:
elements = { (uint32_t)ggml_nrows(src0), (uint32_t)ne00, 1 };
break;
case GGML_OP_ROPE:
case GGML_OP_ROPE_BACK:
{
uint32_t nrows = (uint32_t)ggml_nrows(src0);
uint32_t z = 1;
if (nrows > ctx->device->properties.limits.maxComputeWorkGroupCount[0]) {
z = CEIL_DIV(nrows, 32768);
nrows = 32768;
}
elements = { nrows, (uint32_t)ne00, z };
} break;
case GGML_OP_GET_ROWS:
elements = { (uint32_t)ne00, (uint32_t)ne10, (uint32_t)(ne11 * ne12) };
elements[1] = std::min(elements[1], ctx->device->properties.limits.maxComputeWorkGroupCount[1]);
@ -10021,7 +10035,7 @@ static vk_op_rope_push_constants ggml_vk_make_rope_constants(const ggml_tensor *
uint32_t nb02 = src0->nb[2] / ggml_type_size(src0->type);
vk_op_rope_push_constants rope {
(uint32_t)mode, (uint32_t)src0->ne[0], (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1],
(uint32_t)mode, (uint32_t)src0->ne[0], (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1],
freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale,
has_ff, (uint32_t)src0->ne[2], nb01, nb02,
{ sections[0], sections[1], sections[2], sections[3] }, is_imrope, backprop, set_rows_stride,
@ -14330,7 +14344,12 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
}
return true;
case GGML_OP_UPSCALE:
return op->src[0]->type == GGML_TYPE_F32 && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
if (op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS) {
if ((op->op_params[0] & 0xFF) != GGML_SCALE_MODE_BILINEAR) {
return false;
}
}
return op->src[0]->type == GGML_TYPE_F32;
case GGML_OP_ACC:
return op->src[0]->type == GGML_TYPE_F32;
case GGML_OP_CONCAT:

View File

@ -6,6 +6,9 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x;
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
return;
}
rope_multi(i0, i1, pc);
}

View File

@ -6,6 +6,9 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x;
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
return;
}
rope_neox(i0, i1, pc);
}

View File

@ -6,6 +6,9 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x;
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
return;
}
rope_norm(i0, i1, pc);
}

View File

@ -6,6 +6,7 @@
struct rope_params {
uint rope_mode;
uint ncols;
uint nrows;
uint n_dims;
float freq_scale;
uint p_delta_rows;

View File

@ -6,6 +6,9 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x;
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
return;
}
rope_vision(i0, i1, pc);
}

View File

@ -21,6 +21,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
#define NEAREST 0
#define BILINEAR 1
#define BICUBIC 2
#define BILINEAR_ANTIALIAS 513
layout (constant_id = 0) const uint scale_mode = 0;
@ -62,6 +63,56 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
return fetch_bilinear(c0, c1, d, i12, i13);
}
float triangle_filter(float x) {
return max(1.0f - abs(x), 0.0f);
}
float interpolate_bilinear_antialias(uint i10, uint i11, uint i12, uint i13) {
const float support1 = max(1.0f, 1.0f / p.sf1);
const float invscale1 = 1.0f / support1;
const float support0 = max(1.0f, 1.0f / p.sf0);
const float invscale0 = 1.0f / support0;
const uint i02 = uint(i12 / p.sf2);
const uint i03 = uint(i13 / p.sf3);
const float y = (float(i11) + p.pixel_offset) / p.sf1;
const float x = (float(i10) + p.pixel_offset) / p.sf0;
// the range of source pixels that contribute
const int x_min = max(int(x - support0 + p.pixel_offset), 0);
const int x_max = min(int(x + support0 + p.pixel_offset), int(p.ne00));
const int y_min = max(int(y - support1 + p.pixel_offset), 0);
const int y_max = min(int(y + support1 + p.pixel_offset), int(p.ne01));
// bilinear filter with antialiasing
float val = 0.0f;
float total_weight = 0.0f;
for (int sy = y_min; sy < y_max; sy++) {
const float weight_y = triangle_filter((sy - y + p.pixel_offset) * invscale1);
for (int sx = x_min; sx < x_max; sx++) {
const float weight_x = triangle_filter((sx - x + p.pixel_offset) * invscale0);
const float weight = weight_x * weight_y;
if (weight <= 0.0f) {
continue;
}
const float pixel = data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + sy * p.nb01 + sx * p.nb00];
val += pixel * weight;
total_weight += weight;
}
}
if (total_weight > 0.0f) {
val /= total_weight;
}
return val;
}
// Bicubic interpolation with alpha = -0.75
// https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
const vec4 bcoeffs1 = vec4( 1.25, -2.25, 0.0, 1.0);
@ -118,6 +169,9 @@ void main() {
case BICUBIC:
result = interpolate_bicubic(i10, i11, i12, i13);
break;
case BILINEAR_ANTIALIAS:
result = interpolate_bilinear_antialias(i10, i11, i12, i13);
break;
}
data_d[p.d_offset + idx] = D_TYPE(result);

View File

@ -402,12 +402,20 @@ static std::string var_to_str(ggml_op_pool pool) {
}
static std::string var_to_str(ggml_scale_mode mode) {
switch (mode) {
case GGML_SCALE_MODE_NEAREST: return "nearest";
case GGML_SCALE_MODE_BILINEAR: return "bilinear";
case GGML_SCALE_MODE_BICUBIC: return "bicubic";
default: return std::to_string(mode);
std::string str;
switch (mode & 0xFF) {
case GGML_SCALE_MODE_NEAREST: str = "nearest"; break;
case GGML_SCALE_MODE_BILINEAR: str = "bilinear"; break;
case GGML_SCALE_MODE_BICUBIC: str = "bicubic"; break;
default: str = std::to_string(mode); break;
}
if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
str += "|align_corners";
}
if (mode & GGML_SCALE_FLAG_ANTIALIAS) {
str += "|antialias";
}
return str;
}
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
@ -5535,18 +5543,16 @@ struct test_interpolate : public test_case {
const ggml_type type;
const std::array<int64_t, 4> ne;
const std::array<int64_t, 4> ne_tgt;
const uint32_t mode = GGML_SCALE_MODE_NEAREST;
const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
std::string vars() override {
ggml_scale_mode mode = (ggml_scale_mode)(this->mode & 0xFF);
std::string flags = (this->mode & GGML_SCALE_FLAG_ALIGN_CORNERS) ? "align_corners" : "none";
return VARS_TO_STR5(type, ne, ne_tgt, mode, flags);
return VARS_TO_STR4(type, ne, ne_tgt, mode);
}
test_interpolate(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {2, 5, 7, 11},
std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
uint32_t mode = GGML_SCALE_MODE_NEAREST)
ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
: type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
@ -7775,6 +7781,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_rope(type, {128, 40, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 13B
test_cases.emplace_back(new test_rope(type, {128, 52, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 30B
test_cases.emplace_back(new test_rope(type, {128, 64, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 65B
test_cases.emplace_back(new test_rope(type, {16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw));
}
if (all) {
@ -7789,6 +7796,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (stablelm)
test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
test_cases.emplace_back(new test_rope(type, { 80, 32, 4, 1}, 32, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
test_cases.emplace_back(new test_rope(type, { 16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw));
}
if (all) {
@ -7802,6 +7810,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 32, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
test_cases.emplace_back(new test_rope(type, { 80, 16, 2, 1}, 80, GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl ViT)
test_cases.emplace_back(new test_rope(type, {128, 16, 2, 1}, 128, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen3vl)
test_cases.emplace_back(new test_rope(type, {16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
}
test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1}, 64, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B)
@ -7880,9 +7889,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
}
for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
}
test_cases.emplace_back(new test_sum());

View File

@ -1007,8 +1007,10 @@ private:
return ret;
}
void clear_slot(server_slot & slot) const {
GGML_ASSERT(!slot.is_processing());
void clear_slot(server_slot & slot, bool allow_processing = false) const {
if (!allow_processing) {
GGML_ASSERT(!slot.is_processing());
}
SLT_WRN(slot, "clearing slot with %zu tokens\n", slot.prompt.tokens.size());
@ -2336,7 +2338,7 @@ private:
if (!llama_memory_seq_rm(llama_get_memory(ctx), slot.id, p0, -1)) {
SLT_WRN(slot, "failed to truncate tokens with position >= %d - clearing the memory\n", p0);
clear_slot(slot);
clear_slot(slot, /*allow_processing=*/true);
// there is no common part left
slot.n_prompt_tokens_cache = 0;