Merge branch 'ggml-org:master' into power-law-sampler

This commit is contained in:
ddh0 2025-12-11 12:52:53 -06:00 committed by GitHub
commit 66e2d17c7f
25 changed files with 501 additions and 268 deletions

View File

@@ -56,7 +56,7 @@ docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:light -m /model
or with a server image:
```bash
-docker run -v /path/to/models:/models -p 8000:8000 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512
+docker run -v /path/to/models:/models -p 8080:8080 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512
```
## Docker With CUDA
@@ -91,7 +91,7 @@ After building locally, Usage is similar to the non-CUDA examples, but you'll ne
```bash
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
-docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
+docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512 --n-gpu-layers 1
```
## Docker With MUSA
@@ -125,5 +125,5 @@ After building locally, Usage is similar to the non-MUSA examples, but you'll ne
```bash
docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
-docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
+docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512 --n-gpu-layers 1
```

View File

@@ -312,16 +312,9 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
}
// this is a very naive implementation, but for our case the number of free blocks should be very small
-static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct ggml_tensor * tensor) {
+static void ggml_dyn_tallocr_free_bytes(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
size = aligned_offset(NULL, size, alloc->alignment);
-AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
-__func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks);
-#ifdef GGML_ALLOCATOR_DEBUG
-remove_allocated_tensor(alloc, addr, tensor);
-#endif
struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
// see if we can merge with an existing block
@@ -357,8 +350,6 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
}
// otherwise, add a new block
ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
-GGML_UNUSED(tensor);
}
static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
@@ -616,13 +607,17 @@ static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_ten
GGML_ASSERT(parent_size >= node_size);
-if (parent_size > node_size) {
+// note: we want after the freeing the chunks to continue to be aligned
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
+parent_size = aligned_offset(NULL, parent_size, p_alloc->alignment);
+node_size = aligned_offset(NULL, node_size, p_alloc->alignment);
+if (parent_size > node_size) {
struct buffer_address p_addr = p_hn->addr;
p_addr.offset += node_size;
size_t extra_size = parent_size - node_size;
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
-ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
+ggml_dyn_tallocr_free_bytes(p_alloc, p_addr, extra_size);
}
}
@@ -706,7 +701,14 @@ static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * n
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
-ggml_dyn_tallocr_free_tensor(alloc, hn->addr, size, node);
+AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
+__func__, node->name, hn->addr.chunk, hn->addr.offset, size, alloc->chunks[hn->addr.chunk]->n_free_blocks);
+#ifdef GGML_ALLOCATOR_DEBUG
+remove_allocated_tensor(alloc, hn->addr, node);
+#endif
+ggml_dyn_tallocr_free_bytes(alloc, hn->addr, size);
hn->allocated = false;
}
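For readers following the allocator change: the extra-space hunk above now rounds both the parent and node sizes up to the allocator alignment before returning the tail of the parent's allocation to the free list, so the freed range starts and ends on aligned offsets. A minimal sketch of that arithmetic (hypothetical helper, not the ggml implementation; ggml's own aligned_offset() plays this role):

```cpp
#include <cstddef>

// Sketch only: round a size up to a power-of-two alignment.
static size_t align_up(size_t size, size_t alignment) {
    return (size + alignment - 1) & ~(alignment - 1);
}

// Example with a 32-byte alignment:
//   parent_size = 1000 -> align_up = 1024
//   node_size   =  900 -> align_up =  928
// The 96-byte range starting at offset 928 is freed, and both its start and
// end stay 32-byte aligned, so later allocations from that block remain aligned.
```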

View File

@@ -4630,9 +4630,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_CUMSUM:
case GGML_OP_TRI:
case GGML_OP_DIAG:
-return true;
case GGML_OP_SOLVE_TRI:
-return op->src[0]->ne[0] <= 64 && op->src[1]->ne[0] <= 32;
+return true;
default:
return false;
}

View File

@@ -3,6 +3,80 @@
#include "solve_tri.cuh"
#define MAX_N_FAST 64
#define MAX_K_FAST 32
static __global__ void get_batch_pointers(const float * A,
float * X,
const float ** A_ptrs,
float ** X_ptrs,
int64_t ne02,
int64_t total_batches,
size_t s02,
size_t s03,
size_t s2,
size_t s3) {
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= total_batches) {
return;
}
const int64_t i3 = idx / ne02;
const int64_t i2 = idx % ne02;
A_ptrs[idx] = A + i3 * s03 + i2 * s02;
X_ptrs[idx] = X + i3 * s3 + i2 * s2;
}
static void solve_tri_f32_cublas(ggml_backend_cuda_context & ctx,
const float * A,
const float * B,
float * X,
int n,
int k,
int64_t ne02,
int64_t ne03,
size_t s02,
size_t s03,
size_t s12,
size_t s13,
size_t s2,
size_t s3,
cudaStream_t stream) {
const float alpha = 1.0f;
const int64_t total_batches = ne02 * ne03;
if (total_batches == 0) {
return;
}
// Bulk copy B -> X (contiguous tensors)
if (X != B) {
const int64_t total_elements_BX = n * k * total_batches;
CUDA_CHECK(cudaMemcpyAsync(X, B, total_elements_BX * sizeof(float), cudaMemcpyDeviceToDevice, stream));
}
const int id = ggml_cuda_get_device();
ggml_cuda_pool_alloc<const float *> A_ptrs_alloc(ctx.pool(id), total_batches);
ggml_cuda_pool_alloc<float *> X_ptrs_alloc(ctx.pool(id), total_batches);
const float ** A_ptrs_dev = A_ptrs_alloc.get();
float ** X_ptrs_dev = X_ptrs_alloc.get();
get_batch_pointers<<<(total_batches + 255) / 256, 256, 0, stream>>>(A, X, A_ptrs_dev, X_ptrs_dev, ne02,
total_batches, s02, s03, s2, s3);
CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));
// Yes, this is necessary, without this we get RMSE errors
CUBLAS_CHECK(cublasSetMathMode(ctx.cublas_handle(id), CUBLAS_DEFAULT_MATH));
CUBLAS_CHECK(cublasStrsmBatched(ctx.cublas_handle(id), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N,
CUBLAS_DIAG_NON_UNIT, k, n, &alpha, A_ptrs_dev, n, X_ptrs_dev, k, total_batches));
// revert to standard mode from common.cuh
CUBLAS_CHECK(cublasSetMathMode(ctx.cublas_handle(id), CUBLAS_TF32_TENSOR_OP_MATH));
GGML_UNUSED_VARS(s12, s13);
}
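The get_batch_pointers launch above uses one thread per (i2, i3) batch and a ceiling division for the grid size; threads whose index is at or past total_batches return immediately. A small arithmetic sketch (standalone C++, values made up for illustration):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t total_batches = 1000; // e.g. ne02 * ne03 = 250 * 4
    const int64_t block = 256;
    const int64_t grid = (total_batches + block - 1) / block; // ceiling division
    std::printf("%lld blocks x %lld threads = %lld threads for %lld batches\n",
                (long long) grid, (long long) block,
                (long long) (grid * block), (long long) total_batches);
    // prints: 4 blocks x 256 threads = 1024 threads for 1000 batches
    return 0;
}
```

The bulk cudaMemcpyAsync from B into X just above reflects the usual TRSM convention: the solver overwrites its right-hand-side argument in place, so the destination tensor is used as the in/out matrix.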
// ======================
// Fast Kernel (n <= 64, k <= 32) - Warp-based parallel reduction
@@ -176,20 +250,26 @@ static void solve_tri_f32_cuda(const float * A,
}
void ggml_cuda_op_solve_tri(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-const ggml_tensor * src0 = dst->src[0]; // A (triangular n x x matrix)
-const ggml_tensor * src1 = dst->src[1]; // B (right hand side of n x k equation columns)
+const ggml_tensor * src0 = dst->src[0]; // A (n×n, lower triangular)
+const ggml_tensor * src1 = dst->src[1]; // B (n×k)
ggml_is_contiguous(src0);
ggml_is_contiguous(src1);
const int64_t n = src0->ne[0];
const int64_t k = src1->ne[0];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
-GGML_ASSERT(n <= 64);
-GGML_ASSERT(k <= 32);
-solve_tri_f32_cuda((const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k, src0->ne[2],
-src0->ne[3], src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
-src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
-dst->nb[3] / sizeof(float), ctx.stream());
+if (n <= MAX_N_FAST && k <= MAX_K_FAST) {
+solve_tri_f32_cuda((const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
+src0->ne[2], src0->ne[3], src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
+src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
+dst->nb[3] / sizeof(float), ctx.stream());
+} else {
+solve_tri_f32_cublas(ctx, (const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
+ne02, ne03, src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
+src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
+dst->nb[3] / sizeof(float), ctx.stream());
+}
}
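A note on why the cuBLAS path uses CUBLAS_SIDE_RIGHT with CUBLAS_FILL_MODE_UPPER even though A is lower triangular: ggml stores tensors row-major while cuBLAS assumes column-major, so the same buffers read column-major are the transposed matrices. A reading of the call (an interpretation, not text from the commit):

```latex
% Row-major buffers reinterpreted by cuBLAS as column-major matrices:
%   A (n x n, lower triangular, row-major)  is read as  A^T (upper triangular)
%   B (n x k, row-major, row stride k)      is read as  B^T (k x n)
A X = B
\quad\Longleftrightarrow\quad
X^{\top} A^{\top} = B^{\top}
```

That is a right-side, upper-triangular solve on a k×n right-hand side, which matches the cublasStrsmBatched arguments m = k, n = n, lda = n, ldb = k, with X overwritten in place.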

View File

@@ -19,6 +19,9 @@
#define CUDA_R_16F HIPBLAS_R_16F
#define CUDA_R_16BF HIPBLAS_R_16B
#define CUDA_R_32F HIPBLAS_R_32F
+#define CUBLAS_SIDE_RIGHT HIPBLAS_SIDE_RIGHT
+#define CUBLAS_FILL_MODE_UPPER HIPBLAS_FILL_MODE_UPPER
+#define CUBLAS_DIAG_NON_UNIT HIPBLAS_DIAG_NON_UNIT
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
#define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
@@ -30,6 +33,7 @@
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
#define __all_sync(mask, var) __all(var)
#define __any_sync(mask, var) __any(var)
+#define cublasStrsmBatched hipblasStrsmBatched
#define cublasCreate hipblasCreate
#define cublasDestroy hipblasDestroy
#define cublasGemmEx hipblasGemmEx

View File

@@ -12,11 +12,16 @@
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP MUBLAS_GEMM_DEFAULT
#define CUBLAS_OP_N MUBLAS_OP_N
#define CUBLAS_OP_T MUBLAS_OP_T
+#define CUBLAS_DEFAULT_MATH MUBLAS_DEFAULT_MATH
+#define CUBLAS_SIDE_RIGHT MUBLAS_SIDE_RIGHT
+#define CUBLAS_FILL_MODE_UPPER MUBLAS_FILL_MODE_UPPER
+#define CUBLAS_DIAG_NON_UNIT MUBLAS_DIAG_NON_UNIT
#define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_TENSOR_OP_MATH
#define CUDA_R_16F MUSA_R_16F
#define CUDA_R_16BF MUSA_R_16BF
#define CUDA_R_32F MUSA_R_32F
+#define cublasStrsmBatched mublasStrsmBatched
#define cublasComputeType_t cudaDataType_t
#define cublasCreate mublasCreate
#define cublasDestroy mublasDestroy

View File

@@ -695,6 +695,8 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
udata->seq_idx .resize(LLAMA_MAX_SEQ, -1);
udata->output .resize(n_tokens);
+udata->seq_id_data.reserve(n_tokens);
seq_set_t seq_set_unq;
for (size_t i = 0; i < idxs.size(); ++i) {
@@ -716,11 +718,13 @@
}
udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
-udata->seq_id[i] = batch.seq_id[idxs[i]];
udata->output[i] = batch.logits[idxs[i]];
for (int s = 0; s < udata->n_seq_id[i]; ++s) {
-seq_set_unq.set(udata->seq_id[i][s]);
+const llama_seq_id seq_id = batch.seq_id[idxs[i]][s];
+udata->seq_id_data.push_back(seq_id);
+seq_set_unq.set(seq_id);
}
if (udata->output[i]) {
@@ -728,6 +732,12 @@
}
}
+llama_seq_id * seq_id_ptr = udata->seq_id_data.data();
+for (size_t i = 0; i < idxs.size(); ++i) {
+udata->seq_id[i] = seq_id_ptr;
+seq_id_ptr += udata->n_seq_id[i];
+}
for (uint32_t s = 0; s < n_seq_max; ++s) {
if (seq_set_unq.test(s)) {
udata->seq_idx[s] = udata->seq_id_unq.size();
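The two-pass fill above is deliberate: all sequence ids are pushed into the owning seq_id_data vector first, and the per-token seq_id pointers are assigned only afterwards, because a growing std::vector may reallocate and invalidate earlier pointers. A minimal sketch of the pattern (generic types, not the llama.cpp structs):

```cpp
#include <cstdint>
#include <cstddef>
#include <vector>

int main() {
    std::vector<int32_t>   counts = {2, 1, 3};   // n_seq_id per token (example data)
    std::vector<int32_t>   flat;                 // plays the role of seq_id_data (owning)
    std::vector<int32_t *> ptrs(counts.size());  // plays the role of seq_id (views)

    // pass 1: fill the flat buffer completely
    flat.reserve(2 + 1 + 3);
    int32_t next = 0;
    for (int32_t c : counts) {
        for (int32_t s = 0; s < c; ++s) {
            flat.push_back(next++);
        }
    }

    // pass 2: only now hand out pointers - flat will not grow anymore
    int32_t * p = flat.data();
    for (size_t i = 0; i < counts.size(); ++i) {
        ptrs[i] = p;
        p += counts[i];
    }
    return 0;
}
```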

View File

@@ -56,13 +56,15 @@ struct llama_ubatch {
std::vector<float> embd;
std::vector<llama_pos> pos;
std::vector<int32_t> n_seq_id;
-std::vector<llama_seq_id *> seq_id;
+std::vector<llama_seq_id *> seq_id; // these point into the seq_id_data below
std::vector<llama_seq_id> seq_id_unq;
std::vector<int32_t> seq_idx;
std::vector<int8_t> output;
+std::vector<llama_seq_id> seq_id_data;
};
-// the llama_ubatch pointers above point to this data if set. otherwise - points to non-owning data
+// the llama_ubatch pointers above point to this data if set. otherwise - point to external non-owning data
std::shared_ptr<data_t> data;
};

View File

@@ -7861,9 +7861,24 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 64, 64, 2, 2 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 79, 79, 5, 3 }, { 417, 79, 5, 3 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 80, 80, 2, 8 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 79, 80, 2, 8 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 81, 80, 2, 8 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 80, 80, 8, 8 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 79, 80, 8, 8 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 81, 80, 8, 8 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 84, 84, 4, 4 }, { 32, 84, 4, 4 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 95, 95, 8, 8 }, { 40, 95, 8, 8 }));
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 31, 128, 4, 4 }));
-test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 32, 128, 4, 4 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 3, 4 }, { 32, 128, 3, 4 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 32, 128, 4, 1 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 200, 64, 4, 4 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 384, 64, 4, 4 }));
for (bool v : {false, true}) {
for (bool circular : {false, true}) {
@@ -8064,12 +8079,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, 2*16416));
-test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 2 }, { 6, 64, 4, 2 }));
-test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 8, 128, 4, 1 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 32, 64, 4, 4 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
// qwen3next with CHUNK_SIZE 64
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 8, 32 }, { 64, 64, 8, 32 }));
// qwen3next with CHUNK_SIZE 128
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 32 }, { 128, 128, 4, 32 }));
+test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 256, 256, 4, 2 }, { 128, 256, 4, 2 }));
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER, GGML_TYPE_F32, { 256, 256, 4, 4 }));
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG, GGML_TYPE_F32, { 1024, 1024, 8, 4 }));

Binary file not shown.

View File

@@ -41,7 +41,7 @@
"@tailwindcss/vite": "^4.0.0",
"@types/node": "^22",
"@vitest/browser": "^3.2.3",
-"bits-ui": "^2.8.11",
+"bits-ui": "^2.14.4",
"clsx": "^2.1.1",
"dexie": "^4.0.11",
"eslint": "^9.18.0",
@@ -3343,17 +3343,17 @@
}
},
"node_modules/bits-ui": {
-"version": "2.8.11",
-"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.8.11.tgz",
-"integrity": "sha512-lKN9rAk69my6j7H1D4B87r8LrHuEtfEsf1xCixBj9yViql2BdI3f04HyyyT7T1GOCpgb9+8b0B+nm3LN81Konw==",
+"version": "2.14.4",
+"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.14.4.tgz",
+"integrity": "sha512-W6kenhnbd/YVvur+DKkaVJ6GldE53eLewur5AhUCqslYQ0vjZr8eWlOfwZnMiPB+PF5HMVqf61vXBvmyrAmPWg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@floating-ui/core": "^1.7.1",
"@floating-ui/dom": "^1.7.1",
"esm-env": "^1.1.2",
-"runed": "^0.29.1",
-"svelte-toolbelt": "^0.9.3",
+"runed": "^0.35.1",
+"svelte-toolbelt": "^0.10.6",
"tabbable": "^6.2.0"
},
"engines": {
@@ -3368,9 +3368,9 @@
}
},
"node_modules/bits-ui/node_modules/runed": {
-"version": "0.29.2",
-"resolved": "https://registry.npmjs.org/runed/-/runed-0.29.2.tgz",
-"integrity": "sha512-0cq6cA6sYGZwl/FvVqjx9YN+1xEBu9sDDyuWdDW1yWX7JF2wmvmVKfH+hVCZs+csW+P3ARH92MjI3H9QTagOQA==",
+"version": "0.35.1",
+"resolved": "https://registry.npmjs.org/runed/-/runed-0.35.1.tgz",
+"integrity": "sha512-2F4Q/FZzbeJTFdIS/PuOoPRSm92sA2LhzTnv6FXhCoENb3huf5+fDuNOg1LNvGOouy3u/225qxmuJvcV3IZK5Q==",
"dev": true,
"funding": [
"https://github.com/sponsors/huntabyte",
@@ -3378,23 +3378,31 @@
],
"license": "MIT",
"dependencies": {
-"esm-env": "^1.0.0"
+"dequal": "^2.0.3",
+"esm-env": "^1.0.0",
+"lz-string": "^1.5.0"
},
"peerDependencies": {
+"@sveltejs/kit": "^2.21.0",
"svelte": "^5.7.0"
+},
+"peerDependenciesMeta": {
+"@sveltejs/kit": {
+"optional": true
+}
}
},
"node_modules/bits-ui/node_modules/svelte-toolbelt": {
-"version": "0.9.3",
-"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.9.3.tgz",
-"integrity": "sha512-HCSWxCtVmv+c6g1ACb8LTwHVbDqLKJvHpo6J8TaqwUme2hj9ATJCpjCPNISR1OCq2Q4U1KT41if9ON0isINQZw==",
+"version": "0.10.6",
+"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.10.6.tgz",
+"integrity": "sha512-YWuX+RE+CnWYx09yseAe4ZVMM7e7GRFZM6OYWpBKOb++s+SQ8RBIMMe+Bs/CznBMc0QPLjr+vDBxTAkozXsFXQ==",
"dev": true,
"funding": [
"https://github.com/sponsors/huntabyte"
],
"dependencies": {
"clsx": "^2.1.1",
-"runed": "^0.29.0",
+"runed": "^0.35.1",
"style-to-object": "^1.0.8"
},
"engines": {

View File

@@ -43,7 +43,7 @@
"@tailwindcss/vite": "^4.0.0",
"@types/node": "^22",
"@vitest/browser": "^3.2.3",
-"bits-ui": "^2.8.11",
+"bits-ui": "^2.14.4",
"clsx": "^2.1.1",
"dexie": "^4.0.11",
"eslint": "^9.18.0",

View File

@@ -331,6 +331,7 @@
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {disabled
? 'cursor-not-allowed opacity-60'
: ''} {className}"
+data-slot="chat-form"
>
<ChatAttachmentsList
bind:uploadedFiles

View File

@@ -1,6 +1,5 @@
<script lang="ts">
-import { Input } from '$lib/components/ui/input';
-import { Search } from '@lucide/svelte';
+import { SearchInput } from '$lib/components/app';
interface Props {
value?: string;
@@ -15,19 +14,6 @@
onInput,
class: className
}: Props = $props();
-function handleInput(event: Event) {
-const target = event.target as HTMLInputElement;
-value = target.value;
-onInput?.(target.value);
-}
</script>
-<div class="relative mb-4 {className}">
-<Search
-class="absolute top-1/2 left-3 h-4 w-4 -translate-y-1/2 transform text-muted-foreground"
-/>
-<Input bind:value class="pl-10" oninput={handleInput} {placeholder} type="search" />
-</div>
+<SearchInput bind:value {placeholder} {onInput} class="mb-4 {className}" />

View File

@@ -64,6 +64,7 @@ export { default as CopyToClipboardIcon } from './misc/CopyToClipboardIcon.svelt
export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte';
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
export { default as RemoveButton } from './misc/RemoveButton.svelte';
+export { default as SearchInput } from './misc/SearchInput.svelte';
export { default as SyntaxHighlightedCode } from './misc/SyntaxHighlightedCode.svelte';
export { default as ModelsSelector } from './models/ModelsSelector.svelte';

View File

@@ -0,0 +1,73 @@
<script lang="ts">
import { Input } from '$lib/components/ui/input';
import { Search, X } from '@lucide/svelte';
interface Props {
value?: string;
placeholder?: string;
onInput?: (value: string) => void;
onClose?: () => void;
onKeyDown?: (event: KeyboardEvent) => void;
class?: string;
id?: string;
ref?: HTMLInputElement | null;
}
let {
value = $bindable(''),
placeholder = 'Search...',
onInput,
onClose,
onKeyDown,
class: className,
id,
ref = $bindable(null)
}: Props = $props();
let showClearButton = $derived(!!value || !!onClose);
function handleInput(event: Event) {
const target = event.target as HTMLInputElement;
value = target.value;
onInput?.(target.value);
}
function handleClear() {
if (value) {
value = '';
onInput?.('');
ref?.focus();
} else {
onClose?.();
}
}
</script>
<div class="relative {className}">
<Search
class="absolute top-1/2 left-3 h-4 w-4 -translate-y-1/2 transform text-muted-foreground"
/>
<Input
{id}
bind:value
bind:ref
class="pl-9 {showClearButton ? 'pr-9' : ''}"
oninput={handleInput}
onkeydown={onKeyDown}
{placeholder}
type="search"
/>
{#if showClearButton}
<button
type="button"
class="absolute top-1/2 right-3 -translate-y-1/2 transform text-muted-foreground transition-colors hover:text-foreground"
onclick={handleClear}
aria-label={value ? 'Clear search' : 'Close'}
>
<X class="h-4 w-4" />
</button>
{/if}
</div>

View File

@@ -2,8 +2,8 @@
import { onMount, tick } from 'svelte';
import { ChevronDown, EyeOff, Loader2, MicOff, Package, Power } from '@lucide/svelte';
import * as Tooltip from '$lib/components/ui/tooltip';
+import * as Popover from '$lib/components/ui/popover';
import { cn } from '$lib/components/ui/utils';
-import { portalToBody } from '$lib/utils';
import {
modelsStore,
modelOptions,
@@ -17,12 +17,8 @@
import { usedModalities, conversationsStore } from '$lib/stores/conversations.svelte';
import { ServerModelStatus } from '$lib/enums';
import { isRouterMode } from '$lib/stores/server.svelte';
-import { DialogModelInformation } from '$lib/components/app';
-import {
-MENU_MAX_WIDTH,
-MENU_OFFSET,
-VIEWPORT_GUTTER
-} from '$lib/constants/floating-ui-constraints';
+import { DialogModelInformation, SearchInput } from '$lib/components/app';
+import type { ModelOption } from '$lib/types/models';
interface Props {
class?: string;
@@ -145,185 +141,126 @@
return options.some((option) => option.model === currentModel);
});
-let isOpen = $state(false);
-let showModelDialog = $state(false);
-let container: HTMLDivElement | null = null;
-let menuRef = $state<HTMLDivElement | null>(null);
-let triggerButton = $state<HTMLButtonElement | null>(null);
-let menuPosition = $state<{
-top: number;
-left: number;
-width: number;
-placement: 'top' | 'bottom';
-maxHeight: number;
-} | null>(null);
-onMount(async () => {
-try {
-await modelsStore.fetch();
-} catch (error) {
-console.error('Unable to load models:', error);
-}
+let searchTerm = $state('');
+let searchInputRef = $state<HTMLInputElement | null>(null);
+let highlightedIndex = $state<number>(-1);
+let filteredOptions: ModelOption[] = $derived(
+(() => {
+const term = searchTerm.trim().toLowerCase();
+if (!term) return options;
+return options.filter(
+(option) =>
+option.model.toLowerCase().includes(term) || option.name?.toLowerCase().includes(term)
+);
+})()
+);
+// Get indices of compatible options for keyboard navigation
+let compatibleIndices = $derived(
+filteredOptions
+.map((option, index) => (isModelCompatible(option) ? index : -1))
+.filter((i) => i !== -1)
+);
+// Reset highlighted index when search term changes
+$effect(() => {
+void searchTerm;
+highlightedIndex = -1;
});
-function toggleOpen() {
-if (loading || updating) return;
-if (isRouter) {
-// Router mode: show dropdown
-if (isOpen) {
-closeMenu();
-} else {
-openMenu();
-}
-} else {
-// Single model mode: show dialog
-showModelDialog = true;
-}
-}
-async function openMenu() {
+let isOpen = $state(false);
+let showModelDialog = $state(false);
+onMount(() => {
+modelsStore.fetch().catch((error) => {
+console.error('Unable to load models:', error);
+});
+});
+function handleOpenChange(open: boolean) {
if (loading || updating) return;
+if (open) {
isOpen = true;
-await tick();
-updateMenuPosition();
-requestAnimationFrame(() => updateMenuPosition());
+searchTerm = '';
+highlightedIndex = -1;
+// Focus search input after popover opens
+tick().then(() => {
+requestAnimationFrame(() => searchInputRef?.focus());
+});
if (isRouter) {
modelsStore.fetchRouterModels().then(() => {
modelsStore.fetchModalitiesForLoadedModels();
});
}
+} else {
+isOpen = false;
+searchTerm = '';
+highlightedIndex = -1;
+}
+}
+function handleTriggerClick() {
+if (loading || updating) return;
+if (!isRouter) {
+// Single model mode: show dialog instead of popover
+showModelDialog = true;
+}
+// For router mode, the Popover handles open/close
}
export function open() {
if (isRouter) {
-openMenu();
+handleOpenChange(true);
} else {
showModelDialog = true;
}
}
function closeMenu() {
-if (!isOpen) return;
-isOpen = false;
-menuPosition = null;
+handleOpenChange(false);
}
-function handlePointerDown(event: PointerEvent) {
-if (!container) return;
-const target = event.target as Node | null;
-if (target && !container.contains(target) && !(menuRef && menuRef.contains(target))) {
-closeMenu();
-}
-}
-function handleKeydown(event: KeyboardEvent) {
-if (event.key === 'Escape') {
-closeMenu();
-}
-}
-function handleResize() {
-if (isOpen) {
-updateMenuPosition();
-}
-}
-function updateMenuPosition() {
-if (!isOpen || !triggerButton || !menuRef) return;
-const triggerRect = triggerButton.getBoundingClientRect();
-const viewportWidth = window.innerWidth;
-const viewportHeight = window.innerHeight;
-if (viewportWidth === 0 || viewportHeight === 0) return;
-const scrollWidth = menuRef.scrollWidth;
-const scrollHeight = menuRef.scrollHeight;
-const availableWidth = Math.max(0, viewportWidth - VIEWPORT_GUTTER * 2);
-const constrainedMaxWidth = Math.min(MENU_MAX_WIDTH, availableWidth || MENU_MAX_WIDTH);
-const safeMaxWidth =
-constrainedMaxWidth > 0 ? constrainedMaxWidth : Math.min(MENU_MAX_WIDTH, viewportWidth);
-const desiredMinWidth = Math.min(160, safeMaxWidth || 160);
-let width = Math.min(
-Math.max(triggerRect.width, scrollWidth, desiredMinWidth),
-safeMaxWidth || 320
-);
-const availableBelow = Math.max(
-0,
-viewportHeight - VIEWPORT_GUTTER - triggerRect.bottom - MENU_OFFSET
-);
-const availableAbove = Math.max(0, triggerRect.top - VIEWPORT_GUTTER - MENU_OFFSET);
-const viewportAllowance = Math.max(0, viewportHeight - VIEWPORT_GUTTER * 2);
-const fallbackAllowance = Math.max(1, viewportAllowance > 0 ? viewportAllowance : scrollHeight);
-function computePlacement(placement: 'top' | 'bottom') {
-const available = placement === 'bottom' ? availableBelow : availableAbove;
-const allowedHeight =
-available > 0 ? Math.min(available, fallbackAllowance) : fallbackAllowance;
-const maxHeight = Math.min(scrollHeight, allowedHeight);
-const height = Math.max(0, maxHeight);
-let top: number;
-if (placement === 'bottom') {
-const rawTop = triggerRect.bottom + MENU_OFFSET;
-const minTop = VIEWPORT_GUTTER;
-const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
-if (maxTop < minTop) {
-top = minTop;
-} else {
-top = Math.min(Math.max(rawTop, minTop), maxTop);
-}
-} else {
-const rawTop = triggerRect.top - MENU_OFFSET - height;
-const minTop = VIEWPORT_GUTTER;
-const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
-if (maxTop < minTop) {
-top = minTop;
-} else {
-top = Math.max(Math.min(rawTop, maxTop), minTop);
-}
-}
-return { placement, top, height, maxHeight };
-}
-const belowMetrics = computePlacement('bottom');
-const aboveMetrics = computePlacement('top');
-let metrics = belowMetrics;
-if (scrollHeight > belowMetrics.maxHeight && aboveMetrics.maxHeight > belowMetrics.maxHeight) {
-metrics = aboveMetrics;
-}
-let left = triggerRect.right - width;
-const maxLeft = viewportWidth - VIEWPORT_GUTTER - width;
-if (maxLeft < VIEWPORT_GUTTER) {
-left = VIEWPORT_GUTTER;
-} else {
-if (left > maxLeft) {
-left = maxLeft;
-}
-if (left < VIEWPORT_GUTTER) {
-left = VIEWPORT_GUTTER;
-}
-}
-menuPosition = {
-top: Math.round(metrics.top),
-left: Math.round(left),
-width: Math.round(width),
-placement: metrics.placement,
-maxHeight: Math.round(metrics.maxHeight)
-};
-}
+function handleSearchKeyDown(event: KeyboardEvent) {
+if (event.isComposing) return;
+if (event.key === 'ArrowDown') {
+event.preventDefault();
+if (compatibleIndices.length === 0) return;
+const currentPos = compatibleIndices.indexOf(highlightedIndex);
+if (currentPos === -1 || currentPos === compatibleIndices.length - 1) {
+highlightedIndex = compatibleIndices[0];
+} else {
+highlightedIndex = compatibleIndices[currentPos + 1];
+}
+} else if (event.key === 'ArrowUp') {
+event.preventDefault();
+if (compatibleIndices.length === 0) return;
+const currentPos = compatibleIndices.indexOf(highlightedIndex);
+if (currentPos === -1 || currentPos === 0) {
+highlightedIndex = compatibleIndices[compatibleIndices.length - 1];
+} else {
+highlightedIndex = compatibleIndices[currentPos - 1];
+}
+} else if (event.key === 'Enter') {
+event.preventDefault();
+if (highlightedIndex >= 0 && highlightedIndex < filteredOptions.length) {
+const option = filteredOptions[highlightedIndex];
+if (isModelCompatible(option)) {
+handleSelect(option.id);
+}
+} else if (compatibleIndices.length > 0) {
+// No selection - highlight first compatible option
+highlightedIndex = compatibleIndices[0];
+}
+}
+}
@@ -356,6 +293,14 @@
async function handleSelect(modelId: string) {
if (shouldCloseMenu) {
closeMenu();
+// Focus the chat textarea after model selection
+requestAnimationFrame(() => {
+const textarea = document.querySelector<HTMLTextAreaElement>(
+'[data-slot="chat-form"] textarea'
+);
+textarea?.focus();
+});
}
}
@@ -404,10 +349,7 @@
}
</script>
-<svelte:window onresize={handleResize} />
-<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
-<div class={cn('relative inline-flex flex-col items-end gap-1', className)} bind:this={container}>
+<div class={cn('relative inline-flex flex-col items-end gap-1', className)}>
{#if loading && options.length === 0 && isRouter}
<div class="flex items-center gap-2 text-xs text-muted-foreground">
<Loader2 class="h-3.5 w-3.5 animate-spin" />
@@ -418,9 +360,8 @@
{:else}
{@const selectedOption = getDisplayOption()}
-<div class="relative">
-<button
-type="button"
+<Popover.Root bind:open={isOpen} onOpenChange={handleOpenChange}>
+<Popover.Trigger
class={cn(
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
!isCurrentModelInCache()
@@ -430,15 +371,11 @@
: isHighlightedCurrentModelActive
? 'text-foreground'
: 'text-muted-foreground',
-isOpen ? 'text-foreground' : '',
-className
+isOpen ? 'text-foreground' : ''
)}
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
-aria-haspopup={isRouter ? 'listbox' : undefined}
-aria-expanded={isRouter ? isOpen : undefined}
-onclick={toggleOpen}
-bind:this={triggerButton}
-disabled={disabled || updating}
+onclick={handleTriggerClick}
+disabled={disabled || updating || !isRouter}
>
<Package class="h-3.5 w-3.5" />
@@ -451,33 +388,35 @@
{:else if isRouter}
<ChevronDown class="h-3 w-3.5" />
{/if}
-</button>
-{#if isOpen && isRouter}
-<div
-bind:this={menuRef}
-use:portalToBody
-class={cn(
-'fixed z-[1000] overflow-hidden rounded-md border bg-popover shadow-lg transition-opacity',
-menuPosition ? 'opacity-100' : 'pointer-events-none opacity-0'
-)}
-role="listbox"
-style:top={menuPosition ? `${menuPosition.top}px` : undefined}
-style:left={menuPosition ? `${menuPosition.left}px` : undefined}
-style:width={menuPosition ? `${menuPosition.width}px` : undefined}
-data-placement={menuPosition?.placement ?? 'bottom'}
+</Popover.Trigger>
+<Popover.Content
+class="group/popover-content w-96 max-w-[calc(100vw-2rem)] p-0"
+align="end"
+sideOffset={8}
+collisionPadding={16}
>
+<div class="flex max-h-[50dvh] flex-col overflow-hidden">
<div
-class="overflow-y-auto py-1"
-style:max-height={menuPosition && menuPosition.maxHeight > 0
-? `${menuPosition.maxHeight}px`
-: undefined}
+class="order-1 shrink-0 border-b p-4 group-data-[side=top]/popover-content:order-2 group-data-[side=top]/popover-content:border-t group-data-[side=top]/popover-content:border-b-0"
+>
+<SearchInput
+id="model-search"
+placeholder="Search models..."
+bind:value={searchTerm}
+bind:ref={searchInputRef}
+onClose={closeMenu}
+onKeyDown={handleSearchKeyDown}
+/>
+</div>
+<div
+class="models-list order-2 min-h-0 flex-1 overflow-y-auto group-data-[side=top]/popover-content:order-1"
>
{#if !isCurrentModelInCache() && currentModel}
<!-- Show unavailable model as first option (disabled) -->
<button
type="button"
-class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-3 py-2 text-left text-sm text-red-400"
+class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-4 py-2 text-left text-sm text-red-400"
role="option"
aria-selected="true"
aria-disabled="true"
@@ -488,20 +427,25 @@
</button>
<div class="my-1 h-px bg-border"></div>
{/if}
-{#each options as option (option.id)}
+{#if filteredOptions.length === 0}
+<p class="px-4 py-3 text-sm text-muted-foreground">No models found.</p>
+{/if}
+{#each filteredOptions as option, index (option.id)}
{@const status = getModelStatus(option.model)}
{@const isLoaded = status === ServerModelStatus.LOADED}
{@const isLoading = status === ServerModelStatus.LOADING}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isCompatible = isModelCompatible(option)}
+{@const isHighlighted = index === highlightedIndex}
{@const missingModalities = getMissingModalities(option)}
<div
class={cn(
-'group flex w-full items-center gap-2 px-3 py-2 text-left text-sm transition focus:outline-none',
+'group flex w-full items-center gap-2 px-4 py-2 text-left text-sm transition focus:outline-none',
isCompatible
? 'cursor-pointer hover:bg-muted focus:bg-muted'
: 'cursor-not-allowed opacity-50',
-isSelected
+isSelected || isHighlighted
? 'bg-accent text-accent-foreground'
: isCompatible
? 'hover:bg-accent hover:text-accent-foreground'
@@ -509,10 +453,11 @@
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
)}
role="option"
-aria-selected={isSelected}
+aria-selected={isSelected || isHighlighted}
aria-disabled={!isCompatible}
tabindex={isCompatible ? 0 : -1}
onclick={() => isCompatible && handleSelect(option.id)}
+onmouseenter={() => (highlightedIndex = index)}
onkeydown={(e) => {
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
e.preventDefault();
@@ -586,8 +531,8 @@
{/each}
</div>
</div>
-{/if}
-</div>
+</Popover.Content>
+</Popover.Root>
{/if}
</div>

View File

@@ -0,0 +1,19 @@
import Root from './popover.svelte';
import Close from './popover-close.svelte';
import Content from './popover-content.svelte';
import Trigger from './popover-trigger.svelte';
import Portal from './popover-portal.svelte';
export {
Root,
Content,
Trigger,
Close,
Portal,
//
Root as Popover,
Content as PopoverContent,
Trigger as PopoverTrigger,
Close as PopoverClose,
Portal as PopoverPortal
};

View File

@@ -0,0 +1,7 @@
<script lang="ts">
import { Popover as PopoverPrimitive } from 'bits-ui';
let { ref = $bindable(null), ...restProps }: PopoverPrimitive.CloseProps = $props();
</script>
<PopoverPrimitive.Close bind:ref data-slot="popover-close" {...restProps} />

View File

@@ -0,0 +1,37 @@
<script lang="ts">
import { Popover as PopoverPrimitive } from 'bits-ui';
import PopoverPortal from './popover-portal.svelte';
import { cn, type WithoutChildrenOrChild } from '$lib/components/ui/utils.js';
import type { ComponentProps } from 'svelte';
let {
ref = $bindable(null),
class: className,
sideOffset = 4,
side,
align = 'center',
collisionPadding = 8,
avoidCollisions = true,
portalProps,
...restProps
}: PopoverPrimitive.ContentProps & {
portalProps?: WithoutChildrenOrChild<ComponentProps<typeof PopoverPortal>>;
} = $props();
</script>
<PopoverPortal {...portalProps}>
<PopoverPrimitive.Content
bind:ref
data-slot="popover-content"
{sideOffset}
{side}
{align}
{collisionPadding}
{avoidCollisions}
class={cn(
'z-50 w-72 origin-(--bits-popover-content-transform-origin) rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-hidden data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-end-2 data-[side=right]:slide-in-from-start-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
className
)}
{...restProps}
/>
</PopoverPortal>

View File

@@ -0,0 +1,7 @@
<script lang="ts">
import { Popover as PopoverPrimitive } from 'bits-ui';
let { ...restProps }: PopoverPrimitive.PortalProps = $props();
</script>
<PopoverPrimitive.Portal {...restProps} />

View File

@@ -0,0 +1,17 @@
<script lang="ts">
import { cn } from '$lib/components/ui/utils.js';
import { Popover as PopoverPrimitive } from 'bits-ui';
let {
ref = $bindable(null),
class: className,
...restProps
}: PopoverPrimitive.TriggerProps = $props();
</script>
<PopoverPrimitive.Trigger
bind:ref
data-slot="popover-trigger"
class={cn('', className)}
{...restProps}
/>

View File

@@ -0,0 +1,7 @@
<script lang="ts">
import { Popover as PopoverPrimitive } from 'bits-ui';
let { open = $bindable(false), ...restProps }: PopoverPrimitive.RootProps = $props();
</script>
<PopoverPrimitive.Root bind:open {...restProps} />

View File

@@ -1,3 +1,2 @@
export const VIEWPORT_GUTTER = 8;
export const MENU_OFFSET = 6;
-export const MENU_MAX_WIDTH = 320;

View File

@@ -295,14 +295,21 @@ class ModelsStore {
* Fetch props for a specific model from /props endpoint
* Uses caching to avoid redundant requests
*
+* In ROUTER mode, this will only fetch props if the model is loaded,
+* since unloaded models return 400 from /props endpoint.
+*
* @param modelId - Model identifier to fetch props for
-* @returns Props data or null if fetch failed
+* @returns Props data or null if fetch failed or model not loaded
*/
async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
// Return cached props if available
const cached = this.modelPropsCache.get(modelId);
if (cached) return cached;
+if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
+return null;
+}
// Avoid duplicate fetches
if (this.modelPropsFetching.has(modelId)) return null;