Merge branch 'ggml-org:master' into power-law-sampler
This commit is contained in:
commit
66e2d17c7f
|
|
@ -56,7 +56,7 @@ docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:light -m /model
|
|||
or with a server image:
|
||||
|
||||
```bash
|
||||
docker run -v /path/to/models:/models -p 8000:8000 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512
|
||||
docker run -v /path/to/models:/models -p 8080:8080 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512
|
||||
```
|
||||
|
||||
## Docker With CUDA
|
||||
|
|
@ -91,7 +91,7 @@ After building locally, Usage is similar to the non-CUDA examples, but you'll ne
|
|||
```bash
|
||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
||||
```
|
||||
|
||||
## Docker With MUSA
|
||||
|
|
@ -125,5 +125,5 @@ After building locally, Usage is similar to the non-MUSA examples, but you'll ne
|
|||
```bash
|
||||
docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||
docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||
docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
||||
docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
||||
```
|
||||
|
|
|
|||
|
|
@ -312,16 +312,9 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
|
|||
}
|
||||
|
||||
// this is a very naive implementation, but for our case the number of free blocks should be very small
|
||||
static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct ggml_tensor * tensor) {
|
||||
static void ggml_dyn_tallocr_free_bytes(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
|
||||
size = aligned_offset(NULL, size, alloc->alignment);
|
||||
|
||||
AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
|
||||
__func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks);
|
||||
|
||||
#ifdef GGML_ALLOCATOR_DEBUG
|
||||
remove_allocated_tensor(alloc, addr, tensor);
|
||||
#endif
|
||||
|
||||
struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
|
||||
|
||||
// see if we can merge with an existing block
|
||||
|
|
@ -357,8 +350,6 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
|
|||
}
|
||||
// otherwise, add a new block
|
||||
ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
|
||||
|
||||
GGML_UNUSED(tensor);
|
||||
}
|
||||
|
||||
static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
|
||||
|
|
@ -616,13 +607,17 @@ static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_ten
|
|||
|
||||
GGML_ASSERT(parent_size >= node_size);
|
||||
|
||||
if (parent_size > node_size) {
|
||||
// note: we want after the freeing the chunks to continue to be aligned
|
||||
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
|
||||
parent_size = aligned_offset(NULL, parent_size, p_alloc->alignment);
|
||||
node_size = aligned_offset(NULL, node_size, p_alloc->alignment);
|
||||
|
||||
if (parent_size > node_size) {
|
||||
struct buffer_address p_addr = p_hn->addr;
|
||||
p_addr.offset += node_size;
|
||||
size_t extra_size = parent_size - node_size;
|
||||
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
|
||||
ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
|
||||
ggml_dyn_tallocr_free_bytes(p_alloc, p_addr, extra_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -706,7 +701,14 @@ static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * n
|
|||
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
|
||||
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
|
||||
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
|
||||
ggml_dyn_tallocr_free_tensor(alloc, hn->addr, size, node);
|
||||
|
||||
AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
|
||||
__func__, node->name, hn->addr.chunk, hn->addr.offset, size, alloc->chunks[hn->addr.chunk]->n_free_blocks);
|
||||
#ifdef GGML_ALLOCATOR_DEBUG
|
||||
remove_allocated_tensor(alloc, hn->addr, node);
|
||||
#endif
|
||||
|
||||
ggml_dyn_tallocr_free_bytes(alloc, hn->addr, size);
|
||||
hn->allocated = false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4630,9 +4630,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|||
case GGML_OP_CUMSUM:
|
||||
case GGML_OP_TRI:
|
||||
case GGML_OP_DIAG:
|
||||
return true;
|
||||
case GGML_OP_SOLVE_TRI:
|
||||
return op->src[0]->ne[0] <= 64 && op->src[1]->ne[0] <= 32;
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,80 @@
|
|||
#include "solve_tri.cuh"
|
||||
|
||||
#define MAX_N_FAST 64
|
||||
#define MAX_K_FAST 32
|
||||
|
||||
static __global__ void get_batch_pointers(const float * A,
|
||||
float * X,
|
||||
const float ** A_ptrs,
|
||||
float ** X_ptrs,
|
||||
int64_t ne02,
|
||||
int64_t total_batches,
|
||||
size_t s02,
|
||||
size_t s03,
|
||||
size_t s2,
|
||||
size_t s3) {
|
||||
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx >= total_batches) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int64_t i3 = idx / ne02;
|
||||
const int64_t i2 = idx % ne02;
|
||||
|
||||
A_ptrs[idx] = A + i3 * s03 + i2 * s02;
|
||||
X_ptrs[idx] = X + i3 * s3 + i2 * s2;
|
||||
}
|
||||
|
||||
static void solve_tri_f32_cublas(ggml_backend_cuda_context & ctx,
|
||||
const float * A,
|
||||
const float * B,
|
||||
float * X,
|
||||
int n,
|
||||
int k,
|
||||
int64_t ne02,
|
||||
int64_t ne03,
|
||||
size_t s02,
|
||||
size_t s03,
|
||||
size_t s12,
|
||||
size_t s13,
|
||||
size_t s2,
|
||||
size_t s3,
|
||||
cudaStream_t stream) {
|
||||
const float alpha = 1.0f;
|
||||
const int64_t total_batches = ne02 * ne03;
|
||||
if (total_batches == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Bulk copy B -> X (contiguous tensors)
|
||||
if (X != B) {
|
||||
const int64_t total_elements_BX = n * k * total_batches;
|
||||
CUDA_CHECK(cudaMemcpyAsync(X, B, total_elements_BX * sizeof(float), cudaMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
|
||||
const int id = ggml_cuda_get_device();
|
||||
|
||||
ggml_cuda_pool_alloc<const float *> A_ptrs_alloc(ctx.pool(id), total_batches);
|
||||
ggml_cuda_pool_alloc<float *> X_ptrs_alloc(ctx.pool(id), total_batches);
|
||||
|
||||
const float ** A_ptrs_dev = A_ptrs_alloc.get();
|
||||
float ** X_ptrs_dev = X_ptrs_alloc.get();
|
||||
|
||||
get_batch_pointers<<<(total_batches + 255) / 256, 256, 0, stream>>>(A, X, A_ptrs_dev, X_ptrs_dev, ne02,
|
||||
total_batches, s02, s03, s2, s3);
|
||||
|
||||
CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));
|
||||
|
||||
// Yes, this is necessary, without this we get RMSE errors
|
||||
CUBLAS_CHECK(cublasSetMathMode(ctx.cublas_handle(id), CUBLAS_DEFAULT_MATH));
|
||||
CUBLAS_CHECK(cublasStrsmBatched(ctx.cublas_handle(id), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N,
|
||||
CUBLAS_DIAG_NON_UNIT, k, n, &alpha, A_ptrs_dev, n, X_ptrs_dev, k, total_batches));
|
||||
|
||||
// revert to standard mode from common.cuh
|
||||
CUBLAS_CHECK(cublasSetMathMode(ctx.cublas_handle(id), CUBLAS_TF32_TENSOR_OP_MATH));
|
||||
|
||||
GGML_UNUSED_VARS(s12, s13);
|
||||
}
|
||||
|
||||
// ======================
|
||||
// Fast Kernel (n <= 64, k <= 32) - Warp-based parallel reduction
|
||||
|
|
@ -176,20 +250,26 @@ static void solve_tri_f32_cuda(const float * A,
|
|||
}
|
||||
|
||||
void ggml_cuda_op_solve_tri(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0]; // A (triangular n x x matrix)
|
||||
const ggml_tensor * src1 = dst->src[1]; // B (right hand side of n x k equation columns)
|
||||
const ggml_tensor * src0 = dst->src[0]; // A (n×n, lower triangular)
|
||||
const ggml_tensor * src1 = dst->src[1]; // B (n×k)
|
||||
|
||||
ggml_is_contiguous(src0);
|
||||
ggml_is_contiguous(src1);
|
||||
|
||||
const int64_t n = src0->ne[0];
|
||||
const int64_t k = src1->ne[0];
|
||||
const int64_t ne02 = src0->ne[2];
|
||||
const int64_t ne03 = src0->ne[3];
|
||||
|
||||
GGML_ASSERT(n <= 64);
|
||||
GGML_ASSERT(k <= 32);
|
||||
|
||||
solve_tri_f32_cuda((const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k, src0->ne[2],
|
||||
src0->ne[3], src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
|
||||
if (n <= MAX_N_FAST && k <= MAX_K_FAST) {
|
||||
solve_tri_f32_cuda((const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
|
||||
src0->ne[2], src0->ne[3], src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
|
||||
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
|
||||
dst->nb[3] / sizeof(float), ctx.stream());
|
||||
} else {
|
||||
solve_tri_f32_cublas(ctx, (const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
|
||||
ne02, ne03, src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
|
||||
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
|
||||
dst->nb[3] / sizeof(float), ctx.stream());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,9 @@
|
|||
#define CUDA_R_16F HIPBLAS_R_16F
|
||||
#define CUDA_R_16BF HIPBLAS_R_16B
|
||||
#define CUDA_R_32F HIPBLAS_R_32F
|
||||
#define CUBLAS_SIDE_RIGHT HIPBLAS_SIDE_RIGHT
|
||||
#define CUBLAS_FILL_MODE_UPPER HIPBLAS_FILL_MODE_UPPER
|
||||
#define CUBLAS_DIAG_NON_UNIT HIPBLAS_DIAG_NON_UNIT
|
||||
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
|
||||
#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
|
||||
#define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
|
||||
|
|
@ -30,6 +33,7 @@
|
|||
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
||||
#define __all_sync(mask, var) __all(var)
|
||||
#define __any_sync(mask, var) __any(var)
|
||||
#define cublasStrsmBatched hipblasStrsmBatched
|
||||
#define cublasCreate hipblasCreate
|
||||
#define cublasDestroy hipblasDestroy
|
||||
#define cublasGemmEx hipblasGemmEx
|
||||
|
|
|
|||
|
|
@ -12,11 +12,16 @@
|
|||
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP MUBLAS_GEMM_DEFAULT
|
||||
#define CUBLAS_OP_N MUBLAS_OP_N
|
||||
#define CUBLAS_OP_T MUBLAS_OP_T
|
||||
#define CUBLAS_DEFAULT_MATH MUBLAS_DEFAULT_MATH
|
||||
#define CUBLAS_SIDE_RIGHT MUBLAS_SIDE_RIGHT
|
||||
#define CUBLAS_FILL_MODE_UPPER MUBLAS_FILL_MODE_UPPER
|
||||
#define CUBLAS_DIAG_NON_UNIT MUBLAS_DIAG_NON_UNIT
|
||||
#define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
|
||||
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_TENSOR_OP_MATH
|
||||
#define CUDA_R_16F MUSA_R_16F
|
||||
#define CUDA_R_16BF MUSA_R_16BF
|
||||
#define CUDA_R_32F MUSA_R_32F
|
||||
#define cublasStrsmBatched mublasStrsmBatched
|
||||
#define cublasComputeType_t cudaDataType_t
|
||||
#define cublasCreate mublasCreate
|
||||
#define cublasDestroy mublasDestroy
|
||||
|
|
|
|||
|
|
@ -695,6 +695,8 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
|
|||
udata->seq_idx .resize(LLAMA_MAX_SEQ, -1);
|
||||
udata->output .resize(n_tokens);
|
||||
|
||||
udata->seq_id_data.reserve(n_tokens);
|
||||
|
||||
seq_set_t seq_set_unq;
|
||||
|
||||
for (size_t i = 0; i < idxs.size(); ++i) {
|
||||
|
|
@ -716,11 +718,13 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
|
|||
}
|
||||
|
||||
udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
|
||||
udata->seq_id[i] = batch.seq_id[idxs[i]];
|
||||
udata->output[i] = batch.logits[idxs[i]];
|
||||
|
||||
for (int s = 0; s < udata->n_seq_id[i]; ++s) {
|
||||
seq_set_unq.set(udata->seq_id[i][s]);
|
||||
const llama_seq_id seq_id = batch.seq_id[idxs[i]][s];
|
||||
|
||||
udata->seq_id_data.push_back(seq_id);
|
||||
seq_set_unq.set(seq_id);
|
||||
}
|
||||
|
||||
if (udata->output[i]) {
|
||||
|
|
@ -728,6 +732,12 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
|
|||
}
|
||||
}
|
||||
|
||||
llama_seq_id * seq_id_ptr = udata->seq_id_data.data();
|
||||
for (size_t i = 0; i < idxs.size(); ++i) {
|
||||
udata->seq_id[i] = seq_id_ptr;
|
||||
seq_id_ptr += udata->n_seq_id[i];
|
||||
}
|
||||
|
||||
for (uint32_t s = 0; s < n_seq_max; ++s) {
|
||||
if (seq_set_unq.test(s)) {
|
||||
udata->seq_idx[s] = udata->seq_id_unq.size();
|
||||
|
|
|
|||
|
|
@ -56,13 +56,15 @@ struct llama_ubatch {
|
|||
std::vector<float> embd;
|
||||
std::vector<llama_pos> pos;
|
||||
std::vector<int32_t> n_seq_id;
|
||||
std::vector<llama_seq_id *> seq_id;
|
||||
std::vector<llama_seq_id *> seq_id; // these point into the seq_id_data below
|
||||
std::vector<llama_seq_id> seq_id_unq;
|
||||
std::vector<int32_t> seq_idx;
|
||||
std::vector<int8_t> output;
|
||||
|
||||
std::vector<llama_seq_id> seq_id_data;
|
||||
};
|
||||
|
||||
// the llama_ubatch pointers above point to this data if set. otherwise - points to non-owning data
|
||||
// the llama_ubatch pointers above point to this data if set. otherwise - point to external non-owning data
|
||||
std::shared_ptr<data_t> data;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -7861,9 +7861,24 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 64, 64, 2, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 79, 79, 5, 3 }, { 417, 79, 5, 3 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 80, 80, 2, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 79, 80, 2, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 81, 80, 2, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 80, 80, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 79, 80, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 81, 80, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 84, 84, 4, 4 }, { 32, 84, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 95, 95, 8, 8 }, { 40, 95, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 31, 128, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 32, 128, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 3, 4 }, { 32, 128, 3, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 32, 128, 4, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 200, 64, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 384, 64, 4, 4 }));
|
||||
|
||||
for (bool v : {false, true}) {
|
||||
for (bool circular : {false, true}) {
|
||||
|
|
@ -8064,12 +8079,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, 2*16416));
|
||||
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 2 }, { 6, 64, 4, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 8, 128, 4, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 32, 64, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
|
||||
// qwen3next with CHUNK_SIZE 64
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 8, 32 }, { 64, 64, 8, 32 }));
|
||||
// qwen3next with CHUNK_SIZE 128
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 32 }, { 128, 128, 4, 32 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 256, 256, 4, 2 }, { 128, 256, 4, 2 }));
|
||||
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER, GGML_TYPE_F32, { 256, 256, 4, 4 }));
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG, GGML_TYPE_F32, { 1024, 1024, 8, 4 }));
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -41,7 +41,7 @@
|
|||
"@tailwindcss/vite": "^4.0.0",
|
||||
"@types/node": "^22",
|
||||
"@vitest/browser": "^3.2.3",
|
||||
"bits-ui": "^2.8.11",
|
||||
"bits-ui": "^2.14.4",
|
||||
"clsx": "^2.1.1",
|
||||
"dexie": "^4.0.11",
|
||||
"eslint": "^9.18.0",
|
||||
|
|
@ -3343,17 +3343,17 @@
|
|||
}
|
||||
},
|
||||
"node_modules/bits-ui": {
|
||||
"version": "2.8.11",
|
||||
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.8.11.tgz",
|
||||
"integrity": "sha512-lKN9rAk69my6j7H1D4B87r8LrHuEtfEsf1xCixBj9yViql2BdI3f04HyyyT7T1GOCpgb9+8b0B+nm3LN81Konw==",
|
||||
"version": "2.14.4",
|
||||
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.14.4.tgz",
|
||||
"integrity": "sha512-W6kenhnbd/YVvur+DKkaVJ6GldE53eLewur5AhUCqslYQ0vjZr8eWlOfwZnMiPB+PF5HMVqf61vXBvmyrAmPWg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/core": "^1.7.1",
|
||||
"@floating-ui/dom": "^1.7.1",
|
||||
"esm-env": "^1.1.2",
|
||||
"runed": "^0.29.1",
|
||||
"svelte-toolbelt": "^0.9.3",
|
||||
"runed": "^0.35.1",
|
||||
"svelte-toolbelt": "^0.10.6",
|
||||
"tabbable": "^6.2.0"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -3368,9 +3368,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/bits-ui/node_modules/runed": {
|
||||
"version": "0.29.2",
|
||||
"resolved": "https://registry.npmjs.org/runed/-/runed-0.29.2.tgz",
|
||||
"integrity": "sha512-0cq6cA6sYGZwl/FvVqjx9YN+1xEBu9sDDyuWdDW1yWX7JF2wmvmVKfH+hVCZs+csW+P3ARH92MjI3H9QTagOQA==",
|
||||
"version": "0.35.1",
|
||||
"resolved": "https://registry.npmjs.org/runed/-/runed-0.35.1.tgz",
|
||||
"integrity": "sha512-2F4Q/FZzbeJTFdIS/PuOoPRSm92sA2LhzTnv6FXhCoENb3huf5+fDuNOg1LNvGOouy3u/225qxmuJvcV3IZK5Q==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte",
|
||||
|
|
@ -3378,23 +3378,31 @@
|
|||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esm-env": "^1.0.0"
|
||||
"dequal": "^2.0.3",
|
||||
"esm-env": "^1.0.0",
|
||||
"lz-string": "^1.5.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@sveltejs/kit": "^2.21.0",
|
||||
"svelte": "^5.7.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@sveltejs/kit": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bits-ui/node_modules/svelte-toolbelt": {
|
||||
"version": "0.9.3",
|
||||
"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.9.3.tgz",
|
||||
"integrity": "sha512-HCSWxCtVmv+c6g1ACb8LTwHVbDqLKJvHpo6J8TaqwUme2hj9ATJCpjCPNISR1OCq2Q4U1KT41if9ON0isINQZw==",
|
||||
"version": "0.10.6",
|
||||
"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.10.6.tgz",
|
||||
"integrity": "sha512-YWuX+RE+CnWYx09yseAe4ZVMM7e7GRFZM6OYWpBKOb++s+SQ8RBIMMe+Bs/CznBMc0QPLjr+vDBxTAkozXsFXQ==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/huntabyte"
|
||||
],
|
||||
"dependencies": {
|
||||
"clsx": "^2.1.1",
|
||||
"runed": "^0.29.0",
|
||||
"runed": "^0.35.1",
|
||||
"style-to-object": "^1.0.8"
|
||||
},
|
||||
"engines": {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@
|
|||
"@tailwindcss/vite": "^4.0.0",
|
||||
"@types/node": "^22",
|
||||
"@vitest/browser": "^3.2.3",
|
||||
"bits-ui": "^2.8.11",
|
||||
"bits-ui": "^2.14.4",
|
||||
"clsx": "^2.1.1",
|
||||
"dexie": "^4.0.11",
|
||||
"eslint": "^9.18.0",
|
||||
|
|
|
|||
|
|
@ -331,6 +331,7 @@
|
|||
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {disabled
|
||||
? 'cursor-not-allowed opacity-60'
|
||||
: ''} {className}"
|
||||
data-slot="chat-form"
|
||||
>
|
||||
<ChatAttachmentsList
|
||||
bind:uploadedFiles
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
<script lang="ts">
|
||||
import { Input } from '$lib/components/ui/input';
|
||||
import { Search } from '@lucide/svelte';
|
||||
import { SearchInput } from '$lib/components/app';
|
||||
|
||||
interface Props {
|
||||
value?: string;
|
||||
|
|
@ -15,19 +14,6 @@
|
|||
onInput,
|
||||
class: className
|
||||
}: Props = $props();
|
||||
|
||||
function handleInput(event: Event) {
|
||||
const target = event.target as HTMLInputElement;
|
||||
|
||||
value = target.value;
|
||||
onInput?.(target.value);
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="relative mb-4 {className}">
|
||||
<Search
|
||||
class="absolute top-1/2 left-3 h-4 w-4 -translate-y-1/2 transform text-muted-foreground"
|
||||
/>
|
||||
|
||||
<Input bind:value class="pl-10" oninput={handleInput} {placeholder} type="search" />
|
||||
</div>
|
||||
<SearchInput bind:value {placeholder} {onInput} class="mb-4 {className}" />
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ export { default as CopyToClipboardIcon } from './misc/CopyToClipboardIcon.svelt
|
|||
export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte';
|
||||
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
|
||||
export { default as RemoveButton } from './misc/RemoveButton.svelte';
|
||||
export { default as SearchInput } from './misc/SearchInput.svelte';
|
||||
export { default as SyntaxHighlightedCode } from './misc/SyntaxHighlightedCode.svelte';
|
||||
export { default as ModelsSelector } from './models/ModelsSelector.svelte';
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,73 @@
|
|||
<script lang="ts">
|
||||
import { Input } from '$lib/components/ui/input';
|
||||
import { Search, X } from '@lucide/svelte';
|
||||
|
||||
interface Props {
|
||||
value?: string;
|
||||
placeholder?: string;
|
||||
onInput?: (value: string) => void;
|
||||
onClose?: () => void;
|
||||
onKeyDown?: (event: KeyboardEvent) => void;
|
||||
class?: string;
|
||||
id?: string;
|
||||
ref?: HTMLInputElement | null;
|
||||
}
|
||||
|
||||
let {
|
||||
value = $bindable(''),
|
||||
placeholder = 'Search...',
|
||||
onInput,
|
||||
onClose,
|
||||
onKeyDown,
|
||||
class: className,
|
||||
id,
|
||||
ref = $bindable(null)
|
||||
}: Props = $props();
|
||||
|
||||
let showClearButton = $derived(!!value || !!onClose);
|
||||
|
||||
function handleInput(event: Event) {
|
||||
const target = event.target as HTMLInputElement;
|
||||
|
||||
value = target.value;
|
||||
onInput?.(target.value);
|
||||
}
|
||||
|
||||
function handleClear() {
|
||||
if (value) {
|
||||
value = '';
|
||||
onInput?.('');
|
||||
ref?.focus();
|
||||
} else {
|
||||
onClose?.();
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="relative {className}">
|
||||
<Search
|
||||
class="absolute top-1/2 left-3 h-4 w-4 -translate-y-1/2 transform text-muted-foreground"
|
||||
/>
|
||||
|
||||
<Input
|
||||
{id}
|
||||
bind:value
|
||||
bind:ref
|
||||
class="pl-9 {showClearButton ? 'pr-9' : ''}"
|
||||
oninput={handleInput}
|
||||
onkeydown={onKeyDown}
|
||||
{placeholder}
|
||||
type="search"
|
||||
/>
|
||||
|
||||
{#if showClearButton}
|
||||
<button
|
||||
type="button"
|
||||
class="absolute top-1/2 right-3 -translate-y-1/2 transform text-muted-foreground transition-colors hover:text-foreground"
|
||||
onclick={handleClear}
|
||||
aria-label={value ? 'Clear search' : 'Close'}
|
||||
>
|
||||
<X class="h-4 w-4" />
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
@ -2,8 +2,8 @@
|
|||
import { onMount, tick } from 'svelte';
|
||||
import { ChevronDown, EyeOff, Loader2, MicOff, Package, Power } from '@lucide/svelte';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import * as Popover from '$lib/components/ui/popover';
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
import { portalToBody } from '$lib/utils';
|
||||
import {
|
||||
modelsStore,
|
||||
modelOptions,
|
||||
|
|
@ -17,12 +17,8 @@
|
|||
import { usedModalities, conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { DialogModelInformation } from '$lib/components/app';
|
||||
import {
|
||||
MENU_MAX_WIDTH,
|
||||
MENU_OFFSET,
|
||||
VIEWPORT_GUTTER
|
||||
} from '$lib/constants/floating-ui-constraints';
|
||||
import { DialogModelInformation, SearchInput } from '$lib/components/app';
|
||||
import type { ModelOption } from '$lib/types/models';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -145,185 +141,126 @@
|
|||
return options.some((option) => option.model === currentModel);
|
||||
});
|
||||
|
||||
let isOpen = $state(false);
|
||||
let showModelDialog = $state(false);
|
||||
let container: HTMLDivElement | null = null;
|
||||
let menuRef = $state<HTMLDivElement | null>(null);
|
||||
let triggerButton = $state<HTMLButtonElement | null>(null);
|
||||
let menuPosition = $state<{
|
||||
top: number;
|
||||
left: number;
|
||||
width: number;
|
||||
placement: 'top' | 'bottom';
|
||||
maxHeight: number;
|
||||
} | null>(null);
|
||||
let searchTerm = $state('');
|
||||
let searchInputRef = $state<HTMLInputElement | null>(null);
|
||||
let highlightedIndex = $state<number>(-1);
|
||||
|
||||
onMount(async () => {
|
||||
try {
|
||||
await modelsStore.fetch();
|
||||
} catch (error) {
|
||||
console.error('Unable to load models:', error);
|
||||
}
|
||||
let filteredOptions: ModelOption[] = $derived(
|
||||
(() => {
|
||||
const term = searchTerm.trim().toLowerCase();
|
||||
if (!term) return options;
|
||||
|
||||
return options.filter(
|
||||
(option) =>
|
||||
option.model.toLowerCase().includes(term) || option.name?.toLowerCase().includes(term)
|
||||
);
|
||||
})()
|
||||
);
|
||||
|
||||
// Get indices of compatible options for keyboard navigation
|
||||
let compatibleIndices = $derived(
|
||||
filteredOptions
|
||||
.map((option, index) => (isModelCompatible(option) ? index : -1))
|
||||
.filter((i) => i !== -1)
|
||||
);
|
||||
|
||||
// Reset highlighted index when search term changes
|
||||
$effect(() => {
|
||||
void searchTerm;
|
||||
highlightedIndex = -1;
|
||||
});
|
||||
|
||||
function toggleOpen() {
|
||||
if (loading || updating) return;
|
||||
|
||||
if (isRouter) {
|
||||
// Router mode: show dropdown
|
||||
if (isOpen) {
|
||||
closeMenu();
|
||||
} else {
|
||||
openMenu();
|
||||
}
|
||||
} else {
|
||||
// Single model mode: show dialog
|
||||
showModelDialog = true;
|
||||
}
|
||||
}
|
||||
|
||||
async function openMenu() {
|
||||
let isOpen = $state(false);
|
||||
let showModelDialog = $state(false);
|
||||
|
||||
onMount(() => {
|
||||
modelsStore.fetch().catch((error) => {
|
||||
console.error('Unable to load models:', error);
|
||||
});
|
||||
});
|
||||
|
||||
function handleOpenChange(open: boolean) {
|
||||
if (loading || updating) return;
|
||||
|
||||
if (open) {
|
||||
isOpen = true;
|
||||
await tick();
|
||||
updateMenuPosition();
|
||||
requestAnimationFrame(() => updateMenuPosition());
|
||||
searchTerm = '';
|
||||
highlightedIndex = -1;
|
||||
|
||||
// Focus search input after popover opens
|
||||
tick().then(() => {
|
||||
requestAnimationFrame(() => searchInputRef?.focus());
|
||||
});
|
||||
|
||||
if (isRouter) {
|
||||
modelsStore.fetchRouterModels().then(() => {
|
||||
modelsStore.fetchModalitiesForLoadedModels();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
isOpen = false;
|
||||
searchTerm = '';
|
||||
highlightedIndex = -1;
|
||||
}
|
||||
}
|
||||
|
||||
function handleTriggerClick() {
|
||||
if (loading || updating) return;
|
||||
|
||||
if (!isRouter) {
|
||||
// Single model mode: show dialog instead of popover
|
||||
showModelDialog = true;
|
||||
}
|
||||
// For router mode, the Popover handles open/close
|
||||
}
|
||||
|
||||
export function open() {
|
||||
if (isRouter) {
|
||||
openMenu();
|
||||
handleOpenChange(true);
|
||||
} else {
|
||||
showModelDialog = true;
|
||||
}
|
||||
}
|
||||
|
||||
function closeMenu() {
|
||||
if (!isOpen) return;
|
||||
|
||||
isOpen = false;
|
||||
menuPosition = null;
|
||||
handleOpenChange(false);
|
||||
}
|
||||
|
||||
function handlePointerDown(event: PointerEvent) {
|
||||
if (!container) return;
|
||||
function handleSearchKeyDown(event: KeyboardEvent) {
|
||||
if (event.isComposing) return;
|
||||
|
||||
const target = event.target as Node | null;
|
||||
if (event.key === 'ArrowDown') {
|
||||
event.preventDefault();
|
||||
if (compatibleIndices.length === 0) return;
|
||||
|
||||
if (target && !container.contains(target) && !(menuRef && menuRef.contains(target))) {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function handleKeydown(event: KeyboardEvent) {
|
||||
if (event.key === 'Escape') {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function handleResize() {
|
||||
if (isOpen) {
|
||||
updateMenuPosition();
|
||||
}
|
||||
}
|
||||
|
||||
function updateMenuPosition() {
|
||||
if (!isOpen || !triggerButton || !menuRef) return;
|
||||
|
||||
const triggerRect = triggerButton.getBoundingClientRect();
|
||||
const viewportWidth = window.innerWidth;
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
if (viewportWidth === 0 || viewportHeight === 0) return;
|
||||
|
||||
const scrollWidth = menuRef.scrollWidth;
|
||||
const scrollHeight = menuRef.scrollHeight;
|
||||
|
||||
const availableWidth = Math.max(0, viewportWidth - VIEWPORT_GUTTER * 2);
|
||||
const constrainedMaxWidth = Math.min(MENU_MAX_WIDTH, availableWidth || MENU_MAX_WIDTH);
|
||||
const safeMaxWidth =
|
||||
constrainedMaxWidth > 0 ? constrainedMaxWidth : Math.min(MENU_MAX_WIDTH, viewportWidth);
|
||||
const desiredMinWidth = Math.min(160, safeMaxWidth || 160);
|
||||
|
||||
let width = Math.min(
|
||||
Math.max(triggerRect.width, scrollWidth, desiredMinWidth),
|
||||
safeMaxWidth || 320
|
||||
);
|
||||
|
||||
const availableBelow = Math.max(
|
||||
0,
|
||||
viewportHeight - VIEWPORT_GUTTER - triggerRect.bottom - MENU_OFFSET
|
||||
);
|
||||
const availableAbove = Math.max(0, triggerRect.top - VIEWPORT_GUTTER - MENU_OFFSET);
|
||||
const viewportAllowance = Math.max(0, viewportHeight - VIEWPORT_GUTTER * 2);
|
||||
const fallbackAllowance = Math.max(1, viewportAllowance > 0 ? viewportAllowance : scrollHeight);
|
||||
|
||||
function computePlacement(placement: 'top' | 'bottom') {
|
||||
const available = placement === 'bottom' ? availableBelow : availableAbove;
|
||||
const allowedHeight =
|
||||
available > 0 ? Math.min(available, fallbackAllowance) : fallbackAllowance;
|
||||
const maxHeight = Math.min(scrollHeight, allowedHeight);
|
||||
const height = Math.max(0, maxHeight);
|
||||
|
||||
let top: number;
|
||||
if (placement === 'bottom') {
|
||||
const rawTop = triggerRect.bottom + MENU_OFFSET;
|
||||
const minTop = VIEWPORT_GUTTER;
|
||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
||||
if (maxTop < minTop) {
|
||||
top = minTop;
|
||||
const currentPos = compatibleIndices.indexOf(highlightedIndex);
|
||||
if (currentPos === -1 || currentPos === compatibleIndices.length - 1) {
|
||||
highlightedIndex = compatibleIndices[0];
|
||||
} else {
|
||||
top = Math.min(Math.max(rawTop, minTop), maxTop);
|
||||
highlightedIndex = compatibleIndices[currentPos + 1];
|
||||
}
|
||||
} else if (event.key === 'ArrowUp') {
|
||||
event.preventDefault();
|
||||
if (compatibleIndices.length === 0) return;
|
||||
|
||||
const currentPos = compatibleIndices.indexOf(highlightedIndex);
|
||||
if (currentPos === -1 || currentPos === 0) {
|
||||
highlightedIndex = compatibleIndices[compatibleIndices.length - 1];
|
||||
} else {
|
||||
const rawTop = triggerRect.top - MENU_OFFSET - height;
|
||||
const minTop = VIEWPORT_GUTTER;
|
||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
||||
if (maxTop < minTop) {
|
||||
top = minTop;
|
||||
} else {
|
||||
top = Math.max(Math.min(rawTop, maxTop), minTop);
|
||||
highlightedIndex = compatibleIndices[currentPos - 1];
|
||||
}
|
||||
} else if (event.key === 'Enter') {
|
||||
event.preventDefault();
|
||||
if (highlightedIndex >= 0 && highlightedIndex < filteredOptions.length) {
|
||||
const option = filteredOptions[highlightedIndex];
|
||||
if (isModelCompatible(option)) {
|
||||
handleSelect(option.id);
|
||||
}
|
||||
} else if (compatibleIndices.length > 0) {
|
||||
// No selection - highlight first compatible option
|
||||
highlightedIndex = compatibleIndices[0];
|
||||
}
|
||||
}
|
||||
|
||||
return { placement, top, height, maxHeight };
|
||||
}
|
||||
|
||||
const belowMetrics = computePlacement('bottom');
|
||||
const aboveMetrics = computePlacement('top');
|
||||
|
||||
let metrics = belowMetrics;
|
||||
if (scrollHeight > belowMetrics.maxHeight && aboveMetrics.maxHeight > belowMetrics.maxHeight) {
|
||||
metrics = aboveMetrics;
|
||||
}
|
||||
|
||||
let left = triggerRect.right - width;
|
||||
const maxLeft = viewportWidth - VIEWPORT_GUTTER - width;
|
||||
if (maxLeft < VIEWPORT_GUTTER) {
|
||||
left = VIEWPORT_GUTTER;
|
||||
} else {
|
||||
if (left > maxLeft) {
|
||||
left = maxLeft;
|
||||
}
|
||||
if (left < VIEWPORT_GUTTER) {
|
||||
left = VIEWPORT_GUTTER;
|
||||
}
|
||||
}
|
||||
|
||||
menuPosition = {
|
||||
top: Math.round(metrics.top),
|
||||
left: Math.round(left),
|
||||
width: Math.round(width),
|
||||
placement: metrics.placement,
|
||||
maxHeight: Math.round(metrics.maxHeight)
|
||||
};
|
||||
}
|
||||
|
||||
async function handleSelect(modelId: string) {
|
||||
|
|
@ -356,6 +293,14 @@
|
|||
|
||||
if (shouldCloseMenu) {
|
||||
closeMenu();
|
||||
|
||||
// Focus the chat textarea after model selection
|
||||
requestAnimationFrame(() => {
|
||||
const textarea = document.querySelector<HTMLTextAreaElement>(
|
||||
'[data-slot="chat-form"] textarea'
|
||||
);
|
||||
textarea?.focus();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -404,10 +349,7 @@
|
|||
}
|
||||
</script>
|
||||
|
||||
<svelte:window onresize={handleResize} />
|
||||
<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
|
||||
|
||||
<div class={cn('relative inline-flex flex-col items-end gap-1', className)} bind:this={container}>
|
||||
<div class={cn('relative inline-flex flex-col items-end gap-1', className)}>
|
||||
{#if loading && options.length === 0 && isRouter}
|
||||
<div class="flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<Loader2 class="h-3.5 w-3.5 animate-spin" />
|
||||
|
|
@ -418,9 +360,8 @@
|
|||
{:else}
|
||||
{@const selectedOption = getDisplayOption()}
|
||||
|
||||
<div class="relative">
|
||||
<button
|
||||
type="button"
|
||||
<Popover.Root bind:open={isOpen} onOpenChange={handleOpenChange}>
|
||||
<Popover.Trigger
|
||||
class={cn(
|
||||
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
|
||||
!isCurrentModelInCache()
|
||||
|
|
@ -430,15 +371,11 @@
|
|||
: isHighlightedCurrentModelActive
|
||||
? 'text-foreground'
|
||||
: 'text-muted-foreground',
|
||||
isOpen ? 'text-foreground' : '',
|
||||
className
|
||||
isOpen ? 'text-foreground' : ''
|
||||
)}
|
||||
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
|
||||
aria-haspopup={isRouter ? 'listbox' : undefined}
|
||||
aria-expanded={isRouter ? isOpen : undefined}
|
||||
onclick={toggleOpen}
|
||||
bind:this={triggerButton}
|
||||
disabled={disabled || updating}
|
||||
onclick={handleTriggerClick}
|
||||
disabled={disabled || updating || !isRouter}
|
||||
>
|
||||
<Package class="h-3.5 w-3.5" />
|
||||
|
||||
|
|
@ -451,33 +388,35 @@
|
|||
{:else if isRouter}
|
||||
<ChevronDown class="h-3 w-3.5" />
|
||||
{/if}
|
||||
</button>
|
||||
</Popover.Trigger>
|
||||
|
||||
{#if isOpen && isRouter}
|
||||
<div
|
||||
bind:this={menuRef}
|
||||
use:portalToBody
|
||||
class={cn(
|
||||
'fixed z-[1000] overflow-hidden rounded-md border bg-popover shadow-lg transition-opacity',
|
||||
menuPosition ? 'opacity-100' : 'pointer-events-none opacity-0'
|
||||
)}
|
||||
role="listbox"
|
||||
style:top={menuPosition ? `${menuPosition.top}px` : undefined}
|
||||
style:left={menuPosition ? `${menuPosition.left}px` : undefined}
|
||||
style:width={menuPosition ? `${menuPosition.width}px` : undefined}
|
||||
data-placement={menuPosition?.placement ?? 'bottom'}
|
||||
<Popover.Content
|
||||
class="group/popover-content w-96 max-w-[calc(100vw-2rem)] p-0"
|
||||
align="end"
|
||||
sideOffset={8}
|
||||
collisionPadding={16}
|
||||
>
|
||||
<div class="flex max-h-[50dvh] flex-col overflow-hidden">
|
||||
<div
|
||||
class="overflow-y-auto py-1"
|
||||
style:max-height={menuPosition && menuPosition.maxHeight > 0
|
||||
? `${menuPosition.maxHeight}px`
|
||||
: undefined}
|
||||
class="order-1 shrink-0 border-b p-4 group-data-[side=top]/popover-content:order-2 group-data-[side=top]/popover-content:border-t group-data-[side=top]/popover-content:border-b-0"
|
||||
>
|
||||
<SearchInput
|
||||
id="model-search"
|
||||
placeholder="Search models..."
|
||||
bind:value={searchTerm}
|
||||
bind:ref={searchInputRef}
|
||||
onClose={closeMenu}
|
||||
onKeyDown={handleSearchKeyDown}
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
class="models-list order-2 min-h-0 flex-1 overflow-y-auto group-data-[side=top]/popover-content:order-1"
|
||||
>
|
||||
{#if !isCurrentModelInCache() && currentModel}
|
||||
<!-- Show unavailable model as first option (disabled) -->
|
||||
<button
|
||||
type="button"
|
||||
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-3 py-2 text-left text-sm text-red-400"
|
||||
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-4 py-2 text-left text-sm text-red-400"
|
||||
role="option"
|
||||
aria-selected="true"
|
||||
aria-disabled="true"
|
||||
|
|
@ -488,20 +427,25 @@
|
|||
</button>
|
||||
<div class="my-1 h-px bg-border"></div>
|
||||
{/if}
|
||||
{#each options as option (option.id)}
|
||||
{#if filteredOptions.length === 0}
|
||||
<p class="px-4 py-3 text-sm text-muted-foreground">No models found.</p>
|
||||
{/if}
|
||||
{#each filteredOptions as option, index (option.id)}
|
||||
{@const status = getModelStatus(option.model)}
|
||||
{@const isLoaded = status === ServerModelStatus.LOADED}
|
||||
{@const isLoading = status === ServerModelStatus.LOADING}
|
||||
{@const isSelected = currentModel === option.model || activeId === option.id}
|
||||
{@const isCompatible = isModelCompatible(option)}
|
||||
{@const isHighlighted = index === highlightedIndex}
|
||||
{@const missingModalities = getMissingModalities(option)}
|
||||
|
||||
<div
|
||||
class={cn(
|
||||
'group flex w-full items-center gap-2 px-3 py-2 text-left text-sm transition focus:outline-none',
|
||||
'group flex w-full items-center gap-2 px-4 py-2 text-left text-sm transition focus:outline-none',
|
||||
isCompatible
|
||||
? 'cursor-pointer hover:bg-muted focus:bg-muted'
|
||||
: 'cursor-not-allowed opacity-50',
|
||||
isSelected
|
||||
isSelected || isHighlighted
|
||||
? 'bg-accent text-accent-foreground'
|
||||
: isCompatible
|
||||
? 'hover:bg-accent hover:text-accent-foreground'
|
||||
|
|
@ -509,10 +453,11 @@
|
|||
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
|
||||
)}
|
||||
role="option"
|
||||
aria-selected={isSelected}
|
||||
aria-selected={isSelected || isHighlighted}
|
||||
aria-disabled={!isCompatible}
|
||||
tabindex={isCompatible ? 0 : -1}
|
||||
onclick={() => isCompatible && handleSelect(option.id)}
|
||||
onmouseenter={() => (highlightedIndex = index)}
|
||||
onkeydown={(e) => {
|
||||
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
|
||||
e.preventDefault();
|
||||
|
|
@ -586,8 +531,8 @@
|
|||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</Popover.Content>
|
||||
</Popover.Root>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
import Root from './popover.svelte';
|
||||
import Close from './popover-close.svelte';
|
||||
import Content from './popover-content.svelte';
|
||||
import Trigger from './popover-trigger.svelte';
|
||||
import Portal from './popover-portal.svelte';
|
||||
|
||||
export {
|
||||
Root,
|
||||
Content,
|
||||
Trigger,
|
||||
Close,
|
||||
Portal,
|
||||
//
|
||||
Root as Popover,
|
||||
Content as PopoverContent,
|
||||
Trigger as PopoverTrigger,
|
||||
Close as PopoverClose,
|
||||
Portal as PopoverPortal
|
||||
};
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||
|
||||
let { ref = $bindable(null), ...restProps }: PopoverPrimitive.CloseProps = $props();
|
||||
</script>
|
||||
|
||||
<PopoverPrimitive.Close bind:ref data-slot="popover-close" {...restProps} />
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
<script lang="ts">
|
||||
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||
import PopoverPortal from './popover-portal.svelte';
|
||||
import { cn, type WithoutChildrenOrChild } from '$lib/components/ui/utils.js';
|
||||
import type { ComponentProps } from 'svelte';
|
||||
|
||||
let {
|
||||
ref = $bindable(null),
|
||||
class: className,
|
||||
sideOffset = 4,
|
||||
side,
|
||||
align = 'center',
|
||||
collisionPadding = 8,
|
||||
avoidCollisions = true,
|
||||
portalProps,
|
||||
...restProps
|
||||
}: PopoverPrimitive.ContentProps & {
|
||||
portalProps?: WithoutChildrenOrChild<ComponentProps<typeof PopoverPortal>>;
|
||||
} = $props();
|
||||
</script>
|
||||
|
||||
<PopoverPortal {...portalProps}>
|
||||
<PopoverPrimitive.Content
|
||||
bind:ref
|
||||
data-slot="popover-content"
|
||||
{sideOffset}
|
||||
{side}
|
||||
{align}
|
||||
{collisionPadding}
|
||||
{avoidCollisions}
|
||||
class={cn(
|
||||
'z-50 w-72 origin-(--bits-popover-content-transform-origin) rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-hidden data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-end-2 data-[side=right]:slide-in-from-start-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
/>
|
||||
</PopoverPortal>
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||
|
||||
let { ...restProps }: PopoverPrimitive.PortalProps = $props();
|
||||
</script>
|
||||
|
||||
<PopoverPrimitive.Portal {...restProps} />
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
<script lang="ts">
|
||||
import { cn } from '$lib/components/ui/utils.js';
|
||||
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||
|
||||
let {
|
||||
ref = $bindable(null),
|
||||
class: className,
|
||||
...restProps
|
||||
}: PopoverPrimitive.TriggerProps = $props();
|
||||
</script>
|
||||
|
||||
<PopoverPrimitive.Trigger
|
||||
bind:ref
|
||||
data-slot="popover-trigger"
|
||||
class={cn('', className)}
|
||||
{...restProps}
|
||||
/>
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||
|
||||
let { open = $bindable(false), ...restProps }: PopoverPrimitive.RootProps = $props();
|
||||
</script>
|
||||
|
||||
<PopoverPrimitive.Root bind:open {...restProps} />
|
||||
|
|
@ -1,3 +1,2 @@
|
|||
export const VIEWPORT_GUTTER = 8;
|
||||
export const MENU_OFFSET = 6;
|
||||
export const MENU_MAX_WIDTH = 320;
|
||||
|
|
|
|||
|
|
@ -295,14 +295,21 @@ class ModelsStore {
|
|||
* Fetch props for a specific model from /props endpoint
|
||||
* Uses caching to avoid redundant requests
|
||||
*
|
||||
* In ROUTER mode, this will only fetch props if the model is loaded,
|
||||
* since unloaded models return 400 from /props endpoint.
|
||||
*
|
||||
* @param modelId - Model identifier to fetch props for
|
||||
* @returns Props data or null if fetch failed
|
||||
* @returns Props data or null if fetch failed or model not loaded
|
||||
*/
|
||||
async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
|
||||
// Return cached props if available
|
||||
const cached = this.modelPropsCache.get(modelId);
|
||||
if (cached) return cached;
|
||||
|
||||
if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Avoid duplicate fetches
|
||||
if (this.modelPropsFetching.has(modelId)) return null;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue