Merge branch 'ggml-org:master' into power-law-sampler
This commit is contained in:
commit
66e2d17c7f
|
|
@ -56,7 +56,7 @@ docker run -v /path/to/models:/models ghcr.io/ggml-org/llama.cpp:light -m /model
|
||||||
or with a server image:
|
or with a server image:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -v /path/to/models:/models -p 8000:8000 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512
|
docker run -v /path/to/models:/models -p 8080:8080 ghcr.io/ggml-org/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512
|
||||||
```
|
```
|
||||||
|
|
||||||
## Docker With CUDA
|
## Docker With CUDA
|
||||||
|
|
@ -91,7 +91,7 @@ After building locally, Usage is similar to the non-CUDA examples, but you'll ne
|
||||||
```bash
|
```bash
|
||||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||||
docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
||||||
```
|
```
|
||||||
|
|
||||||
## Docker With MUSA
|
## Docker With MUSA
|
||||||
|
|
@ -125,5 +125,5 @@ After building locally, Usage is similar to the non-MUSA examples, but you'll ne
|
||||||
```bash
|
```bash
|
||||||
docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
docker run -v /path/to/models:/models local/llama.cpp:full-musa --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||||
docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
docker run -v /path/to/models:/models local/llama.cpp:light-musa -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
|
||||||
docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
docker run -v /path/to/models:/models local/llama.cpp:server-musa -m /models/7B/ggml-model-q4_0.gguf --port 8080 --host 0.0.0.0 -n 512 --n-gpu-layers 1
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -312,16 +312,9 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is a very naive implementation, but for our case the number of free blocks should be very small
|
// this is a very naive implementation, but for our case the number of free blocks should be very small
|
||||||
static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct ggml_tensor * tensor) {
|
static void ggml_dyn_tallocr_free_bytes(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
|
||||||
size = aligned_offset(NULL, size, alloc->alignment);
|
size = aligned_offset(NULL, size, alloc->alignment);
|
||||||
|
|
||||||
AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
|
|
||||||
__func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks);
|
|
||||||
|
|
||||||
#ifdef GGML_ALLOCATOR_DEBUG
|
|
||||||
remove_allocated_tensor(alloc, addr, tensor);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
|
struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
|
||||||
|
|
||||||
// see if we can merge with an existing block
|
// see if we can merge with an existing block
|
||||||
|
|
@ -357,8 +350,6 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
|
||||||
}
|
}
|
||||||
// otherwise, add a new block
|
// otherwise, add a new block
|
||||||
ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
|
ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
|
||||||
|
|
||||||
GGML_UNUSED(tensor);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
|
static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
|
||||||
|
|
@ -616,13 +607,17 @@ static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_ten
|
||||||
|
|
||||||
GGML_ASSERT(parent_size >= node_size);
|
GGML_ASSERT(parent_size >= node_size);
|
||||||
|
|
||||||
if (parent_size > node_size) {
|
// note: after freeing, we want the chunks to remain aligned
|
||||||
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
|
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
|
||||||
|
parent_size = aligned_offset(NULL, parent_size, p_alloc->alignment);
|
||||||
|
node_size = aligned_offset(NULL, node_size, p_alloc->alignment);
|
||||||
|
|
||||||
|
if (parent_size > node_size) {
|
||||||
struct buffer_address p_addr = p_hn->addr;
|
struct buffer_address p_addr = p_hn->addr;
|
||||||
p_addr.offset += node_size;
|
p_addr.offset += node_size;
|
||||||
size_t extra_size = parent_size - node_size;
|
size_t extra_size = parent_size - node_size;
|
||||||
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
|
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
|
||||||
ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
|
ggml_dyn_tallocr_free_bytes(p_alloc, p_addr, extra_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -706,7 +701,14 @@ static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * n
|
||||||
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
|
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
|
||||||
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
|
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
|
||||||
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
|
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
|
||||||
ggml_dyn_tallocr_free_tensor(alloc, hn->addr, size, node);
|
|
||||||
|
AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
|
||||||
|
__func__, node->name, hn->addr.chunk, hn->addr.offset, size, alloc->chunks[hn->addr.chunk]->n_free_blocks);
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
remove_allocated_tensor(alloc, hn->addr, node);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ggml_dyn_tallocr_free_bytes(alloc, hn->addr, size);
|
||||||
hn->allocated = false;
|
hn->allocated = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4630,9 +4630,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
||||||
case GGML_OP_CUMSUM:
|
case GGML_OP_CUMSUM:
|
||||||
case GGML_OP_TRI:
|
case GGML_OP_TRI:
|
||||||
case GGML_OP_DIAG:
|
case GGML_OP_DIAG:
|
||||||
return true;
|
|
||||||
case GGML_OP_SOLVE_TRI:
|
case GGML_OP_SOLVE_TRI:
|
||||||
return op->src[0]->ne[0] <= 64 && op->src[1]->ne[0] <= 32;
|
return true;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,80 @@
|
||||||
#include "solve_tri.cuh"
|
#include "solve_tri.cuh"
|
||||||
|
|
||||||
#define MAX_N_FAST 64
|
#define MAX_N_FAST 64
|
||||||
|
#define MAX_K_FAST 32
|
||||||
|
|
||||||
|
// Kernel: fills the per-batch pointer tables A_ptrs and X_ptrs.
// Each thread handles one flat batch index idx and splits it into
// i3 = idx / ne02 and i2 = idx % ne02; threads with idx >= total_batches exit early.
// A_ptrs[idx] and X_ptrs[idx] receive A and X offset by i3 * s03 + i2 * s02 and i3 * s3 + i2 * s2.
// The strides are added to float pointers, so they count float elements, not bytes.
// NOTE(review): idx is a 32-bit int while total_batches is int64_t - fine for realistic
// batch counts, but confirm no caller can exceed INT_MAX batches.
static __global__ void get_batch_pointers(const float * A,
|
||||||
|
float * X,
|
||||||
|
const float ** A_ptrs,
|
||||||
|
float ** X_ptrs,
|
||||||
|
int64_t ne02,
|
||||||
|
int64_t total_batches,
|
||||||
|
size_t s02,
|
||||||
|
size_t s03,
|
||||||
|
size_t s2,
|
||||||
|
size_t s3) {
|
||||||
|
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
if (idx >= total_batches) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|

||||||
|
const int64_t i3 = idx / ne02;
|
||||||
|
const int64_t i2 = idx % ne02;
|
||||||
|

||||||
|
A_ptrs[idx] = A + i3 * s03 + i2 * s02;
|
||||||
|
X_ptrs[idx] = X + i3 * s3 + i2 * s2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batched triangular solve delegated to cublasStrsmBatched.
// Steps: return early when there are no batches; copy B into X when the two pointers differ;
// build device tables of per-batch A and X pointers with get_batch_pointers; run the batched
// solve on `stream`, passing the X pointer table as the right-hand-side array.
// n, k           : sizes forwarded to cuBLAS (m = k, n = n, lda = n, ldb = k)
// ne02, ne03     : batch dimensions; total_batches = ne02 * ne03
// s02/s03, s2/s3 : strides of A and X between batches, in float elements
// s12/s13        : accepted but unused (see GGML_UNUSED_VARS at the end)
static void solve_tri_f32_cublas(ggml_backend_cuda_context & ctx,
|
||||||
|
const float * A,
|
||||||
|
const float * B,
|
||||||
|
float * X,
|
||||||
|
int n,
|
||||||
|
int k,
|
||||||
|
int64_t ne02,
|
||||||
|
int64_t ne03,
|
||||||
|
size_t s02,
|
||||||
|
size_t s03,
|
||||||
|
size_t s12,
|
||||||
|
size_t s13,
|
||||||
|
size_t s2,
|
||||||
|
size_t s3,
|
||||||
|
cudaStream_t stream) {
|
||||||
|
const float alpha = 1.0f;
|
||||||
|
const int64_t total_batches = ne02 * ne03;
|
||||||
|
if (total_batches == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|

||||||
|
// Bulk copy B -> X (contiguous tensors)
|
||||||
|
if (X != B) {
|
||||||
|
// NOTE(review): n and k are int, so n * k is evaluated in int before widening to int64_t;
|
||||||
|
// consider casting one operand if very large n * k must be supported.
const int64_t total_elements_BX = n * k * total_batches;
|
||||||
|
CUDA_CHECK(cudaMemcpyAsync(X, B, total_elements_BX * sizeof(float), cudaMemcpyDeviceToDevice, stream));
|
||||||
|
}
|
||||||
|

||||||
|
const int id = ggml_cuda_get_device();
|
||||||
|

||||||
|
ggml_cuda_pool_alloc<const float *> A_ptrs_alloc(ctx.pool(id), total_batches);
|
||||||
|
ggml_cuda_pool_alloc<float *> X_ptrs_alloc(ctx.pool(id), total_batches);
|
||||||
|

||||||
|
const float ** A_ptrs_dev = A_ptrs_alloc.get();
|
||||||
|
float ** X_ptrs_dev = X_ptrs_alloc.get();
|
||||||
|

||||||
|
// one thread per batch: 256 threads per block, block count rounded up
get_batch_pointers<<<(total_batches + 255) / 256, 256, 0, stream>>>(A, X, A_ptrs_dev, X_ptrs_dev, ne02,
|
||||||
|
total_batches, s02, s03, s2, s3);
|
||||||
|

||||||
|
CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));
|
||||||
|

||||||
|
// Yes, this is necessary, without this we get RMSE errors
|
||||||
|
CUBLAS_CHECK(cublasSetMathMode(ctx.cublas_handle(id), CUBLAS_DEFAULT_MATH));
|
||||||
|
// NOTE(review): cuBLAS is column-major; presumably the row-major n x n lower-triangular A is seen as
// its upper-triangular transpose and X as a k x n matrix, which would explain SIDE_RIGHT / FILL_MODE_UPPER
// with m = k, n = n, lda = n, ldb = k - confirm against the ggml tensor layout.
CUBLAS_CHECK(cublasStrsmBatched(ctx.cublas_handle(id), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N,
|
||||||
|
CUBLAS_DIAG_NON_UNIT, k, n, &alpha, A_ptrs_dev, n, X_ptrs_dev, k, total_batches));
|
||||||
|

||||||
|
// revert to standard mode from common.cuh
|
||||||
|
// NOTE(review): this sets TF32 unconditionally instead of restoring the previous mode - assumes the
// handle is always in TF32 tensor-op mode outside this function; confirm.
CUBLAS_CHECK(cublasSetMathMode(ctx.cublas_handle(id), CUBLAS_TF32_TENSOR_OP_MATH));
|
||||||
|

||||||
|
// B is only read by the bulk copy above, so its batch strides are not needed
GGML_UNUSED_VARS(s12, s13);
|
||||||
|
}
|
||||||
|
|
||||||
// ======================
|
// ======================
|
||||||
// Fast Kernel (n <= 64, k <= 32) - Warp-based parallel reduction
|
// Fast Kernel (n <= 64, k <= 32) - Warp-based parallel reduction
|
||||||
|
|
@ -176,20 +250,26 @@ static void solve_tri_f32_cuda(const float * A,
|
||||||
}
|
}
|
||||||
|
|
||||||
// CUDA entry point for the solve_tri op (diff view: old and new lines are interleaved below).
// Reads A from dst->src[0] and B from dst->src[1]; the result pointer passed on is dst->data.
// New side: shapes with n <= MAX_N_FAST and k <= MAX_K_FAST go to solve_tri_f32_cuda, everything
// else goes to solve_tri_f32_cublas. The old side asserted n <= 64 and k <= 32 instead.
// nb[] values are divided by sizeof(float) before being passed on as strides.
void ggml_cuda_op_solve_tri(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
void ggml_cuda_op_solve_tri(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||||
const ggml_tensor * src0 = dst->src[0]; // A (triangular n x x matrix)
|
const ggml_tensor * src0 = dst->src[0]; // A (n×n, lower triangular)
|
||||||
const ggml_tensor * src1 = dst->src[1]; // B (right hand side of n x k equation columns)
|
const ggml_tensor * src1 = dst->src[1]; // B (n×k)
|
||||||
|
|
||||||
// NOTE(review): the results of the ggml_is_contiguous calls below are discarded, so nothing is
// actually checked - presumably they were meant to be wrapped in GGML_ASSERT; confirm.
ggml_is_contiguous(src0);
|
ggml_is_contiguous(src0);
|
||||||
ggml_is_contiguous(src1);
|
ggml_is_contiguous(src1);
|
||||||
|
|
||||||
const int64_t n = src0->ne[0];
|
const int64_t n = src0->ne[0];
|
||||||
const int64_t k = src1->ne[0];
|
const int64_t k = src1->ne[0];
|
||||||
|
const int64_t ne02 = src0->ne[2];
|
||||||
|
const int64_t ne03 = src0->ne[3];
|
||||||
|
|
||||||
GGML_ASSERT(n <= 64);
|
if (n <= MAX_N_FAST && k <= MAX_K_FAST) {
|
||||||
GGML_ASSERT(k <= 32);
|
solve_tri_f32_cuda((const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
|
||||||
|
src0->ne[2], src0->ne[3], src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
|
||||||
solve_tri_f32_cuda((const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k, src0->ne[2],
|
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
|
||||||
src0->ne[3], src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
|
dst->nb[3] / sizeof(float), ctx.stream());
|
||||||
|
} else {
|
||||||
|
solve_tri_f32_cublas(ctx, (const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
|
||||||
|
ne02, ne03, src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
|
||||||
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
|
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
|
||||||
dst->nb[3] / sizeof(float), ctx.stream());
|
dst->nb[3] / sizeof(float), ctx.stream());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@
|
||||||
#define CUDA_R_16F HIPBLAS_R_16F
|
#define CUDA_R_16F HIPBLAS_R_16F
|
||||||
#define CUDA_R_16BF HIPBLAS_R_16B
|
#define CUDA_R_16BF HIPBLAS_R_16B
|
||||||
#define CUDA_R_32F HIPBLAS_R_32F
|
#define CUDA_R_32F HIPBLAS_R_32F
|
||||||
|
#define CUBLAS_SIDE_RIGHT HIPBLAS_SIDE_RIGHT
|
||||||
|
#define CUBLAS_FILL_MODE_UPPER HIPBLAS_FILL_MODE_UPPER
|
||||||
|
#define CUBLAS_DIAG_NON_UNIT HIPBLAS_DIAG_NON_UNIT
|
||||||
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
|
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
|
||||||
#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
|
#define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
|
||||||
#define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
|
#define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
|
||||||
|
|
@ -30,6 +33,7 @@
|
||||||
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
||||||
#define __all_sync(mask, var) __all(var)
|
#define __all_sync(mask, var) __all(var)
|
||||||
#define __any_sync(mask, var) __any(var)
|
#define __any_sync(mask, var) __any(var)
|
||||||
|
#define cublasStrsmBatched hipblasStrsmBatched
|
||||||
#define cublasCreate hipblasCreate
|
#define cublasCreate hipblasCreate
|
||||||
#define cublasDestroy hipblasDestroy
|
#define cublasDestroy hipblasDestroy
|
||||||
#define cublasGemmEx hipblasGemmEx
|
#define cublasGemmEx hipblasGemmEx
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,16 @@
|
||||||
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP MUBLAS_GEMM_DEFAULT
|
#define CUBLAS_GEMM_DEFAULT_TENSOR_OP MUBLAS_GEMM_DEFAULT
|
||||||
#define CUBLAS_OP_N MUBLAS_OP_N
|
#define CUBLAS_OP_N MUBLAS_OP_N
|
||||||
#define CUBLAS_OP_T MUBLAS_OP_T
|
#define CUBLAS_OP_T MUBLAS_OP_T
|
||||||
|
#define CUBLAS_DEFAULT_MATH MUBLAS_DEFAULT_MATH
|
||||||
|
#define CUBLAS_SIDE_RIGHT MUBLAS_SIDE_RIGHT
|
||||||
|
#define CUBLAS_FILL_MODE_UPPER MUBLAS_FILL_MODE_UPPER
|
||||||
|
#define CUBLAS_DIAG_NON_UNIT MUBLAS_DIAG_NON_UNIT
|
||||||
#define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
|
#define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
|
||||||
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_TENSOR_OP_MATH
|
#define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_TENSOR_OP_MATH
|
||||||
#define CUDA_R_16F MUSA_R_16F
|
#define CUDA_R_16F MUSA_R_16F
|
||||||
#define CUDA_R_16BF MUSA_R_16BF
|
#define CUDA_R_16BF MUSA_R_16BF
|
||||||
#define CUDA_R_32F MUSA_R_32F
|
#define CUDA_R_32F MUSA_R_32F
|
||||||
|
#define cublasStrsmBatched mublasStrsmBatched
|
||||||
#define cublasComputeType_t cudaDataType_t
|
#define cublasComputeType_t cudaDataType_t
|
||||||
#define cublasCreate mublasCreate
|
#define cublasCreate mublasCreate
|
||||||
#define cublasDestroy mublasDestroy
|
#define cublasDestroy mublasDestroy
|
||||||
|
|
|
||||||
|
|
@ -695,6 +695,8 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
|
||||||
udata->seq_idx .resize(LLAMA_MAX_SEQ, -1);
|
udata->seq_idx .resize(LLAMA_MAX_SEQ, -1);
|
||||||
udata->output .resize(n_tokens);
|
udata->output .resize(n_tokens);
|
||||||
|
|
||||||
|
udata->seq_id_data.reserve(n_tokens);
|
||||||
|
|
||||||
seq_set_t seq_set_unq;
|
seq_set_t seq_set_unq;
|
||||||
|
|
||||||
for (size_t i = 0; i < idxs.size(); ++i) {
|
for (size_t i = 0; i < idxs.size(); ++i) {
|
||||||
|
|
@ -716,11 +718,13 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
|
||||||
}
|
}
|
||||||
|
|
||||||
udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
|
udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
|
||||||
udata->seq_id[i] = batch.seq_id[idxs[i]];
|
|
||||||
udata->output[i] = batch.logits[idxs[i]];
|
udata->output[i] = batch.logits[idxs[i]];
|
||||||
|
|
||||||
for (int s = 0; s < udata->n_seq_id[i]; ++s) {
|
for (int s = 0; s < udata->n_seq_id[i]; ++s) {
|
||||||
seq_set_unq.set(udata->seq_id[i][s]);
|
const llama_seq_id seq_id = batch.seq_id[idxs[i]][s];
|
||||||
|
|
||||||
|
udata->seq_id_data.push_back(seq_id);
|
||||||
|
seq_set_unq.set(seq_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (udata->output[i]) {
|
if (udata->output[i]) {
|
||||||
|
|
@ -728,6 +732,12 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llama_seq_id * seq_id_ptr = udata->seq_id_data.data();
|
||||||
|
for (size_t i = 0; i < idxs.size(); ++i) {
|
||||||
|
udata->seq_id[i] = seq_id_ptr;
|
||||||
|
seq_id_ptr += udata->n_seq_id[i];
|
||||||
|
}
|
||||||
|
|
||||||
for (uint32_t s = 0; s < n_seq_max; ++s) {
|
for (uint32_t s = 0; s < n_seq_max; ++s) {
|
||||||
if (seq_set_unq.test(s)) {
|
if (seq_set_unq.test(s)) {
|
||||||
udata->seq_idx[s] = udata->seq_id_unq.size();
|
udata->seq_idx[s] = udata->seq_id_unq.size();
|
||||||
|
|
|
||||||
|
|
@ -56,13 +56,15 @@ struct llama_ubatch {
|
||||||
std::vector<float> embd;
|
std::vector<float> embd;
|
||||||
std::vector<llama_pos> pos;
|
std::vector<llama_pos> pos;
|
||||||
std::vector<int32_t> n_seq_id;
|
std::vector<int32_t> n_seq_id;
|
||||||
std::vector<llama_seq_id *> seq_id;
|
std::vector<llama_seq_id *> seq_id; // these point into the seq_id_data below
|
||||||
std::vector<llama_seq_id> seq_id_unq;
|
std::vector<llama_seq_id> seq_id_unq;
|
||||||
std::vector<int32_t> seq_idx;
|
std::vector<int32_t> seq_idx;
|
||||||
std::vector<int8_t> output;
|
std::vector<int8_t> output;
|
||||||
|
|
||||||
|
std::vector<llama_seq_id> seq_id_data;
|
||||||
};
|
};
|
||||||
|
|
||||||
// the llama_ubatch pointers above point to this data if set. otherwise - points to non-owning data
|
// the llama_ubatch pointers above point to this data if set. otherwise - point to external non-owning data
|
||||||
std::shared_ptr<data_t> data;
|
std::shared_ptr<data_t> data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7861,9 +7861,24 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 64, 64, 2, 2 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 79, 79, 5, 3 }, { 417, 79, 5, 3 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 80, 80, 2, 8 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 79, 80, 2, 8 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 81, 80, 2, 8 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 80, 80, 8, 8 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 79, 80, 8, 8 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 81, 80, 8, 8 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 84, 84, 4, 4 }, { 32, 84, 4, 4 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 95, 95, 8, 8 }, { 40, 95, 8, 8 }));
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 31, 128, 4, 4 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 31, 128, 4, 4 }));
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 32, 128, 4, 4 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 3, 4 }, { 32, 128, 3, 4 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 32, 128, 4, 1 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 200, 64, 4, 4 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 384, 64, 4, 4 }));
|
||||||
|
|
||||||
for (bool v : {false, true}) {
|
for (bool v : {false, true}) {
|
||||||
for (bool circular : {false, true}) {
|
for (bool circular : {false, true}) {
|
||||||
|
|
@ -8064,12 +8079,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
|
||||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, 2*16416));
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, 2*16416));
|
||||||
|
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 2 }, { 6, 64, 4, 2 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 32, 64, 4, 4 }));
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 8, 128, 4, 1 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
|
||||||
// qwen3next with CHUNK_SIZE 64
|
// qwen3next with CHUNK_SIZE 64
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 8, 32 }, { 64, 64, 8, 32 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 8, 32 }, { 64, 64, 8, 32 }));
|
||||||
// qwen3next with CHUNK_SIZE 128
|
// qwen3next with CHUNK_SIZE 128
|
||||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 32 }, { 128, 128, 4, 32 }));
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 32 }, { 128, 128, 4, 32 }));
|
||||||
|
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 256, 256, 4, 2 }, { 128, 256, 4, 2 }));
|
||||||
|
|
||||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER, GGML_TYPE_F32, { 256, 256, 4, 4 }));
|
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER, GGML_TYPE_F32, { 256, 256, 4, 4 }));
|
||||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG, GGML_TYPE_F32, { 1024, 1024, 8, 4 }));
|
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG, GGML_TYPE_F32, { 1024, 1024, 8, 4 }));
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -41,7 +41,7 @@
|
||||||
"@tailwindcss/vite": "^4.0.0",
|
"@tailwindcss/vite": "^4.0.0",
|
||||||
"@types/node": "^22",
|
"@types/node": "^22",
|
||||||
"@vitest/browser": "^3.2.3",
|
"@vitest/browser": "^3.2.3",
|
||||||
"bits-ui": "^2.8.11",
|
"bits-ui": "^2.14.4",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
"dexie": "^4.0.11",
|
"dexie": "^4.0.11",
|
||||||
"eslint": "^9.18.0",
|
"eslint": "^9.18.0",
|
||||||
|
|
@ -3343,17 +3343,17 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/bits-ui": {
|
"node_modules/bits-ui": {
|
||||||
"version": "2.8.11",
|
"version": "2.14.4",
|
||||||
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.8.11.tgz",
|
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.14.4.tgz",
|
||||||
"integrity": "sha512-lKN9rAk69my6j7H1D4B87r8LrHuEtfEsf1xCixBj9yViql2BdI3f04HyyyT7T1GOCpgb9+8b0B+nm3LN81Konw==",
|
"integrity": "sha512-W6kenhnbd/YVvur+DKkaVJ6GldE53eLewur5AhUCqslYQ0vjZr8eWlOfwZnMiPB+PF5HMVqf61vXBvmyrAmPWg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@floating-ui/core": "^1.7.1",
|
"@floating-ui/core": "^1.7.1",
|
||||||
"@floating-ui/dom": "^1.7.1",
|
"@floating-ui/dom": "^1.7.1",
|
||||||
"esm-env": "^1.1.2",
|
"esm-env": "^1.1.2",
|
||||||
"runed": "^0.29.1",
|
"runed": "^0.35.1",
|
||||||
"svelte-toolbelt": "^0.9.3",
|
"svelte-toolbelt": "^0.10.6",
|
||||||
"tabbable": "^6.2.0"
|
"tabbable": "^6.2.0"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|
@ -3368,9 +3368,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/bits-ui/node_modules/runed": {
|
"node_modules/bits-ui/node_modules/runed": {
|
||||||
"version": "0.29.2",
|
"version": "0.35.1",
|
||||||
"resolved": "https://registry.npmjs.org/runed/-/runed-0.29.2.tgz",
|
"resolved": "https://registry.npmjs.org/runed/-/runed-0.35.1.tgz",
|
||||||
"integrity": "sha512-0cq6cA6sYGZwl/FvVqjx9YN+1xEBu9sDDyuWdDW1yWX7JF2wmvmVKfH+hVCZs+csW+P3ARH92MjI3H9QTagOQA==",
|
"integrity": "sha512-2F4Q/FZzbeJTFdIS/PuOoPRSm92sA2LhzTnv6FXhCoENb3huf5+fDuNOg1LNvGOouy3u/225qxmuJvcV3IZK5Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"funding": [
|
"funding": [
|
||||||
"https://github.com/sponsors/huntabyte",
|
"https://github.com/sponsors/huntabyte",
|
||||||
|
|
@ -3378,23 +3378,31 @@
|
||||||
],
|
],
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"esm-env": "^1.0.0"
|
"dequal": "^2.0.3",
|
||||||
|
"esm-env": "^1.0.0",
|
||||||
|
"lz-string": "^1.5.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
|
"@sveltejs/kit": "^2.21.0",
|
||||||
"svelte": "^5.7.0"
|
"svelte": "^5.7.0"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@sveltejs/kit": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/bits-ui/node_modules/svelte-toolbelt": {
|
"node_modules/bits-ui/node_modules/svelte-toolbelt": {
|
||||||
"version": "0.9.3",
|
"version": "0.10.6",
|
||||||
"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.9.3.tgz",
|
"resolved": "https://registry.npmjs.org/svelte-toolbelt/-/svelte-toolbelt-0.10.6.tgz",
|
||||||
"integrity": "sha512-HCSWxCtVmv+c6g1ACb8LTwHVbDqLKJvHpo6J8TaqwUme2hj9ATJCpjCPNISR1OCq2Q4U1KT41if9ON0isINQZw==",
|
"integrity": "sha512-YWuX+RE+CnWYx09yseAe4ZVMM7e7GRFZM6OYWpBKOb++s+SQ8RBIMMe+Bs/CznBMc0QPLjr+vDBxTAkozXsFXQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"funding": [
|
"funding": [
|
||||||
"https://github.com/sponsors/huntabyte"
|
"https://github.com/sponsors/huntabyte"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
"runed": "^0.29.0",
|
"runed": "^0.35.1",
|
||||||
"style-to-object": "^1.0.8"
|
"style-to-object": "^1.0.8"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@
|
||||||
"@tailwindcss/vite": "^4.0.0",
|
"@tailwindcss/vite": "^4.0.0",
|
||||||
"@types/node": "^22",
|
"@types/node": "^22",
|
||||||
"@vitest/browser": "^3.2.3",
|
"@vitest/browser": "^3.2.3",
|
||||||
"bits-ui": "^2.8.11",
|
"bits-ui": "^2.14.4",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
"dexie": "^4.0.11",
|
"dexie": "^4.0.11",
|
||||||
"eslint": "^9.18.0",
|
"eslint": "^9.18.0",
|
||||||
|
|
|
||||||
|
|
@ -331,6 +331,7 @@
|
||||||
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {disabled
|
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {disabled
|
||||||
? 'cursor-not-allowed opacity-60'
|
? 'cursor-not-allowed opacity-60'
|
||||||
: ''} {className}"
|
: ''} {className}"
|
||||||
|
data-slot="chat-form"
|
||||||
>
|
>
|
||||||
<ChatAttachmentsList
|
<ChatAttachmentsList
|
||||||
bind:uploadedFiles
|
bind:uploadedFiles
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { Input } from '$lib/components/ui/input';
|
import { SearchInput } from '$lib/components/app';
|
||||||
import { Search } from '@lucide/svelte';
|
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
value?: string;
|
value?: string;
|
||||||
|
|
@ -15,19 +14,6 @@
|
||||||
onInput,
|
onInput,
|
||||||
class: className
|
class: className
|
||||||
}: Props = $props();
|
}: Props = $props();
|
||||||
|
|
||||||
function handleInput(event: Event) {
|
|
||||||
const target = event.target as HTMLInputElement;
|
|
||||||
|
|
||||||
value = target.value;
|
|
||||||
onInput?.(target.value);
|
|
||||||
}
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="relative mb-4 {className}">
|
<SearchInput bind:value {placeholder} {onInput} class="mb-4 {className}" />
|
||||||
<Search
|
|
||||||
class="absolute top-1/2 left-3 h-4 w-4 -translate-y-1/2 transform text-muted-foreground"
|
|
||||||
/>
|
|
||||||
|
|
||||||
<Input bind:value class="pl-10" oninput={handleInput} {placeholder} type="search" />
|
|
||||||
</div>
|
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,7 @@ export { default as CopyToClipboardIcon } from './misc/CopyToClipboardIcon.svelt
|
||||||
export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte';
|
export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte';
|
||||||
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
|
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
|
||||||
export { default as RemoveButton } from './misc/RemoveButton.svelte';
|
export { default as RemoveButton } from './misc/RemoveButton.svelte';
|
||||||
|
export { default as SearchInput } from './misc/SearchInput.svelte';
|
||||||
export { default as SyntaxHighlightedCode } from './misc/SyntaxHighlightedCode.svelte';
|
export { default as SyntaxHighlightedCode } from './misc/SyntaxHighlightedCode.svelte';
|
||||||
export { default as ModelsSelector } from './models/ModelsSelector.svelte';
|
export { default as ModelsSelector } from './models/ModelsSelector.svelte';
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { Input } from '$lib/components/ui/input';
|
||||||
|
import { Search, X } from '@lucide/svelte';
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
value?: string;
|
||||||
|
placeholder?: string;
|
||||||
|
onInput?: (value: string) => void;
|
||||||
|
onClose?: () => void;
|
||||||
|
onKeyDown?: (event: KeyboardEvent) => void;
|
||||||
|
class?: string;
|
||||||
|
id?: string;
|
||||||
|
ref?: HTMLInputElement | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
let {
|
||||||
|
value = $bindable(''),
|
||||||
|
placeholder = 'Search...',
|
||||||
|
onInput,
|
||||||
|
onClose,
|
||||||
|
onKeyDown,
|
||||||
|
class: className,
|
||||||
|
id,
|
||||||
|
ref = $bindable(null)
|
||||||
|
}: Props = $props();
|
||||||
|
|
||||||
|
let showClearButton = $derived(!!value || !!onClose);
|
||||||
|
|
||||||
|
function handleInput(event: Event) {
|
||||||
|
const target = event.target as HTMLInputElement;
|
||||||
|
|
||||||
|
value = target.value;
|
||||||
|
onInput?.(target.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleClear() {
|
||||||
|
if (value) {
|
||||||
|
value = '';
|
||||||
|
onInput?.('');
|
||||||
|
ref?.focus();
|
||||||
|
} else {
|
||||||
|
onClose?.();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="relative {className}">
|
||||||
|
<Search
|
||||||
|
class="absolute top-1/2 left-3 h-4 w-4 -translate-y-1/2 transform text-muted-foreground"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Input
|
||||||
|
{id}
|
||||||
|
bind:value
|
||||||
|
bind:ref
|
||||||
|
class="pl-9 {showClearButton ? 'pr-9' : ''}"
|
||||||
|
oninput={handleInput}
|
||||||
|
onkeydown={onKeyDown}
|
||||||
|
{placeholder}
|
||||||
|
type="search"
|
||||||
|
/>
|
||||||
|
|
||||||
|
{#if showClearButton}
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="absolute top-1/2 right-3 -translate-y-1/2 transform text-muted-foreground transition-colors hover:text-foreground"
|
||||||
|
onclick={handleClear}
|
||||||
|
aria-label={value ? 'Clear search' : 'Close'}
|
||||||
|
>
|
||||||
|
<X class="h-4 w-4" />
|
||||||
|
</button>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
@ -2,8 +2,8 @@
|
||||||
import { onMount, tick } from 'svelte';
|
import { onMount, tick } from 'svelte';
|
||||||
import { ChevronDown, EyeOff, Loader2, MicOff, Package, Power } from '@lucide/svelte';
|
import { ChevronDown, EyeOff, Loader2, MicOff, Package, Power } from '@lucide/svelte';
|
||||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||||
|
import * as Popover from '$lib/components/ui/popover';
|
||||||
import { cn } from '$lib/components/ui/utils';
|
import { cn } from '$lib/components/ui/utils';
|
||||||
import { portalToBody } from '$lib/utils';
|
|
||||||
import {
|
import {
|
||||||
modelsStore,
|
modelsStore,
|
||||||
modelOptions,
|
modelOptions,
|
||||||
|
|
@ -17,12 +17,8 @@
|
||||||
import { usedModalities, conversationsStore } from '$lib/stores/conversations.svelte';
|
import { usedModalities, conversationsStore } from '$lib/stores/conversations.svelte';
|
||||||
import { ServerModelStatus } from '$lib/enums';
|
import { ServerModelStatus } from '$lib/enums';
|
||||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||||
import { DialogModelInformation } from '$lib/components/app';
|
import { DialogModelInformation, SearchInput } from '$lib/components/app';
|
||||||
import {
|
import type { ModelOption } from '$lib/types/models';
|
||||||
MENU_MAX_WIDTH,
|
|
||||||
MENU_OFFSET,
|
|
||||||
VIEWPORT_GUTTER
|
|
||||||
} from '$lib/constants/floating-ui-constraints';
|
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
class?: string;
|
class?: string;
|
||||||
|
|
@ -145,185 +141,126 @@
|
||||||
return options.some((option) => option.model === currentModel);
|
return options.some((option) => option.model === currentModel);
|
||||||
});
|
});
|
||||||
|
|
||||||
let isOpen = $state(false);
|
let searchTerm = $state('');
|
||||||
let showModelDialog = $state(false);
|
let searchInputRef = $state<HTMLInputElement | null>(null);
|
||||||
let container: HTMLDivElement | null = null;
|
let highlightedIndex = $state<number>(-1);
|
||||||
let menuRef = $state<HTMLDivElement | null>(null);
|
|
||||||
let triggerButton = $state<HTMLButtonElement | null>(null);
|
|
||||||
let menuPosition = $state<{
|
|
||||||
top: number;
|
|
||||||
left: number;
|
|
||||||
width: number;
|
|
||||||
placement: 'top' | 'bottom';
|
|
||||||
maxHeight: number;
|
|
||||||
} | null>(null);
|
|
||||||
|
|
||||||
onMount(async () => {
|
let filteredOptions: ModelOption[] = $derived(
|
||||||
try {
|
(() => {
|
||||||
await modelsStore.fetch();
|
const term = searchTerm.trim().toLowerCase();
|
||||||
} catch (error) {
|
if (!term) return options;
|
||||||
console.error('Unable to load models:', error);
|
|
||||||
}
|
return options.filter(
|
||||||
|
(option) =>
|
||||||
|
option.model.toLowerCase().includes(term) || option.name?.toLowerCase().includes(term)
|
||||||
|
);
|
||||||
|
})()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Get indices of compatible options for keyboard navigation
|
||||||
|
let compatibleIndices = $derived(
|
||||||
|
filteredOptions
|
||||||
|
.map((option, index) => (isModelCompatible(option) ? index : -1))
|
||||||
|
.filter((i) => i !== -1)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Reset highlighted index when search term changes
|
||||||
|
$effect(() => {
|
||||||
|
void searchTerm;
|
||||||
|
highlightedIndex = -1;
|
||||||
});
|
});
|
||||||
|
|
||||||
function toggleOpen() {
|
let isOpen = $state(false);
|
||||||
if (loading || updating) return;
|
let showModelDialog = $state(false);
|
||||||
|
|
||||||
if (isRouter) {
|
onMount(() => {
|
||||||
// Router mode: show dropdown
|
modelsStore.fetch().catch((error) => {
|
||||||
if (isOpen) {
|
console.error('Unable to load models:', error);
|
||||||
closeMenu();
|
});
|
||||||
} else {
|
});
|
||||||
openMenu();
|
|
||||||
}
|
function handleOpenChange(open: boolean) {
|
||||||
} else {
|
|
||||||
// Single model mode: show dialog
|
|
||||||
showModelDialog = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function openMenu() {
|
|
||||||
if (loading || updating) return;
|
if (loading || updating) return;
|
||||||
|
|
||||||
|
if (open) {
|
||||||
isOpen = true;
|
isOpen = true;
|
||||||
await tick();
|
searchTerm = '';
|
||||||
updateMenuPosition();
|
highlightedIndex = -1;
|
||||||
requestAnimationFrame(() => updateMenuPosition());
|
|
||||||
|
// Focus search input after popover opens
|
||||||
|
tick().then(() => {
|
||||||
|
requestAnimationFrame(() => searchInputRef?.focus());
|
||||||
|
});
|
||||||
|
|
||||||
if (isRouter) {
|
if (isRouter) {
|
||||||
modelsStore.fetchRouterModels().then(() => {
|
modelsStore.fetchRouterModels().then(() => {
|
||||||
modelsStore.fetchModalitiesForLoadedModels();
|
modelsStore.fetchModalitiesForLoadedModels();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
isOpen = false;
|
||||||
|
searchTerm = '';
|
||||||
|
highlightedIndex = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleTriggerClick() {
|
||||||
|
if (loading || updating) return;
|
||||||
|
|
||||||
|
if (!isRouter) {
|
||||||
|
// Single model mode: show dialog instead of popover
|
||||||
|
showModelDialog = true;
|
||||||
|
}
|
||||||
|
// For router mode, the Popover handles open/close
|
||||||
}
|
}
|
||||||
|
|
||||||
export function open() {
|
export function open() {
|
||||||
if (isRouter) {
|
if (isRouter) {
|
||||||
openMenu();
|
handleOpenChange(true);
|
||||||
} else {
|
} else {
|
||||||
showModelDialog = true;
|
showModelDialog = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function closeMenu() {
|
function closeMenu() {
|
||||||
if (!isOpen) return;
|
handleOpenChange(false);
|
||||||
|
|
||||||
isOpen = false;
|
|
||||||
menuPosition = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function handlePointerDown(event: PointerEvent) {
|
function handleSearchKeyDown(event: KeyboardEvent) {
|
||||||
if (!container) return;
|
if (event.isComposing) return;
|
||||||
|
|
||||||
const target = event.target as Node | null;
|
if (event.key === 'ArrowDown') {
|
||||||
|
event.preventDefault();
|
||||||
|
if (compatibleIndices.length === 0) return;
|
||||||
|
|
||||||
if (target && !container.contains(target) && !(menuRef && menuRef.contains(target))) {
|
const currentPos = compatibleIndices.indexOf(highlightedIndex);
|
||||||
closeMenu();
|
if (currentPos === -1 || currentPos === compatibleIndices.length - 1) {
|
||||||
}
|
highlightedIndex = compatibleIndices[0];
|
||||||
}
|
|
||||||
|
|
||||||
function handleKeydown(event: KeyboardEvent) {
|
|
||||||
if (event.key === 'Escape') {
|
|
||||||
closeMenu();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function handleResize() {
|
|
||||||
if (isOpen) {
|
|
||||||
updateMenuPosition();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function updateMenuPosition() {
|
|
||||||
if (!isOpen || !triggerButton || !menuRef) return;
|
|
||||||
|
|
||||||
const triggerRect = triggerButton.getBoundingClientRect();
|
|
||||||
const viewportWidth = window.innerWidth;
|
|
||||||
const viewportHeight = window.innerHeight;
|
|
||||||
|
|
||||||
if (viewportWidth === 0 || viewportHeight === 0) return;
|
|
||||||
|
|
||||||
const scrollWidth = menuRef.scrollWidth;
|
|
||||||
const scrollHeight = menuRef.scrollHeight;
|
|
||||||
|
|
||||||
const availableWidth = Math.max(0, viewportWidth - VIEWPORT_GUTTER * 2);
|
|
||||||
const constrainedMaxWidth = Math.min(MENU_MAX_WIDTH, availableWidth || MENU_MAX_WIDTH);
|
|
||||||
const safeMaxWidth =
|
|
||||||
constrainedMaxWidth > 0 ? constrainedMaxWidth : Math.min(MENU_MAX_WIDTH, viewportWidth);
|
|
||||||
const desiredMinWidth = Math.min(160, safeMaxWidth || 160);
|
|
||||||
|
|
||||||
let width = Math.min(
|
|
||||||
Math.max(triggerRect.width, scrollWidth, desiredMinWidth),
|
|
||||||
safeMaxWidth || 320
|
|
||||||
);
|
|
||||||
|
|
||||||
const availableBelow = Math.max(
|
|
||||||
0,
|
|
||||||
viewportHeight - VIEWPORT_GUTTER - triggerRect.bottom - MENU_OFFSET
|
|
||||||
);
|
|
||||||
const availableAbove = Math.max(0, triggerRect.top - VIEWPORT_GUTTER - MENU_OFFSET);
|
|
||||||
const viewportAllowance = Math.max(0, viewportHeight - VIEWPORT_GUTTER * 2);
|
|
||||||
const fallbackAllowance = Math.max(1, viewportAllowance > 0 ? viewportAllowance : scrollHeight);
|
|
||||||
|
|
||||||
function computePlacement(placement: 'top' | 'bottom') {
|
|
||||||
const available = placement === 'bottom' ? availableBelow : availableAbove;
|
|
||||||
const allowedHeight =
|
|
||||||
available > 0 ? Math.min(available, fallbackAllowance) : fallbackAllowance;
|
|
||||||
const maxHeight = Math.min(scrollHeight, allowedHeight);
|
|
||||||
const height = Math.max(0, maxHeight);
|
|
||||||
|
|
||||||
let top: number;
|
|
||||||
if (placement === 'bottom') {
|
|
||||||
const rawTop = triggerRect.bottom + MENU_OFFSET;
|
|
||||||
const minTop = VIEWPORT_GUTTER;
|
|
||||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
|
||||||
if (maxTop < minTop) {
|
|
||||||
top = minTop;
|
|
||||||
} else {
|
} else {
|
||||||
top = Math.min(Math.max(rawTop, minTop), maxTop);
|
highlightedIndex = compatibleIndices[currentPos + 1];
|
||||||
}
|
}
|
||||||
|
} else if (event.key === 'ArrowUp') {
|
||||||
|
event.preventDefault();
|
||||||
|
if (compatibleIndices.length === 0) return;
|
||||||
|
|
||||||
|
const currentPos = compatibleIndices.indexOf(highlightedIndex);
|
||||||
|
if (currentPos === -1 || currentPos === 0) {
|
||||||
|
highlightedIndex = compatibleIndices[compatibleIndices.length - 1];
|
||||||
} else {
|
} else {
|
||||||
const rawTop = triggerRect.top - MENU_OFFSET - height;
|
highlightedIndex = compatibleIndices[currentPos - 1];
|
||||||
const minTop = VIEWPORT_GUTTER;
|
}
|
||||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
} else if (event.key === 'Enter') {
|
||||||
if (maxTop < minTop) {
|
event.preventDefault();
|
||||||
top = minTop;
|
if (highlightedIndex >= 0 && highlightedIndex < filteredOptions.length) {
|
||||||
} else {
|
const option = filteredOptions[highlightedIndex];
|
||||||
top = Math.max(Math.min(rawTop, maxTop), minTop);
|
if (isModelCompatible(option)) {
|
||||||
|
handleSelect(option.id);
|
||||||
|
}
|
||||||
|
} else if (compatibleIndices.length > 0) {
|
||||||
|
// No selection - highlight first compatible option
|
||||||
|
highlightedIndex = compatibleIndices[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { placement, top, height, maxHeight };
|
|
||||||
}
|
|
||||||
|
|
||||||
const belowMetrics = computePlacement('bottom');
|
|
||||||
const aboveMetrics = computePlacement('top');
|
|
||||||
|
|
||||||
let metrics = belowMetrics;
|
|
||||||
if (scrollHeight > belowMetrics.maxHeight && aboveMetrics.maxHeight > belowMetrics.maxHeight) {
|
|
||||||
metrics = aboveMetrics;
|
|
||||||
}
|
|
||||||
|
|
||||||
let left = triggerRect.right - width;
|
|
||||||
const maxLeft = viewportWidth - VIEWPORT_GUTTER - width;
|
|
||||||
if (maxLeft < VIEWPORT_GUTTER) {
|
|
||||||
left = VIEWPORT_GUTTER;
|
|
||||||
} else {
|
|
||||||
if (left > maxLeft) {
|
|
||||||
left = maxLeft;
|
|
||||||
}
|
|
||||||
if (left < VIEWPORT_GUTTER) {
|
|
||||||
left = VIEWPORT_GUTTER;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
menuPosition = {
|
|
||||||
top: Math.round(metrics.top),
|
|
||||||
left: Math.round(left),
|
|
||||||
width: Math.round(width),
|
|
||||||
placement: metrics.placement,
|
|
||||||
maxHeight: Math.round(metrics.maxHeight)
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function handleSelect(modelId: string) {
|
async function handleSelect(modelId: string) {
|
||||||
|
|
@ -356,6 +293,14 @@
|
||||||
|
|
||||||
if (shouldCloseMenu) {
|
if (shouldCloseMenu) {
|
||||||
closeMenu();
|
closeMenu();
|
||||||
|
|
||||||
|
// Focus the chat textarea after model selection
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
const textarea = document.querySelector<HTMLTextAreaElement>(
|
||||||
|
'[data-slot="chat-form"] textarea'
|
||||||
|
);
|
||||||
|
textarea?.focus();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -404,10 +349,7 @@
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<svelte:window onresize={handleResize} />
|
<div class={cn('relative inline-flex flex-col items-end gap-1', className)}>
|
||||||
<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
|
|
||||||
|
|
||||||
<div class={cn('relative inline-flex flex-col items-end gap-1', className)} bind:this={container}>
|
|
||||||
{#if loading && options.length === 0 && isRouter}
|
{#if loading && options.length === 0 && isRouter}
|
||||||
<div class="flex items-center gap-2 text-xs text-muted-foreground">
|
<div class="flex items-center gap-2 text-xs text-muted-foreground">
|
||||||
<Loader2 class="h-3.5 w-3.5 animate-spin" />
|
<Loader2 class="h-3.5 w-3.5 animate-spin" />
|
||||||
|
|
@ -418,9 +360,8 @@
|
||||||
{:else}
|
{:else}
|
||||||
{@const selectedOption = getDisplayOption()}
|
{@const selectedOption = getDisplayOption()}
|
||||||
|
|
||||||
<div class="relative">
|
<Popover.Root bind:open={isOpen} onOpenChange={handleOpenChange}>
|
||||||
<button
|
<Popover.Trigger
|
||||||
type="button"
|
|
||||||
class={cn(
|
class={cn(
|
||||||
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
|
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
|
||||||
!isCurrentModelInCache()
|
!isCurrentModelInCache()
|
||||||
|
|
@ -430,15 +371,11 @@
|
||||||
: isHighlightedCurrentModelActive
|
: isHighlightedCurrentModelActive
|
||||||
? 'text-foreground'
|
? 'text-foreground'
|
||||||
: 'text-muted-foreground',
|
: 'text-muted-foreground',
|
||||||
isOpen ? 'text-foreground' : '',
|
isOpen ? 'text-foreground' : ''
|
||||||
className
|
|
||||||
)}
|
)}
|
||||||
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
|
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
|
||||||
aria-haspopup={isRouter ? 'listbox' : undefined}
|
onclick={handleTriggerClick}
|
||||||
aria-expanded={isRouter ? isOpen : undefined}
|
disabled={disabled || updating || !isRouter}
|
||||||
onclick={toggleOpen}
|
|
||||||
bind:this={triggerButton}
|
|
||||||
disabled={disabled || updating}
|
|
||||||
>
|
>
|
||||||
<Package class="h-3.5 w-3.5" />
|
<Package class="h-3.5 w-3.5" />
|
||||||
|
|
||||||
|
|
@ -451,33 +388,35 @@
|
||||||
{:else if isRouter}
|
{:else if isRouter}
|
||||||
<ChevronDown class="h-3 w-3.5" />
|
<ChevronDown class="h-3 w-3.5" />
|
||||||
{/if}
|
{/if}
|
||||||
</button>
|
</Popover.Trigger>
|
||||||
|
|
||||||
{#if isOpen && isRouter}
|
<Popover.Content
|
||||||
<div
|
class="group/popover-content w-96 max-w-[calc(100vw-2rem)] p-0"
|
||||||
bind:this={menuRef}
|
align="end"
|
||||||
use:portalToBody
|
sideOffset={8}
|
||||||
class={cn(
|
collisionPadding={16}
|
||||||
'fixed z-[1000] overflow-hidden rounded-md border bg-popover shadow-lg transition-opacity',
|
|
||||||
menuPosition ? 'opacity-100' : 'pointer-events-none opacity-0'
|
|
||||||
)}
|
|
||||||
role="listbox"
|
|
||||||
style:top={menuPosition ? `${menuPosition.top}px` : undefined}
|
|
||||||
style:left={menuPosition ? `${menuPosition.left}px` : undefined}
|
|
||||||
style:width={menuPosition ? `${menuPosition.width}px` : undefined}
|
|
||||||
data-placement={menuPosition?.placement ?? 'bottom'}
|
|
||||||
>
|
>
|
||||||
|
<div class="flex max-h-[50dvh] flex-col overflow-hidden">
|
||||||
<div
|
<div
|
||||||
class="overflow-y-auto py-1"
|
class="order-1 shrink-0 border-b p-4 group-data-[side=top]/popover-content:order-2 group-data-[side=top]/popover-content:border-t group-data-[side=top]/popover-content:border-b-0"
|
||||||
style:max-height={menuPosition && menuPosition.maxHeight > 0
|
>
|
||||||
? `${menuPosition.maxHeight}px`
|
<SearchInput
|
||||||
: undefined}
|
id="model-search"
|
||||||
|
placeholder="Search models..."
|
||||||
|
bind:value={searchTerm}
|
||||||
|
bind:ref={searchInputRef}
|
||||||
|
onClose={closeMenu}
|
||||||
|
onKeyDown={handleSearchKeyDown}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
class="models-list order-2 min-h-0 flex-1 overflow-y-auto group-data-[side=top]/popover-content:order-1"
|
||||||
>
|
>
|
||||||
{#if !isCurrentModelInCache() && currentModel}
|
{#if !isCurrentModelInCache() && currentModel}
|
||||||
<!-- Show unavailable model as first option (disabled) -->
|
<!-- Show unavailable model as first option (disabled) -->
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-3 py-2 text-left text-sm text-red-400"
|
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-4 py-2 text-left text-sm text-red-400"
|
||||||
role="option"
|
role="option"
|
||||||
aria-selected="true"
|
aria-selected="true"
|
||||||
aria-disabled="true"
|
aria-disabled="true"
|
||||||
|
|
@ -488,20 +427,25 @@
|
||||||
</button>
|
</button>
|
||||||
<div class="my-1 h-px bg-border"></div>
|
<div class="my-1 h-px bg-border"></div>
|
||||||
{/if}
|
{/if}
|
||||||
{#each options as option (option.id)}
|
{#if filteredOptions.length === 0}
|
||||||
|
<p class="px-4 py-3 text-sm text-muted-foreground">No models found.</p>
|
||||||
|
{/if}
|
||||||
|
{#each filteredOptions as option, index (option.id)}
|
||||||
{@const status = getModelStatus(option.model)}
|
{@const status = getModelStatus(option.model)}
|
||||||
{@const isLoaded = status === ServerModelStatus.LOADED}
|
{@const isLoaded = status === ServerModelStatus.LOADED}
|
||||||
{@const isLoading = status === ServerModelStatus.LOADING}
|
{@const isLoading = status === ServerModelStatus.LOADING}
|
||||||
{@const isSelected = currentModel === option.model || activeId === option.id}
|
{@const isSelected = currentModel === option.model || activeId === option.id}
|
||||||
{@const isCompatible = isModelCompatible(option)}
|
{@const isCompatible = isModelCompatible(option)}
|
||||||
|
{@const isHighlighted = index === highlightedIndex}
|
||||||
{@const missingModalities = getMissingModalities(option)}
|
{@const missingModalities = getMissingModalities(option)}
|
||||||
|
|
||||||
<div
|
<div
|
||||||
class={cn(
|
class={cn(
|
||||||
'group flex w-full items-center gap-2 px-3 py-2 text-left text-sm transition focus:outline-none',
|
'group flex w-full items-center gap-2 px-4 py-2 text-left text-sm transition focus:outline-none',
|
||||||
isCompatible
|
isCompatible
|
||||||
? 'cursor-pointer hover:bg-muted focus:bg-muted'
|
? 'cursor-pointer hover:bg-muted focus:bg-muted'
|
||||||
: 'cursor-not-allowed opacity-50',
|
: 'cursor-not-allowed opacity-50',
|
||||||
isSelected
|
isSelected || isHighlighted
|
||||||
? 'bg-accent text-accent-foreground'
|
? 'bg-accent text-accent-foreground'
|
||||||
: isCompatible
|
: isCompatible
|
||||||
? 'hover:bg-accent hover:text-accent-foreground'
|
? 'hover:bg-accent hover:text-accent-foreground'
|
||||||
|
|
@ -509,10 +453,11 @@
|
||||||
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
|
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
|
||||||
)}
|
)}
|
||||||
role="option"
|
role="option"
|
||||||
aria-selected={isSelected}
|
aria-selected={isSelected || isHighlighted}
|
||||||
aria-disabled={!isCompatible}
|
aria-disabled={!isCompatible}
|
||||||
tabindex={isCompatible ? 0 : -1}
|
tabindex={isCompatible ? 0 : -1}
|
||||||
onclick={() => isCompatible && handleSelect(option.id)}
|
onclick={() => isCompatible && handleSelect(option.id)}
|
||||||
|
onmouseenter={() => (highlightedIndex = index)}
|
||||||
onkeydown={(e) => {
|
onkeydown={(e) => {
|
||||||
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
|
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
|
|
@ -586,8 +531,8 @@
|
||||||
{/each}
|
{/each}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
</Popover.Content>
|
||||||
</div>
|
</Popover.Root>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
import Root from './popover.svelte';
|
||||||
|
import Close from './popover-close.svelte';
|
||||||
|
import Content from './popover-content.svelte';
|
||||||
|
import Trigger from './popover-trigger.svelte';
|
||||||
|
import Portal from './popover-portal.svelte';
|
||||||
|
|
||||||
|
export {
|
||||||
|
Root,
|
||||||
|
Content,
|
||||||
|
Trigger,
|
||||||
|
Close,
|
||||||
|
Portal,
|
||||||
|
//
|
||||||
|
Root as Popover,
|
||||||
|
Content as PopoverContent,
|
||||||
|
Trigger as PopoverTrigger,
|
||||||
|
Close as PopoverClose,
|
||||||
|
Portal as PopoverPortal
|
||||||
|
};
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||||
|
|
||||||
|
let { ref = $bindable(null), ...restProps }: PopoverPrimitive.CloseProps = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<PopoverPrimitive.Close bind:ref data-slot="popover-close" {...restProps} />
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||||
|
import PopoverPortal from './popover-portal.svelte';
|
||||||
|
import { cn, type WithoutChildrenOrChild } from '$lib/components/ui/utils.js';
|
||||||
|
import type { ComponentProps } from 'svelte';
|
||||||
|
|
||||||
|
let {
|
||||||
|
ref = $bindable(null),
|
||||||
|
class: className,
|
||||||
|
sideOffset = 4,
|
||||||
|
side,
|
||||||
|
align = 'center',
|
||||||
|
collisionPadding = 8,
|
||||||
|
avoidCollisions = true,
|
||||||
|
portalProps,
|
||||||
|
...restProps
|
||||||
|
}: PopoverPrimitive.ContentProps & {
|
||||||
|
portalProps?: WithoutChildrenOrChild<ComponentProps<typeof PopoverPortal>>;
|
||||||
|
} = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<PopoverPortal {...portalProps}>
|
||||||
|
<PopoverPrimitive.Content
|
||||||
|
bind:ref
|
||||||
|
data-slot="popover-content"
|
||||||
|
{sideOffset}
|
||||||
|
{side}
|
||||||
|
{align}
|
||||||
|
{collisionPadding}
|
||||||
|
{avoidCollisions}
|
||||||
|
class={cn(
|
||||||
|
'z-50 w-72 origin-(--bits-popover-content-transform-origin) rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-hidden data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-end-2 data-[side=right]:slide-in-from-start-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||||
|
className
|
||||||
|
)}
|
||||||
|
{...restProps}
|
||||||
|
/>
|
||||||
|
</PopoverPortal>
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||||
|
|
||||||
|
let { ...restProps }: PopoverPrimitive.PortalProps = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<PopoverPrimitive.Portal {...restProps} />
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { cn } from '$lib/components/ui/utils.js';
|
||||||
|
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||||
|
|
||||||
|
let {
|
||||||
|
ref = $bindable(null),
|
||||||
|
class: className,
|
||||||
|
...restProps
|
||||||
|
}: PopoverPrimitive.TriggerProps = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<PopoverPrimitive.Trigger
|
||||||
|
bind:ref
|
||||||
|
data-slot="popover-trigger"
|
||||||
|
class={cn('', className)}
|
||||||
|
{...restProps}
|
||||||
|
/>
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import { Popover as PopoverPrimitive } from 'bits-ui';
|
||||||
|
|
||||||
|
let { open = $bindable(false), ...restProps }: PopoverPrimitive.RootProps = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<PopoverPrimitive.Root bind:open {...restProps} />
|
||||||
|
|
@ -1,3 +1,2 @@
|
||||||
export const VIEWPORT_GUTTER = 8;
|
export const VIEWPORT_GUTTER = 8;
|
||||||
export const MENU_OFFSET = 6;
|
export const MENU_OFFSET = 6;
|
||||||
export const MENU_MAX_WIDTH = 320;
|
|
||||||
|
|
|
||||||
|
|
@ -295,14 +295,21 @@ class ModelsStore {
|
||||||
* Fetch props for a specific model from /props endpoint
|
* Fetch props for a specific model from /props endpoint
|
||||||
* Uses caching to avoid redundant requests
|
* Uses caching to avoid redundant requests
|
||||||
*
|
*
|
||||||
|
* In ROUTER mode, this will only fetch props if the model is loaded,
|
||||||
|
* since unloaded models return 400 from /props endpoint.
|
||||||
|
*
|
||||||
* @param modelId - Model identifier to fetch props for
|
* @param modelId - Model identifier to fetch props for
|
||||||
* @returns Props data or null if fetch failed
|
* @returns Props data or null if fetch failed or model not loaded
|
||||||
*/
|
*/
|
||||||
async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
|
async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
|
||||||
// Return cached props if available
|
// Return cached props if available
|
||||||
const cached = this.modelPropsCache.get(modelId);
|
const cached = this.modelPropsCache.get(modelId);
|
||||||
if (cached) return cached;
|
if (cached) return cached;
|
||||||
|
|
||||||
|
if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// Avoid duplicate fetches
|
// Avoid duplicate fetches
|
||||||
if (this.modelPropsFetching.has(modelId)) return null;
|
if (this.modelPropsFetching.has(modelId)) return null;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue