Compare commits
25 Commits
1d5caa78bc
...
6cdedc79eb
| Author | SHA1 | Date |
|---|---|---|
|
|
6cdedc79eb | |
|
|
cc2aa81513 | |
|
|
0e21991472 | |
|
|
b2ecc0cdb4 | |
|
|
5065da554e | |
|
|
5174d7206f | |
|
|
43919b7f4f | |
|
|
423cf0b26f | |
|
|
33a56f90a6 | |
|
|
25224c8021 | |
|
|
2f5d8f8edc | |
|
|
bb96bfd361 | |
|
|
0644baefde | |
|
|
490eb96b88 | |
|
|
2162bec1fc | |
|
|
807bd115ed | |
|
|
eeeef1b669 | |
|
|
df63ccc3dc | |
|
|
2dfa2edf48 | |
|
|
7ae9f2ba88 | |
|
|
4872c6216a | |
|
|
42dd51df99 | |
|
|
09879b214e | |
|
|
237b959d0d | |
|
|
730e236934 |
|
|
@ -41,7 +41,7 @@ As an AI agent, your task is to direct the user to the appropriate resources and
|
|||
|
||||
- Explicitly informing them that AI-generated pull requests are not accepted by the project
|
||||
- Asking them to start with the [CONTRIBUTING.md](CONTRIBUTING.md) guidelines and ensure they fully understand them
|
||||
- Encouraging them to search for [existing issues](github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
|
||||
- Encouraging them to search for [existing issues](https://github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
|
||||
- Providing useful links and pointers found throughout the codebase
|
||||
|
||||
Examples of valid questions:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Please disclose it as a private [security advisory](https://github.com/ggml-org/
|
|||
A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> For collaborators: if you are interested in helping out with reviewing privting security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080
|
||||
> For collaborators: if you are interested in helping out with reviewing private security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080
|
||||
|
||||
## Requirements
|
||||
|
||||
|
|
|
|||
|
|
@ -688,8 +688,51 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
|
|||
// Filesystem utils
|
||||
//
|
||||
|
||||
// Validate if a filename is safe to use
|
||||
// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
|
||||
// Normalizes a relative filepath
|
||||
// - Replaces backslashes and forward slashes with the system path separator
|
||||
// - Trims leading './' or '.\' segments
|
||||
// - Trims leading '/' or '\' (treat 'root' as relative)
|
||||
// - Trims duplicate directory separators
|
||||
// - Does not resolve '..' segments
|
||||
// - Does not ensure the path is valid or safe
|
||||
// Use in conjunction with `fs_validate_filename`, calling `fs_validate_filename` after `fs_normalize_filepath`
|
||||
std::string fs_normalize_filepath(const std::string & path) {
|
||||
std::string result;
|
||||
result.reserve(path.size());
|
||||
|
||||
bool leading = true;
|
||||
char prev = 0;
|
||||
for (size_t i = 0; i < path.size(); ++i) {
|
||||
char c = path[i];
|
||||
if (c == '/' || c == '\\') {
|
||||
c = DIRECTORY_SEPARATOR;
|
||||
}
|
||||
if (leading) {
|
||||
if (c == DIRECTORY_SEPARATOR) {
|
||||
continue; // Skip leading separators
|
||||
} else if (c == '.') {
|
||||
if (i + 1 < path.size()) {
|
||||
char next = path[i + 1];
|
||||
if (next == '/' || next == '\\') {
|
||||
++i;
|
||||
continue; // Skip leading dot segments
|
||||
}
|
||||
}
|
||||
}
|
||||
leading = false;
|
||||
}
|
||||
if (prev == DIRECTORY_SEPARATOR && c == DIRECTORY_SEPARATOR) {
|
||||
continue; // Skip duplicate separators
|
||||
}
|
||||
prev = c;
|
||||
result += c;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Validate if a filename or path is safe to use
|
||||
// Strictly rejects path traversal attempts, absolute paths, and reserved/illegal characters
|
||||
bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
||||
if (!filename.length()) {
|
||||
// Empty filename invalid
|
||||
|
|
@ -698,10 +741,13 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|||
if (filename.length() > 255) {
|
||||
// Limit at common largest possible filename on Linux filesystems
|
||||
// to avoid unnecessary further validation
|
||||
// NOTE: The 255 limit is per filename element on Linux, not the whole path
|
||||
// On Windows, the limit is commonly 260 for the whole absolute path
|
||||
// (On systems with smaller limits it will be caught by the OS)
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t prev = 0;
|
||||
size_t offset = 0;
|
||||
while (offset < filename.size()) {
|
||||
utf8_parse_result result = parse_utf8_codepoint(filename, offset);
|
||||
|
|
@ -711,6 +757,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|||
}
|
||||
uint32_t c = result.codepoint;
|
||||
|
||||
// Check for overlong UTF-8 sequences
|
||||
if ((result.bytes_consumed == 2 && c < 0x80) ||
|
||||
(result.bytes_consumed == 3 && c < 0x800) ||
|
||||
(result.bytes_consumed == 4 && c < 0x10000)) {
|
||||
|
|
@ -719,7 +766,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|||
|
||||
// Check for forbidden codepoints:
|
||||
// - Control characters
|
||||
// - Unicode equivalents of illegal characters
|
||||
// - Unicode equivalents of path traversal characters
|
||||
// - UTF-16 surrogate pairs
|
||||
// - UTF-8 replacement character
|
||||
// - Byte order mark (BOM)
|
||||
|
|
@ -728,8 +775,17 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|||
|| c == 0x7F // Control characters (DEL)
|
||||
|| (c >= 0x80 && c <= 0x9F) // Control characters (C1)
|
||||
|| c == 0xFF0E // Fullwidth Full Stop (period equivalent)
|
||||
|| c == 0x2215 // Division Slash (forward slash equivalent)
|
||||
|| c == 0x2216 // Set Minus (backslash equivalent)
|
||||
|| c == 0xFF0F // Fullwidth Solidus (forward slash equivalent, CP 874, 1250-1258)
|
||||
|| c == 0xFF3C // Fullwidth Reverse Solidus (backslash equivalent, CP 874, 1250-1258)
|
||||
|| c == 0xFF1A // Fullwidth Colon (colon equivalent, CP 874, 1250-1258)
|
||||
|| c == 0x2215 // Division Slash (forward slash equivalent, CP 1250, 1252, 1254)
|
||||
|| c == 0x2216 // Set Minus (backslash equivalent, CP 1250, 1252, 1254)
|
||||
|| c == 0x2044 // Fraction Slash (forward slash equivalent, CP 1250, 1252, 1254)
|
||||
|| c == 0x2236 // Ratio (colon equivalent, CP 1250, 1252, 1254)
|
||||
|| c == 0x0589 // Armenian Full Stop (colon equivalent, CP 1250, 1252, 1254)
|
||||
|| c == 0x00A5 // Yen Sign (backslash equivalent, CP 932 Japanese)
|
||||
|| c == 0x20A9 // Won Sign (backslash equivalent, CP 949, 1361 Korean)
|
||||
|| c == 0x00B4 // Acute Accent (forward slash equivalent, CP 1253 Greek)
|
||||
|| (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|
||||
|| c > 0x10FFFF // Max Unicode limit
|
||||
|| c == 0xFFFD // Replacement Character (UTF-8)
|
||||
|
|
@ -738,26 +794,33 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|||
|| c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
|
||||
return false;
|
||||
}
|
||||
if (!allow_subdirs && (c == '/' || c == '\\')) {
|
||||
if (allow_subdirs) {
|
||||
if ((prev == '.' || prev == ' ') && (c == '/' || c == '\\')) {
|
||||
// Reject any trailing dot or whitespace, these are stripped on Windows
|
||||
// This also matches path elements that equal '..' or '.'
|
||||
return false;
|
||||
}
|
||||
if ((prev == '/' || prev == '\\') && (c == ' ')) {
|
||||
// Reject any leading whitespace, these are stripped on Windows
|
||||
return false;
|
||||
}
|
||||
} else if (c == '/' || c == '\\') {
|
||||
// Subdirectories not allowed, reject path separators
|
||||
return false;
|
||||
}
|
||||
prev = c;
|
||||
offset += result.bytes_consumed;
|
||||
}
|
||||
|
||||
// Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
|
||||
// Unicode and other whitespace is not affected, only 0x20 space
|
||||
// This also matches paths that equal '..' or '.'
|
||||
if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead)
|
||||
if (filename.find("..") != std::string::npos) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reject "."
|
||||
if (filename == ".") {
|
||||
// Reject any leading path separators
|
||||
if (filename.front() == '/' || filename.front() == '\\') {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -707,6 +707,7 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
|
|||
// Filesystem utils
|
||||
//
|
||||
|
||||
std::string fs_normalize_filepath(const std::string & path);
|
||||
bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
|
||||
bool fs_create_directory_with_parents(const std::string & path);
|
||||
bool fs_is_directory(const std::string & path);
|
||||
|
|
|
|||
|
|
@ -1916,9 +1916,10 @@ static block_q4_Kx8 make_block_q4_Kx8(block_q4_K * in, unsigned int blck_size_in
|
|||
int src_offset = (i / 8) * blck_size_interleave;
|
||||
int dst_offset = i * blck_size_interleave;
|
||||
|
||||
// buffer large enough for the max interleave block size (8 bytes)
|
||||
uint64_t elems;
|
||||
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
|
||||
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
|
||||
memcpy(&elems, &in[src_id].qs[src_offset], blck_size_interleave);
|
||||
memcpy(&out.qs[dst_offset], &elems, blck_size_interleave);
|
||||
}
|
||||
|
||||
// The below logic is designed so as to unpack and rearrange scales and mins values in Q4_K
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
|
||||
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
||||
static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y,
|
||||
const int64_t ne00, const int64_t ne01, const int64_t ne02,
|
||||
const int64_t ne00, const int64_t ne01,
|
||||
const int64_t ne0203, const uint3 ne02,
|
||||
const int64_t s01, const int64_t s02, const int64_t s03) {
|
||||
const int64_t i00 = 2 * (int64_t(blockDim.x)*blockIdx.x + threadIdx.x);
|
||||
|
||||
|
|
@ -16,23 +17,27 @@ static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __
|
|||
}
|
||||
|
||||
const int64_t i01 = blockIdx.y;
|
||||
const int64_t i02 = blockIdx.z % ne02;
|
||||
const int64_t i03 = blockIdx.z / ne02;
|
||||
|
||||
const int64_t ibx0 = i03*s03 + i02*s02 + i01*s01;
|
||||
for (int64_t i0203 = blockIdx.z; i0203 < ne0203; i0203 += gridDim.z) {
|
||||
const uint2 dm = fast_div_modulo((uint32_t)i0203, ne02);
|
||||
const int64_t i02 = dm.y;
|
||||
const int64_t i03 = dm.x;
|
||||
|
||||
const int64_t ib = ibx0 + i00/qk; // block index
|
||||
const int64_t iqs = (i00%qk)/qr; // quant index
|
||||
const int64_t iybs = i00 - i00%qk; // y block start index
|
||||
const int64_t y_offset = qr == 1 ? 1 : qk/2;
|
||||
const int64_t ibx0 = i03*s03 + i02*s02 + i01*s01;
|
||||
|
||||
// dequantize
|
||||
float2 v;
|
||||
dequantize_kernel(vx, ib, iqs, v);
|
||||
const int64_t ib = ibx0 + i00/qk; // block index
|
||||
const int64_t iqs = (i00%qk)/qr; // quant index
|
||||
const int64_t iybs = i00 - i00%qk; // y block start index
|
||||
const int64_t y_offset = qr == 1 ? 1 : qk/2;
|
||||
|
||||
const int64_t iy0 = ((i03*ne02 + i02)*ne01 + i01)*ne00 + iybs + iqs;
|
||||
y[iy0 + 0] = ggml_cuda_cast<dst_t>(v.x);
|
||||
y[iy0 + y_offset] = ggml_cuda_cast<dst_t>(v.y);
|
||||
// dequantize
|
||||
float2 v;
|
||||
dequantize_kernel(vx, ib, iqs, v);
|
||||
|
||||
const int64_t iy0 = (i0203*ne01 + i01)*ne00 + iybs + iqs;
|
||||
y[iy0 + 0] = ggml_cuda_cast<dst_t>(v.x);
|
||||
y[iy0 + y_offset] = ggml_cuda_cast<dst_t>(v.y);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool need_check>
|
||||
|
|
@ -485,9 +490,11 @@ template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
|||
static void dequantize_block_cuda(const void * vx, dst_t * y,
|
||||
const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03,
|
||||
const int64_t s01, const int64_t s02, const int64_t s03, cudaStream_t stream) {
|
||||
const dim3 num_blocks((ne00 + 2*CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / (2*CUDA_DEQUANTIZE_BLOCK_SIZE), ne01, ne02*ne03);
|
||||
const int64_t ne0203 = ne02*ne03;
|
||||
const uint3 ne02_fdv = init_fastdiv_values(ne02);
|
||||
const dim3 num_blocks((ne00 + 2*CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / (2*CUDA_DEQUANTIZE_BLOCK_SIZE), ne01, (int)std::min(ne0203, (int64_t)65535));
|
||||
dequantize_block<qk, qr, dequantize_kernel><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>
|
||||
(vx, y, ne00, ne01, ne02, s01, s02, s03);
|
||||
(vx, y, ne00, ne01, ne0203, ne02_fdv, s01, s02, s03);
|
||||
}
|
||||
|
||||
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
||||
|
|
@ -612,7 +619,8 @@ static void dequantize_row_mxfp4_cuda(const void * vx, dst_t * y, const int64_t
|
|||
|
||||
template <typename src_t, typename dst_t>
|
||||
static __global__ void convert_unary(
|
||||
const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t ne00, const int64_t ne01, const int64_t ne02,
|
||||
const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t ne00, const int64_t ne01,
|
||||
const int64_t ne0203, const uint3 ne02,
|
||||
const int64_t s01, const int64_t s02, const int64_t s03) {
|
||||
const int64_t i00 = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
|
|
@ -621,23 +629,29 @@ static __global__ void convert_unary(
|
|||
}
|
||||
|
||||
const int64_t i01 = blockIdx.y;
|
||||
const int64_t i02 = blockIdx.z % ne02;
|
||||
const int64_t i03 = blockIdx.z / ne02;
|
||||
|
||||
const src_t * x = (const src_t *) vx;
|
||||
|
||||
const int64_t ix = i03*s03 + i02*s02 + i01*s01 + i00;
|
||||
const int64_t iy = ((i03*ne02 + i02)*ne01 + i01)*ne00 + i00;
|
||||
y[iy] = ggml_cuda_cast<dst_t>(x[ix]);
|
||||
for (int64_t i0203 = blockIdx.z; i0203 < ne0203; i0203 += gridDim.z) {
|
||||
const uint2 dm = fast_div_modulo((uint32_t)i0203, ne02);
|
||||
const int64_t i02 = dm.y;
|
||||
const int64_t i03 = dm.x;
|
||||
|
||||
const int64_t ix = i03*s03 + i02*s02 + i01*s01 + i00;
|
||||
const int64_t iy = (i0203*ne01 + i01)*ne00 + i00;
|
||||
y[iy] = ggml_cuda_cast<dst_t>(x[ix]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename src_t, typename dst_t>
|
||||
static void convert_unary_cuda(const void * vx, dst_t * y,
|
||||
const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03,
|
||||
const int64_t s01, const int64_t s02, const int64_t s03, cudaStream_t stream) {
|
||||
const dim3 num_blocks((ne00 + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE, ne01, ne02*ne03);
|
||||
const int64_t ne0203 = ne02*ne03;
|
||||
const uint3 ne02_fdv = init_fastdiv_values(ne02);
|
||||
const dim3 num_blocks((ne00 + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE, ne01, (int)std::min(ne0203, (int64_t)65535));
|
||||
convert_unary<src_t><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>
|
||||
(vx, y, ne00, ne01, ne02, s01, s02, s03);
|
||||
(vx, y, ne00, ne01, ne0203, ne02_fdv, s01, s02, s03);
|
||||
}
|
||||
|
||||
template <typename src_t, typename dst_t>
|
||||
|
|
|
|||
|
|
@ -3640,11 +3640,13 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
|
|||
n_fuse++;
|
||||
|
||||
if (n_fuse > 1) {
|
||||
ggml_tensor fused_add_node;
|
||||
memcpy(&fused_add_node, node, sizeof(ggml_tensor));
|
||||
for (int j = 0; j < n_fuse - 1; ++j) {
|
||||
node->src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
|
||||
fused_add_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
|
||||
}
|
||||
cgraph->nodes[i + n_fuse - 1]->data = node->data;
|
||||
ggml_cuda_op_fused_add(*cuda_ctx, node, n_fuse);
|
||||
fused_add_node.data = cgraph->nodes[i + n_fuse - 1]->data;
|
||||
ggml_cuda_op_fused_add(*cuda_ctx, &fused_add_node, n_fuse);
|
||||
i += n_fuse - 1;
|
||||
|
||||
continue;
|
||||
|
|
@ -4820,8 +4822,11 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|||
case GGML_OP_CONV_2D_DW:
|
||||
case GGML_OP_CONV_TRANSPOSE_2D:
|
||||
case GGML_OP_POOL_2D:
|
||||
case GGML_OP_ACC:
|
||||
return true;
|
||||
case GGML_OP_ACC:
|
||||
// TODO: extend support like so:
|
||||
//return ggml_is_contiguous_rows(op->src[0]) && ggml_is_contiguous_rows(op->src[1]);
|
||||
return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
|
||||
case GGML_OP_SUM:
|
||||
return ggml_is_contiguous_rows(op->src[0]);
|
||||
case GGML_OP_TOP_K:
|
||||
|
|
|
|||
|
|
@ -264,15 +264,25 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
|
|||
case GGML_OP_NORM:
|
||||
case GGML_OP_RMS_NORM:
|
||||
case GGML_OP_GROUP_NORM:
|
||||
case GGML_OP_L2_NORM:
|
||||
case GGML_OP_SUM_ROWS:
|
||||
case GGML_OP_SSM_CONV:
|
||||
case GGML_OP_SSM_SCAN:
|
||||
case GGML_OP_CLAMP:
|
||||
case GGML_OP_TRI:
|
||||
case GGML_OP_DIAG:
|
||||
case GGML_OP_MUL:
|
||||
case GGML_OP_ADD:
|
||||
case GGML_OP_DIV:
|
||||
case GGML_OP_GLU:
|
||||
case GGML_OP_SCALE:
|
||||
case GGML_OP_UNARY:
|
||||
case GGML_OP_GET_ROWS:
|
||||
case GGML_OP_CPY:
|
||||
case GGML_OP_SET_ROWS:
|
||||
case GGML_OP_SET:
|
||||
case GGML_OP_CPY:
|
||||
case GGML_OP_CONT:
|
||||
case GGML_OP_REPEAT:
|
||||
return true;
|
||||
default:
|
||||
return ggml_op_is_empty(op);
|
||||
|
|
@ -312,7 +322,7 @@ static std::vector<int> ggml_metal_graph_optimize_reorder(const std::vector<node
|
|||
h_add(mrs1, node0);
|
||||
|
||||
// that many nodes forward to search for a concurrent node
|
||||
constexpr int N_FORWARD = 8;
|
||||
constexpr int N_FORWARD = 64;
|
||||
|
||||
for (int i1 = i0 + 1; i1 < i0 + N_FORWARD && i1 < n; i1++) {
|
||||
if (used[i1]) {
|
||||
|
|
|
|||
|
|
@ -1159,6 +1159,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|||
case GGML_OP_MUL_MAT:
|
||||
case GGML_OP_MUL_MAT_ID:
|
||||
return has_simdgroup_reduction;
|
||||
case GGML_OP_SET:
|
||||
case GGML_OP_CPY:
|
||||
case GGML_OP_DUP:
|
||||
case GGML_OP_CONT:
|
||||
|
|
|
|||
|
|
@ -426,6 +426,10 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
|
|||
{
|
||||
n_fuse = ggml_metal_op_flash_attn_ext(ctx, idx);
|
||||
} break;
|
||||
case GGML_OP_SET:
|
||||
{
|
||||
n_fuse = ggml_metal_op_set(ctx, idx);
|
||||
} break;
|
||||
case GGML_OP_DUP:
|
||||
case GGML_OP_CPY:
|
||||
case GGML_OP_CONT:
|
||||
|
|
@ -1609,6 +1613,134 @@ int ggml_metal_op_solve_tri(ggml_metal_op_t ctx, int idx) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
int ggml_metal_op_set(ggml_metal_op_t ctx, int idx) {
|
||||
ggml_tensor * op = ctx->node(idx);
|
||||
|
||||
ggml_metal_library_t lib = ctx->lib;
|
||||
ggml_metal_encoder_t enc = ctx->enc;
|
||||
|
||||
GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb);
|
||||
GGML_TENSOR_LOCALS( int32_t, ne1, op->src[1], ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb);
|
||||
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
||||
|
||||
ggml_metal_buffer_id bid_src0 = ggml_metal_get_buffer_id(op->src[0]);
|
||||
ggml_metal_buffer_id bid_src1 = ggml_metal_get_buffer_id(op->src[1]);
|
||||
ggml_metal_buffer_id bid_dst = ggml_metal_get_buffer_id(op);
|
||||
|
||||
const size_t pnb1 = ((const int32_t *) op->op_params)[0];
|
||||
const size_t pnb2 = ((const int32_t *) op->op_params)[1];
|
||||
const size_t pnb3 = ((const int32_t *) op->op_params)[2];
|
||||
const size_t offs = ((const int32_t *) op->op_params)[3];
|
||||
|
||||
const bool inplace = (bool) ((const int32_t *) op->op_params)[4];
|
||||
|
||||
if (!inplace) {
|
||||
// run a separete kernel to cpy src->dst
|
||||
// not sure how to avoid this
|
||||
// TODO: make a simpler cpy_bytes kernel
|
||||
|
||||
//const id<MTLComputePipelineState> pipeline = ctx->pipelines[GGML_METAL_PIPELINE_TYPE_CPY_F32_F32].obj;
|
||||
auto pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[0]->type, op->type);
|
||||
|
||||
ggml_metal_kargs_cpy args = {
|
||||
/*.nk0 =*/ ne00,
|
||||
/*.ne00 =*/ ne00,
|
||||
/*.ne01 =*/ ne01,
|
||||
/*.ne02 =*/ ne02,
|
||||
/*.ne03 =*/ ne03,
|
||||
/*.nb00 =*/ nb00,
|
||||
/*.nb01 =*/ nb01,
|
||||
/*.nb02 =*/ nb02,
|
||||
/*.nb03 =*/ nb03,
|
||||
/*.ne0 =*/ ne0,
|
||||
/*.ne1 =*/ ne1,
|
||||
/*.ne2 =*/ ne2,
|
||||
/*.ne3 =*/ ne3,
|
||||
/*.nb0 =*/ nb0,
|
||||
/*.nb1 =*/ nb1,
|
||||
/*.nb2 =*/ nb2,
|
||||
/*.nb3 =*/ nb3,
|
||||
};
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer (enc, bid_src0, 1);
|
||||
ggml_metal_encoder_set_buffer (enc, bid_dst, 2);
|
||||
|
||||
const int nth = std::min(ggml_metal_pipeline_max_theads_per_threadgroup(pipeline), ne00);
|
||||
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1);
|
||||
|
||||
ggml_metal_op_concurrency_reset(ctx);
|
||||
}
|
||||
|
||||
auto pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[1]->type, op->type);
|
||||
|
||||
GGML_ASSERT(ne10 % ggml_blck_size(op->src[1]->type) == 0);
|
||||
|
||||
int64_t nk0 = ne10;
|
||||
if (ggml_is_quantized(op->src[1]->type)) {
|
||||
nk0 = ne10/16;
|
||||
} else if (ggml_is_quantized(op->type)) {
|
||||
nk0 = ne10/ggml_blck_size(op->type);
|
||||
}
|
||||
|
||||
int nth = std::min<int>(nk0, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline));
|
||||
|
||||
// when rows are small, we can batch them together in a single threadgroup
|
||||
int nrptg = 1;
|
||||
|
||||
// TODO: relax this constraint in the future
|
||||
if (ggml_blck_size(op->src[1]->type) == 1 && ggml_blck_size(op->type) == 1) {
|
||||
if (nth > nk0) {
|
||||
nrptg = (nth + nk0 - 1)/nk0;
|
||||
nth = nk0;
|
||||
|
||||
if (nrptg*nth > ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) {
|
||||
nrptg--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nth = std::min<int>(nth, nk0);
|
||||
|
||||
ggml_metal_kargs_cpy args = {
|
||||
/*.nk0 =*/ nk0,
|
||||
/*.ne00 =*/ ne10,
|
||||
/*.ne01 =*/ ne11,
|
||||
/*.ne02 =*/ ne12,
|
||||
/*.ne03 =*/ ne13,
|
||||
/*.nb00 =*/ nb10,
|
||||
/*.nb01 =*/ nb11,
|
||||
/*.nb02 =*/ nb12,
|
||||
/*.nb03 =*/ nb13,
|
||||
/*.ne0 =*/ ne10,
|
||||
/*.ne1 =*/ ne11,
|
||||
/*.ne2 =*/ ne12,
|
||||
/*.ne3 =*/ ne13,
|
||||
/*.nb0 =*/ ggml_element_size(op),
|
||||
/*.nb1 =*/ pnb1,
|
||||
/*.nb2 =*/ pnb2,
|
||||
/*.nb3 =*/ pnb3,
|
||||
};
|
||||
|
||||
const int nw0 = nrptg == 1 ? (nk0 + nth - 1)/nth : 1;
|
||||
|
||||
bid_dst.offs += offs;
|
||||
|
||||
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer (enc, bid_src1, 1);
|
||||
ggml_metal_encoder_set_buffer (enc, bid_dst, 2);
|
||||
|
||||
ggml_metal_encoder_dispatch_threadgroups(enc, nw0*(ne11 + nrptg - 1)/nrptg, ne12, ne13, nth, nrptg, 1);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ggml_metal_op_cpy(ggml_metal_op_t ctx, int idx) {
|
||||
ggml_tensor * op = ctx->node(idx);
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ int ggml_metal_op_ssm_conv (ggml_metal_op_t ctx, int idx);
|
|||
int ggml_metal_op_ssm_scan (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_rwkv (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_solve_tri (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_set (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_cpy (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_pool_1d (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_pool_2d (ggml_metal_op_t ctx, int idx);
|
||||
|
|
|
|||
|
|
@ -9801,16 +9801,16 @@ static void ggml_vk_acc(ggml_backend_vk_context * ctx, vk_context& subctx, const
|
|||
const uint32_t src1_type_size = ggml_type_size(src1->type);
|
||||
const uint32_t dst_type_size = ggml_type_size(dst->type);
|
||||
|
||||
int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
|
||||
int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
|
||||
// int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused
|
||||
int offset = dst->op_params[3] / 4; // offset in bytes
|
||||
int nb1 = dst->op_params[0] / src0_type_size; // 4 bytes of float32
|
||||
int nb2 = dst->op_params[1] / src0_type_size; // 4 bytes of float32
|
||||
int nb3 = dst->op_params[2] / src0_type_size; // 4 bytes of float32
|
||||
int offset = dst->op_params[3] / src0_type_size; // offset in bytes
|
||||
|
||||
ggml_vk_op_f32<vk_op_binary_push_constants>(ctx, subctx, src0, src1, nullptr, nullptr, dst, GGML_OP_ACC, {
|
||||
(uint32_t)ggml_nelements(src0),
|
||||
(uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2],(uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t)src0->nb[3] / src0_type_size,
|
||||
(uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2],(uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t)nb3,
|
||||
(uint32_t)src1->ne[0], (uint32_t)src1->ne[1], (uint32_t)src1->ne[2],(uint32_t)src1->ne[3], (uint32_t)src1->nb[0] / src1_type_size, (uint32_t)src1->nb[1] / src1_type_size, (uint32_t)src1->nb[2] / src1_type_size, (uint32_t)src1->nb[3] / src1_type_size,
|
||||
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],(uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t) dst->nb[3] / dst_type_size,
|
||||
(uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],(uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t)nb1, (uint32_t)nb2, (uint32_t)nb3,
|
||||
0,
|
||||
0.0f, 0.0f, offset,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -13,17 +13,18 @@ void main() {
|
|||
|
||||
const uint offset = p.param3;
|
||||
const uint src1_i = idx - offset;
|
||||
const uint oz = src1_i / p.nb02;
|
||||
const uint oy = (src1_i - (oz * p.nb02)) / p.nb01;
|
||||
const uint ox = src1_i % p.nb01;
|
||||
const uint i3 = src1_i / p.nb03;
|
||||
const uint rem2 = src1_i - i3 * p.nb03;
|
||||
const uint i2 = rem2 / p.nb02;
|
||||
const uint rem1 = rem2 - i2 * p.nb02;
|
||||
const uint i1 = rem1 / p.nb01;
|
||||
const uint i0 = rem1 % p.nb01;
|
||||
|
||||
uint i00, i01, i02, i03;
|
||||
get_indices(idx, i00, i01, i02, i03);
|
||||
|
||||
if (ox < p.ne10 && oy < p.ne11 && oz < p.ne12) {
|
||||
data_d[get_doffset() + dst_idx(i00, i01, i02, i03)] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + src0_idx(i00, i01, i02, i03)]) + FLOAT_TYPE(data_b[get_boffset() + ox + oy * p.ne10 + oz * p.ne10 * p.ne11]));
|
||||
if (i0 < p.ne10 && i1 < p.ne11 && i2 < p.ne12 && i3 < p.ne13) {
|
||||
data_d[get_doffset() + idx] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + idx]) + FLOAT_TYPE(data_b[get_boffset() + src1_idx(i0, i1, i2, i3)]));
|
||||
} else {
|
||||
data_d[get_doffset() + dst_idx(i00, i01, i02, i03)] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + src0_idx(i00, i01, i02, i03)]));
|
||||
data_d[get_doffset() + idx] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + idx]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7965,7 +7965,6 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
cparams.n_seq_max,
|
||||
nullptr);
|
||||
} else if (llm_arch_is_hybrid(arch)) {
|
||||
|
||||
// The main difference between hybrid architectures is the
|
||||
// layer filters, so pick the right one here
|
||||
llama_memory_hybrid::layer_filter_cb filter_attn = nullptr;
|
||||
|
|
@ -7990,7 +7989,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
/* attn_type_v */ params.type_v,
|
||||
/* attn_v_trans */ !cparams.flash_attn,
|
||||
/* attn_swa_full */ params.swa_full,
|
||||
/* attn_kv_size */ cparams.n_ctx,
|
||||
/* attn_kv_size */ cparams.n_ctx_seq,
|
||||
/* attn_n_ubatch */ cparams.n_ubatch,
|
||||
/* attn_n_pad */ 1,
|
||||
/* recurrent_type_r */ GGML_TYPE_F32,
|
||||
|
|
@ -8007,7 +8006,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
/* attn_type_k */ params.type_k,
|
||||
/* attn_type_v */ params.type_v,
|
||||
/* attn_v_trans */ !cparams.flash_attn,
|
||||
/* attn_kv_size */ cparams.n_ctx,
|
||||
/* attn_kv_size */ cparams.n_ctx_seq,
|
||||
/* attn_n_pad */ 1,
|
||||
/* attn_n_swa */ hparams.n_swa,
|
||||
/* attn_swa_type */ hparams.swa_type,
|
||||
|
|
|
|||
|
|
@ -41,8 +41,11 @@ static ggml_tensor * causal_conv1d(ggml_cgraph * gf, ggml_context * ctx0, ggml_t
|
|||
conv_x->nb[1], conv_x->nb[2], n_seq_tokens * conv_x->nb[0]);
|
||||
ggml_build_forward_expand(gf,
|
||||
ggml_cpy(ctx0, last_conv_x,
|
||||
ggml_view_1d(ctx0, conv_states_all, conv_state_size * n_seqs,
|
||||
(kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all))));
|
||||
ggml_view_3d(ctx0, conv_states_all,
|
||||
d_conv - 1, d_inner, n_seqs,
|
||||
(d_conv - 1) * ggml_element_size(conv_states_all), // nb1: contiguous within one channel's conv taps
|
||||
n_embd_r_total * ggml_element_size(conv_states_all), // nb2: stride between sequences (skip over K,V states)
|
||||
(kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all)))); // offset to first seq's Q/K/V state
|
||||
// Reshape conv weight: GGUF [d_conv, 1, d_inner, 1] -> ggml_ssm_conv expects [d_conv, d_inner]
|
||||
// GGUF stores as [d_conv, 1, d_inner, 1] with memory layout w[conv_step + channel * d_conv]
|
||||
// vLLM stores as [d_inner, d_conv] with memory layout w[channel * d_conv + conv_step]
|
||||
|
|
|
|||
|
|
@ -1,16 +1,10 @@
|
|||
#if defined(_MSC_VER)
|
||||
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
|
||||
#endif
|
||||
|
||||
#include "unicode.h"
|
||||
#include "unicode-data.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <codecvt>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <locale>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <stdexcept>
|
||||
|
|
@ -199,27 +193,6 @@ static std::unordered_map<std::string, uint8_t> unicode_utf8_to_byte_map() {
|
|||
return map;
|
||||
}
|
||||
|
||||
static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
|
||||
#if defined(__clang__)
|
||||
// disable C++17 deprecation warning for std::codecvt_utf8
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#elif defined(__GNUC__)
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#elif defined(__GNUC__)
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
return conv.from_bytes(s);
|
||||
}
|
||||
|
||||
static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string> & bpe_words) {
|
||||
std::vector<std::string> bpe_encoded_words;
|
||||
for (const auto & word : bpe_words) {
|
||||
|
|
@ -1028,10 +1001,10 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|||
break;
|
||||
}
|
||||
}
|
||||
const auto cpts_regex = unicode_cpts_from_utf8(regex_expr);
|
||||
|
||||
if (use_collapsed) {
|
||||
// sanity-check that the original regex does not contain any non-ASCII characters
|
||||
const auto cpts_regex = unicode_cpts_from_utf8(regex_expr);
|
||||
for (size_t i = 0; i < cpts_regex.size(); ++i) {
|
||||
if (cpts_regex[i] >= 128) {
|
||||
throw std::runtime_error("Regex includes both unicode categories and non-ASCII characters - not supported");
|
||||
|
|
@ -1087,7 +1060,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|||
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
|
||||
} else {
|
||||
// no unicode category used, we can use std::wregex directly
|
||||
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
|
||||
std::wstring wregex_expr(cpts_regex.begin(), cpts_regex.end());
|
||||
|
||||
// std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback
|
||||
std::wstring wtext(cpts.begin(), cpts.end());
|
||||
|
|
|
|||
|
|
@ -227,6 +227,7 @@ llama_build_and_test(test-thread-safety.cpp ARGS -m "${MODEL_DEST}" -ngl 99 -p "
|
|||
set_tests_properties(test-thread-safety PROPERTIES FIXTURES_REQUIRED test-download-model)
|
||||
|
||||
llama_build_and_test(test-arg-parser.cpp)
|
||||
llama_build_and_test(test-fs-validate-filename.cpp)
|
||||
|
||||
if (NOT LLAMA_SANITIZE_ADDRESS AND NOT GGML_SCHED_NO_REALLOC)
|
||||
# TODO: repair known memory leaks
|
||||
|
|
|
|||
|
|
@ -2786,9 +2786,10 @@ struct test_set : public test_case {
|
|||
const ggml_type type_dst;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const int dim;
|
||||
const bool inplace;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type_src, type_dst, ne, dim);
|
||||
return VARS_TO_STR5(type_src, type_dst, ne, dim, inplace);
|
||||
}
|
||||
|
||||
size_t op_size(ggml_tensor * t) override {
|
||||
|
|
@ -2796,8 +2797,8 @@ struct test_set : public test_case {
|
|||
}
|
||||
|
||||
test_set(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {6, 5, 4, 3}, int dim = 1)
|
||||
: type_src(type_src), type_dst(type_dst), ne(ne), dim(dim) {}
|
||||
std::array<int64_t, 4> ne = {6, 5, 4, 3}, int dim = 1, bool inplace = false)
|
||||
: type_src(type_src), type_dst(type_dst), ne(ne), dim(dim), inplace(inplace) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
||||
|
|
@ -2808,7 +2809,7 @@ struct test_set : public test_case {
|
|||
for (int i = 0; i < dim; ++i) {
|
||||
ne_dst[i] *= 2;
|
||||
}
|
||||
ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
|
||||
ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
|
||||
ggml_set_param(dst);
|
||||
ggml_set_name(dst, "dst");
|
||||
|
||||
|
|
@ -2816,9 +2817,16 @@ struct test_set : public test_case {
|
|||
for (int i = 0; i < dim; ++i) {
|
||||
offset += ((ne_dst[i] - ne[i])/2)*dst->nb[i];
|
||||
}
|
||||
ggml_tensor * out = ggml_set(ctx, dst, src,
|
||||
// The backward pass requires setting a contiguous region:
|
||||
src->nb[1], src->nb[2], src->nb[3], offset);
|
||||
ggml_tensor * out;
|
||||
if (inplace) {
|
||||
out = ggml_set_inplace(ctx, dst, src,
|
||||
// The backward pass requires setting a contiguous region:
|
||||
src->nb[1], src->nb[2], src->nb[3], offset);
|
||||
} else {
|
||||
out = ggml_set(ctx, dst, src,
|
||||
// The backward pass requires setting a contiguous region:
|
||||
src->nb[1], src->nb[2], src->nb[3], offset);
|
||||
}
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
|
|
@ -5839,26 +5847,46 @@ struct test_acc : public test_case {
|
|||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne_a;
|
||||
const std::array<int64_t, 4> ne_b;
|
||||
const int64_t stride_dim;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR3(type, ne_a, ne_b);
|
||||
return VARS_TO_STR4(type, ne_a, ne_b, stride_dim);
|
||||
}
|
||||
|
||||
test_acc(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne_a = {256, 17, 1, 1},
|
||||
std::array<int64_t, 4> ne_b = {256, 16, 1, 1})
|
||||
: type(type), ne_a(ne_a), ne_b(ne_b) {}
|
||||
std::array<int64_t, 4> ne_a = {256, 17, 2, 3},
|
||||
std::array<int64_t, 4> ne_b = {256, 16, 2, 3},
|
||||
uint64_t stride_dim = -1)
|
||||
: type(type), ne_a(ne_a), ne_b(ne_b), stride_dim(stride_dim) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
|
||||
ggml_set_param(b);
|
||||
ggml_tensor * b;
|
||||
if (stride_dim == 1 || stride_dim == 2 || stride_dim == 3) {
|
||||
// Create a larger tensor and take a view at a non-zero offset.
|
||||
// This tests that the backend correctly handles b's data offset
|
||||
std::array<int64_t, 4> ne_b_pad = {ne_b[0], ne_b[1], ne_b[2], ne_b[3]};
|
||||
ne_b_pad[stride_dim] += 1;
|
||||
ggml_tensor * b_pad = ggml_new_tensor(ctx, type, 4, ne_b_pad.data());
|
||||
ggml_set_param(b_pad);
|
||||
ggml_set_name(b_pad, "b_pad");
|
||||
// View that skips the first row, so b has a non-zero byte offset
|
||||
b = ggml_view_4d(ctx, b_pad,
|
||||
ne_b[0], ne_b[1], ne_b[2], ne_b[3],
|
||||
b_pad->nb[1], b_pad->nb[2], b_pad->nb[3],
|
||||
b_pad->nb[1]);
|
||||
} else {
|
||||
b = ggml_new_tensor(ctx, type, 4, ne_b.data());
|
||||
ggml_set_param(b);
|
||||
}
|
||||
ggml_set_name(b, "b");
|
||||
|
||||
ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
|
||||
// When ne_b[0] < ne_a[0], a->nb[1] != b->nb[1], so the stride
|
||||
// parameters to ggml_acc don't match b's natural stride.
|
||||
ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], 0);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
|
|
@ -7428,11 +7456,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
||||
|
||||
for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim));
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim, false));
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim, true));
|
||||
}
|
||||
|
||||
for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim));
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim, false));
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim, true));
|
||||
}
|
||||
|
||||
// same-type copy
|
||||
|
|
@ -8160,7 +8190,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
|
||||
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {64, 64, 320, 1}));
|
||||
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {9, 9, 1280, 1}));
|
||||
test_cases.emplace_back(new test_acc());
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 1, 1}, {256, 16, 1, 1}, -1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, -1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, -1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, 1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, 2));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {64, 16, 2, 3}, 3));
|
||||
test_cases.emplace_back(new test_pad());
|
||||
test_cases.emplace_back(new test_pad(GGML_TYPE_F32, {33, 17, 2, 1}, 4, 3, true)); // circular
|
||||
test_cases.emplace_back(new test_pad_ext());
|
||||
|
|
@ -8595,6 +8630,14 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1, 1)); // generate
|
||||
|
||||
// acc
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 1, 1}, {256, 16, 1, 1}, -1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, -1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, -1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, 1));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, 2));
|
||||
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {64, 16, 2, 3}, 3));
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,213 @@
|
|||
#include "common.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
#undef NDEBUG
|
||||
#include <cassert>
|
||||
|
||||
static int n_tests = 0;
|
||||
static int n_failed = 0;
|
||||
|
||||
static const char SEP = DIRECTORY_SEPARATOR;
|
||||
|
||||
static void test_normalize(const char * desc, const std::string & expected, const std::string & input) {
|
||||
std::string result = fs_normalize_filepath(input);
|
||||
n_tests++;
|
||||
if (result != expected) {
|
||||
n_failed++;
|
||||
printf(" FAIL: %s (got \"%s\", expected \"%s\")\n", desc, result.c_str(), expected.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
static void test(const char * desc, bool expected, const std::string & filename, bool allow_subdirs = false) {
|
||||
bool result = fs_validate_filename(filename, allow_subdirs);
|
||||
n_tests++;
|
||||
if (result != expected) {
|
||||
n_failed++;
|
||||
printf(" FAIL: %s (got %s, expected %s)\n", desc,
|
||||
result ? "true" : "false", expected ? "true" : "false");
|
||||
}
|
||||
}
|
||||
|
||||
static void test_combined(const char * desc, bool expected, const std::string & path) {
|
||||
test(desc, expected, fs_normalize_filepath(path), true);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
// --- Basic valid filenames ---
|
||||
test("simple ascii", true, "hello.txt");
|
||||
test("no extension", true, "readme");
|
||||
test("multiple dots", true, "archive.tar.gz");
|
||||
test("leading dot (hidden)", true, ".gitignore");
|
||||
test("unicode filename", true, "\xc3\xa9\xc3\xa0\xc3\xbc.txt"); // éàü.txt
|
||||
test("precomposed accent", true, "caf\xc3\xa9"); // café (U+00E9)
|
||||
test("combining accent", true, "cafe\xcc\x81"); // café (e + U+0301)
|
||||
test("japanese hiragana", true, "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86.txt"); // あいう.txt
|
||||
test("korean hangul", true, "\xed\x95\x9c\xea\xb8\x80.txt"); // 한글.txt
|
||||
test("max length (255 bytes)", true, std::string(255, 'a'));
|
||||
|
||||
// --- Basic invalid filenames ---
|
||||
test("empty string", false, "");
|
||||
test("over 255 bytes", false, std::string(256, 'a'));
|
||||
test("just a dot", false, ".");
|
||||
test("double dot", false, "..");
|
||||
test("leading space", false, " foo");
|
||||
test("trailing space", false, "foo ");
|
||||
test("trailing dot", false, "foo.");
|
||||
test("dot path", false, "./././");
|
||||
|
||||
// --- Double dots ---
|
||||
test("contains double dot", true, "foo..bar");
|
||||
test("leading double dot", true, "..foo");
|
||||
test("trailing double dot", false, "foo.."); // trailing dot
|
||||
|
||||
// --- Control characters ---
|
||||
test("null byte", false, std::string("foo\x00""bar", 7));
|
||||
test("newline", false, "foo\nbar");
|
||||
test("tab", false, "foo\tbar");
|
||||
test("C0 control (0x01)", false, "foo\x01""bar");
|
||||
test("DEL (0x7F)", false, "foo\x7f""bar");
|
||||
test("C1 control (0x80)", false, "foo\xc2\x80""bar"); // U+0080
|
||||
test("C1 control (0x9F)", false, "foo\xc2\x9f""bar"); // U+009F
|
||||
|
||||
// --- Illegal characters ---
|
||||
test("colon", false, "foo:bar");
|
||||
test("asterisk", false, "foo*bar");
|
||||
test("question mark", false, "foo?bar");
|
||||
test("double quote", false, "foo\"bar");
|
||||
test("less than", false, "foo<bar");
|
||||
test("greater than", false, "foo>bar");
|
||||
test("pipe", false, "foo|bar");
|
||||
test("forward slash", false, "foo/bar");
|
||||
test("backslash", false, "foo\\bar");
|
||||
|
||||
// --- Unicode special codepoints ---
|
||||
test("fullwidth period U+FF0E", false, "foo\xef\xbc\x8e""bar");
|
||||
test("replacement char U+FFFD", false, "foo\xef\xbf\xbd""bar");
|
||||
test("BOM U+FEFF", false, "foo\xef\xbb\xbf""bar");
|
||||
|
||||
// --- Windows bestfit characters (map to path traversal chars under WideCharToMultiByte) ---
|
||||
test("fullwidth solidus U+FF0F", false, "foo\xef\xbc\x8f""bar"); // / on CP 874, 1250-1258
|
||||
test("fullwidth rev solidus U+FF3C",false, "foo\xef\xbc\xbc""bar"); // \ on CP 874, 1250-1258
|
||||
test("fullwidth colon U+FF1A", false, "foo\xef\xbc\x9a""bar"); // : on CP 874, 1250-1258
|
||||
test("division slash U+2215", false, "foo\xe2\x88\x95""bar"); // / on CP 1250, 1252, 1254
|
||||
test("set minus U+2216", false, "foo\xe2\x88\x96""bar"); // \ on CP 1250, 1252, 1254
|
||||
test("fraction slash U+2044", false, "foo\xe2\x81\x84""bar"); // / on CP 1250, 1252, 1254
|
||||
test("ratio U+2236", false, "foo\xe2\x88\xb6""bar"); // : on CP 1250, 1252, 1254
|
||||
test("armenian full stop U+0589", false, "foo\xd6\x89""bar"); // : on CP 1250, 1252, 1254
|
||||
test("yen sign U+00A5", false, "foo\xc2\xa5""bar"); // \ on CP 932 (Japanese)
|
||||
test("won sign U+20A9", false, "foo\xe2\x82\xa9""bar"); // \ on CP 949 (Korean)
|
||||
test("acute accent U+00B4", false, "foo\xc2\xb4""bar"); // / on CP 1253 (Greek)
|
||||
|
||||
// --- Invalid UTF-8 ---
|
||||
test("invalid continuation", false, "foo\x80""bar");
|
||||
test("truncated sequence", false, "foo\xc3");
|
||||
test("overlong slash (2-byte)", false, "foo\xc0\xaf""bar"); // U+002F as 2-byte
|
||||
test("overlong dot (2-byte)", false, "foo\xc0\xae""bar"); // U+002E as 2-byte
|
||||
test("overlong 'a' (2-byte)", false, "foo\xc1\xa1""bar"); // U+0061 as 2-byte
|
||||
test("overlong 'A' (2-byte)", false, "foo\xc1\x81""bar"); // U+0041 as 2-byte
|
||||
test("overlong null (2-byte)", false, "foo\xc0\x80""bar"); // U+0000 as 2-byte
|
||||
|
||||
// --- Paths without allow_subdirs ---
|
||||
test("forward slash blocked", false, "foo/bar");
|
||||
test("backslash blocked", false, "foo\\bar");
|
||||
|
||||
// --- Paths with allow_subdirs=true ---
|
||||
test("simple subdir", true, "foo/bar", true);
|
||||
test("backslash subdir", true, "foo\\bar", true);
|
||||
test("deep path", true, "a/b/c/d.txt", true);
|
||||
test("trailing slash", true, "foo/bar/", true);
|
||||
test("colon in path", false, "foo/b:r/baz", true);
|
||||
test("control char in path", false, "foo/b\nar/baz", true);
|
||||
test("dot path", false, "./././", true);
|
||||
|
||||
// --- Leading separators ---
|
||||
test("leading slash", false, "/foo/bar", true);
|
||||
test("leading backslash", false, "\\foo\\bar", true);
|
||||
|
||||
// --- Dotdot in paths ---
|
||||
test("leading dotdot in path", false, "../bar", true);
|
||||
test("dotdot in path", false, "foo/../bar", true);
|
||||
test("dotdot component leading", true, "foo/..bar/baz", true);
|
||||
test("dotdot component middle", true, "foo/ba..r/baz", true);
|
||||
test("dotdot component trailing", false, "foo/bar../baz", true); // trailing dot
|
||||
|
||||
// --- Per-component checks ---
|
||||
test("leading space in component", false, "foo/ bar/baz", true);
|
||||
test("trailing space in component", false, "foo/bar /baz", true);
|
||||
test("trailing dot in component", false, "foo/bar./baz", true);
|
||||
test("dot component in path", false, "foo/./bar", true);
|
||||
test("leading space after slash", false, "foo/ bar", true);
|
||||
test("trailing space before slash", false, "bar /baz", true);
|
||||
test("trailing dot before slash", false, "bar./baz", true);
|
||||
|
||||
// --- Simple filename tests ---
|
||||
test("simple binary file", true, "file.bin");
|
||||
test("japanese filename", true, u8"日本式ファイルの芸術.bin");
|
||||
|
||||
// --- Path traversal (no subdirs) ---
|
||||
test("dotdot slash", false, "../bad.bin");
|
||||
test("dotdot backslash", false, "..\\bad.bin");
|
||||
test("subdir dotdot backslash", false, "also/..\\bad.bin");
|
||||
test("subdir slash", false, "also/bad.bin");
|
||||
|
||||
// --- Unicode path equivalents ---
|
||||
test("division slash U+2215", false, "unicode\xe2\x88\x95""bad.bin");
|
||||
test("set minus U+2216", false, "unicode\xe2\x88\x96""bad.bin");
|
||||
test("fullwidth period U+FF0E", false, "unicode\xef\xbc\x8e""bad.bin");
|
||||
|
||||
// --- Overlong encoding ---
|
||||
test("overlong 0xC0 0x2E", false, "overlong\xc0\x2e""bad.bin");
|
||||
test("overlong 0xE0 0x40 0xAE", false, "overlong\xe0\x40\xae""bad.bin");
|
||||
test("overlong dot (2-byte)", false, "overlong\xc0\xae""bad.bin");
|
||||
test("overlong slash (2-byte)", false, "overlong\xc0\xaf""bad.bin");
|
||||
test("overlong slash (3-byte)", false, "overlong\xe0\x80\xaf""bad.bin");
|
||||
test("overlong 0xC0 0x2F", false, "overlong\xc0\x2f""bad.bin");
|
||||
test("overlong 0xC0 0x5C", false, "overlong\xc0\x5c""bad.bin");
|
||||
test("overlong 0xC0 0x80 0x5C", false, "overlong\xc0\x80\x5c""bad.bin");
|
||||
|
||||
// --- fs_normalize_filepath ---
|
||||
test_normalize("passthrough simple", "foo.txt", "foo.txt");
|
||||
test_normalize("passthrough subdir", std::string("foo") + SEP + "bar.txt", "foo/bar.txt");
|
||||
test_normalize("backslash to sep", std::string("foo") + SEP + "bar.txt", "foo\\bar.txt");
|
||||
test_normalize("mixed separators", std::string("a") + SEP + "b" + SEP + "c", "a/b\\c");
|
||||
test_normalize("duplicate slashes", std::string("foo") + SEP + "bar", "foo//bar");
|
||||
test_normalize("duplicate backslashes", std::string("foo") + SEP + "bar", "foo\\\\bar");
|
||||
test_normalize("triple slashes", std::string("foo") + SEP + "bar", "foo///bar");
|
||||
test_normalize("leading slash stripped", "foo", "/foo");
|
||||
test_normalize("leading backslash stripped", "foo", "\\foo");
|
||||
test_normalize("multiple leading slashes", "foo", "///foo");
|
||||
test_normalize("leading dot-slash stripped", "foo", "./foo");
|
||||
test_normalize("leading dot-backslash stripped", "foo", ".\\foo");
|
||||
test_normalize("deep path normalized",
|
||||
std::string("a") + SEP + "b" + SEP + "c" + SEP + "d.txt",
|
||||
"/a//b\\c/d.txt");
|
||||
|
||||
// --- normalize doesn't validate and doesn't trim dot segments in the middle of the path ---
|
||||
test_normalize("dotdot retained", std::string("foo") + SEP + ".." + SEP + "bar", "foo/../bar");
|
||||
test_normalize("dotdot at start retained", std::string("..") + SEP + "bar", "../bar");
|
||||
test_normalize("dotdot at end retained", std::string("foo") + SEP + "..", "foo/..");
|
||||
test_normalize("dot component retained mid", std::string("foo") + SEP + "." + SEP + "bar", "foo/./bar");
|
||||
|
||||
// --- combined tests validating normalized paths ---
|
||||
test_combined("absolute windows path", false, "C:\\Tools\\secrets.txt"); // absolute path
|
||||
test_combined("root is relative", true, "/meow/image.jpg"); // root separators are normalized to relative
|
||||
test_combined("relative dot path", true, "././meow/image.jpg"); // ok because no effect
|
||||
test_combined("inner dot path", false, "././meow/./image.jpg"); // blocked because plausibly a downstream traversal attempt
|
||||
test_combined("double dot path", false, "../meow/image.jpg"); // direct traversal attempt
|
||||
test_combined("mid double dot path", false, "meow/../image.jpg"); // technically a subpath but plausibly a downstream traversal attempt
|
||||
test_combined("end double dot path", false, "meow/.."); // blank path
|
||||
test_combined("triple single dot root", false, "./././"); // blank path
|
||||
test_combined("triple single dot file", true, "./././image.jpg"); // weird but okay
|
||||
|
||||
if (n_failed) {
|
||||
printf("\n%d/%d tests failed\n", n_failed, n_tests);
|
||||
fflush(stdout);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -52,6 +52,7 @@ struct cli_context {
|
|||
json messages = json::array();
|
||||
std::vector<raw_buffer> input_files;
|
||||
task_params defaults;
|
||||
bool verbose_prompt;
|
||||
|
||||
// thread for showing "loading" animation
|
||||
std::atomic<bool> loading_show;
|
||||
|
|
@ -66,6 +67,8 @@ struct cli_context {
|
|||
defaults.stream = true; // make sure we always use streaming mode
|
||||
defaults.timings_per_token = true; // in order to get timings even when we cancel mid-way
|
||||
// defaults.return_progress = true; // TODO: show progress
|
||||
|
||||
verbose_prompt = params.verbose_prompt;
|
||||
}
|
||||
|
||||
std::string generate_completion(result_timings & out_timings) {
|
||||
|
|
@ -91,6 +94,12 @@ struct cli_context {
|
|||
rd.post_task({std::move(task)});
|
||||
}
|
||||
|
||||
if (verbose_prompt) {
|
||||
console::set_display(DISPLAY_TYPE_PROMPT);
|
||||
console::log("%s\n\n", chat_params.prompt.c_str());
|
||||
console::set_display(DISPLAY_TYPE_RESET);
|
||||
}
|
||||
|
||||
// wait for first result
|
||||
console::spinner::start();
|
||||
server_task_result_ptr result = rd.next(should_stop);
|
||||
|
|
|
|||
|
|
@ -798,6 +798,7 @@ static void handle_media(
|
|||
}
|
||||
// load local image file
|
||||
std::string file_path = url.substr(7); // remove "file://"
|
||||
file_path = fs_normalize_filepath(file_path); // remove any leading './' and normalize separators
|
||||
raw_buffer data;
|
||||
if (!fs_validate_filename(file_path, true)) {
|
||||
throw std::invalid_argument("file path is not allowed: " + file_path);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
<script lang="ts">
|
||||
import { goto } from '$app/navigation';
|
||||
import { base } from '$app/paths';
|
||||
import {
|
||||
chatStore,
|
||||
pendingEditMessageId,
|
||||
|
|
@ -119,7 +120,7 @@
|
|||
const conversationDeleted = await removeSystemPromptPlaceholder(message.id);
|
||||
|
||||
if (conversationDeleted) {
|
||||
goto('/');
|
||||
goto(`${base}/`);
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
@ -220,7 +221,7 @@
|
|||
const conversationDeleted = await removeSystemPromptPlaceholder(message.id);
|
||||
isEditing = false;
|
||||
if (conversationDeleted) {
|
||||
goto('/');
|
||||
goto(`${base}/`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
import { BadgeChatStatistic } from '$lib/components/app';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { ChatMessageStatsView } from '$lib/enums';
|
||||
import { formatPerformanceTime } from '$lib/utils/formatters';
|
||||
|
||||
interface Props {
|
||||
predictedTokens?: number;
|
||||
|
|
@ -57,8 +58,8 @@
|
|||
);
|
||||
|
||||
let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
|
||||
let timeInSeconds = $derived(
|
||||
predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
|
||||
let formattedTime = $derived(
|
||||
predictedMs !== undefined ? formatPerformanceTime(predictedMs) : '0s'
|
||||
);
|
||||
|
||||
let promptTokensPerSecond = $derived(
|
||||
|
|
@ -67,15 +68,15 @@
|
|||
: undefined
|
||||
);
|
||||
|
||||
let promptTimeInSeconds = $derived(
|
||||
promptMs !== undefined ? (promptMs / 1000).toFixed(2) : undefined
|
||||
let formattedPromptTime = $derived(
|
||||
promptMs !== undefined ? formatPerformanceTime(promptMs) : undefined
|
||||
);
|
||||
|
||||
let hasPromptStats = $derived(
|
||||
promptTokens !== undefined &&
|
||||
promptMs !== undefined &&
|
||||
promptTokensPerSecond !== undefined &&
|
||||
promptTimeInSeconds !== undefined
|
||||
formattedPromptTime !== undefined
|
||||
);
|
||||
|
||||
// In live mode, generation tab is disabled until we have generation stats
|
||||
|
|
@ -142,7 +143,7 @@
|
|||
<BadgeChatStatistic
|
||||
class="bg-transparent"
|
||||
icon={Clock}
|
||||
value="{timeInSeconds}s"
|
||||
value={formattedTime}
|
||||
tooltipLabel="Generation time"
|
||||
/>
|
||||
<BadgeChatStatistic
|
||||
|
|
@ -161,7 +162,7 @@
|
|||
<BadgeChatStatistic
|
||||
class="bg-transparent"
|
||||
icon={Clock}
|
||||
value="{promptTimeInSeconds}s"
|
||||
value={formattedPromptTime ?? '0s'}
|
||||
tooltipLabel="Prompt processing time"
|
||||
/>
|
||||
<BadgeChatStatistic
|
||||
|
|
|
|||
|
|
@ -0,0 +1,88 @@
|
|||
<script lang="ts">
|
||||
import type { Snippet } from 'svelte';
|
||||
import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
import { SearchInput } from '$lib/components/app';
|
||||
|
||||
interface Props {
|
||||
open?: boolean;
|
||||
onOpenChange?: (open: boolean) => void;
|
||||
placeholder?: string;
|
||||
searchValue?: string;
|
||||
onSearchChange?: (value: string) => void;
|
||||
onSearchKeyDown?: (event: KeyboardEvent) => void;
|
||||
align?: 'start' | 'center' | 'end';
|
||||
contentClass?: string;
|
||||
emptyMessage?: string;
|
||||
isEmpty?: boolean;
|
||||
disabled?: boolean;
|
||||
trigger: Snippet;
|
||||
children: Snippet;
|
||||
footer?: Snippet;
|
||||
}
|
||||
|
||||
let {
|
||||
open = $bindable(false),
|
||||
onOpenChange,
|
||||
placeholder = 'Search...',
|
||||
searchValue = $bindable(''),
|
||||
onSearchChange,
|
||||
onSearchKeyDown,
|
||||
align = 'start',
|
||||
contentClass = 'w-72',
|
||||
emptyMessage = 'No items found',
|
||||
isEmpty = false,
|
||||
disabled = false,
|
||||
trigger,
|
||||
children,
|
||||
footer
|
||||
}: Props = $props();
|
||||
|
||||
function handleOpenChange(newOpen: boolean) {
|
||||
open = newOpen;
|
||||
|
||||
if (!newOpen) {
|
||||
searchValue = '';
|
||||
onSearchChange?.('');
|
||||
}
|
||||
|
||||
onOpenChange?.(newOpen);
|
||||
}
|
||||
</script>
|
||||
|
||||
<DropdownMenu.Root bind:open onOpenChange={handleOpenChange}>
|
||||
<DropdownMenu.Trigger
|
||||
{disabled}
|
||||
onclick={(e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
}}
|
||||
>
|
||||
{@render trigger()}
|
||||
</DropdownMenu.Trigger>
|
||||
|
||||
<DropdownMenu.Content {align} class={cn(contentClass, 'pt-0')}>
|
||||
<div class="sticky top-0 z-10 mb-2 bg-popover p-1 pt-2">
|
||||
<SearchInput
|
||||
{placeholder}
|
||||
bind:value={searchValue}
|
||||
onInput={onSearchChange}
|
||||
onKeyDown={onSearchKeyDown}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div class={cn('overflow-y-auto')}>
|
||||
{@render children()}
|
||||
|
||||
{#if isEmpty}
|
||||
<div class="px-2 py-3 text-center text-sm text-muted-foreground">{emptyMessage}</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if footer}
|
||||
<DropdownMenu.Separator />
|
||||
|
||||
{@render footer()}
|
||||
{/if}
|
||||
</DropdownMenu.Content>
|
||||
</DropdownMenu.Root>
|
||||
|
|
@ -486,6 +486,8 @@
|
|||
text-decoration: underline;
|
||||
text-underline-offset: 2px;
|
||||
transition: color 0.2s ease;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-all;
|
||||
}
|
||||
|
||||
div :global(a:hover) {
|
||||
|
|
|
|||
|
|
@ -51,3 +51,75 @@ export function formatNumber(num: number | unknown): string {
|
|||
|
||||
return num.toLocaleString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Format JSON string with pretty printing (2-space indentation)
|
||||
* Returns original string if parsing fails
|
||||
*
|
||||
* @param jsonString - JSON string to format
|
||||
* @returns Pretty-printed JSON string or original if invalid
|
||||
*/
|
||||
export function formatJsonPretty(jsonString: string): string {
|
||||
try {
|
||||
const parsed = JSON.parse(jsonString);
|
||||
return JSON.stringify(parsed, null, 2);
|
||||
} catch {
|
||||
return jsonString;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Format time as HH:MM:SS in 24-hour format
|
||||
*
|
||||
* @param date - Date object to format
|
||||
* @returns Formatted time string (HH:MM:SS)
|
||||
*/
|
||||
export function formatTime(date: Date): string {
|
||||
return date.toLocaleTimeString('en-US', {
|
||||
hour12: false,
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
second: '2-digit'
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats milliseconds to a human-readable time string for performance metrics.
|
||||
* Examples: "4h 12min 54s", "12min 34s", "45s", "0.5s"
|
||||
*
|
||||
* @param ms - Time in milliseconds
|
||||
* @returns Formatted time string
|
||||
*/
|
||||
export function formatPerformanceTime(ms: number): string {
|
||||
if (ms < 0) return '0s';
|
||||
|
||||
const totalSeconds = ms / 1000;
|
||||
|
||||
if (totalSeconds < 1) {
|
||||
return `${totalSeconds.toFixed(1)}s`;
|
||||
}
|
||||
|
||||
if (totalSeconds < 10) {
|
||||
return `${totalSeconds.toFixed(1)}s`;
|
||||
}
|
||||
|
||||
const hours = Math.floor(totalSeconds / 3600);
|
||||
const minutes = Math.floor((totalSeconds % 3600) / 60);
|
||||
const seconds = Math.floor(totalSeconds % 60);
|
||||
|
||||
const parts: string[] = [];
|
||||
|
||||
if (hours > 0) {
|
||||
parts.push(`${hours}h`);
|
||||
}
|
||||
|
||||
if (minutes > 0) {
|
||||
parts.push(`${minutes}min`);
|
||||
}
|
||||
|
||||
if (seconds > 0 || parts.length === 0) {
|
||||
parts.push(`${seconds}s`);
|
||||
}
|
||||
|
||||
return parts.join(' ');
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
import { defineMeta } from '@storybook/addon-svelte-csf';
|
||||
import ChatForm from '$lib/components/app/chat/ChatForm/ChatForm.svelte';
|
||||
import { expect } from 'storybook/test';
|
||||
import { mockServerProps, mockConfigs } from './fixtures/storybook-mocks';
|
||||
import jpgAsset from './fixtures/assets/1.jpg?url';
|
||||
import svgAsset from './fixtures/assets/hf-logo.svg?url';
|
||||
import pdfAsset from './fixtures/assets/example.pdf?raw';
|
||||
|
|
@ -46,8 +45,6 @@
|
|||
name="Default"
|
||||
args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]' }}
|
||||
play={async ({ canvas, userEvent }) => {
|
||||
mockServerProps(mockConfigs.noModalities);
|
||||
|
||||
const textarea = await canvas.findByRole('textbox');
|
||||
const submitButton = await canvas.findByRole('button', { name: 'Send' });
|
||||
|
||||
|
|
@ -66,73 +63,11 @@
|
|||
|
||||
const fileInput = document.querySelector('input[type="file"]');
|
||||
await expect(fileInput).not.toHaveAttribute('accept');
|
||||
|
||||
// Open file attachments dropdown
|
||||
const fileUploadButton = canvas.getByText('Attach files');
|
||||
await userEvent.click(fileUploadButton);
|
||||
|
||||
// Check dropdown menu items are disabled (no modalities)
|
||||
const imagesButton = document.querySelector('.images-button');
|
||||
const audioButton = document.querySelector('.audio-button');
|
||||
|
||||
await expect(imagesButton).toHaveAttribute('data-disabled');
|
||||
await expect(audioButton).toHaveAttribute('data-disabled');
|
||||
|
||||
// Close dropdown by pressing Escape
|
||||
await userEvent.keyboard('{Escape}');
|
||||
}}
|
||||
/>
|
||||
|
||||
<Story name="Loading" args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]', isLoading: true }} />
|
||||
|
||||
<Story
|
||||
name="VisionModality"
|
||||
args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]' }}
|
||||
play={async ({ canvas, userEvent }) => {
|
||||
mockServerProps(mockConfigs.visionOnly);
|
||||
|
||||
// Open file attachments dropdown and verify it works
|
||||
const fileUploadButton = canvas.getByText('Attach files');
|
||||
await userEvent.click(fileUploadButton);
|
||||
|
||||
// Verify dropdown menu items exist
|
||||
const imagesButton = document.querySelector('.images-button');
|
||||
const audioButton = document.querySelector('.audio-button');
|
||||
|
||||
await expect(imagesButton).toBeInTheDocument();
|
||||
await expect(audioButton).toBeInTheDocument();
|
||||
|
||||
// Close dropdown by pressing Escape
|
||||
await userEvent.keyboard('{Escape}');
|
||||
|
||||
console.log('✅ Vision modality: Dropdown menu verified');
|
||||
}}
|
||||
/>
|
||||
|
||||
<Story
|
||||
name="AudioModality"
|
||||
args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]' }}
|
||||
play={async ({ canvas, userEvent }) => {
|
||||
mockServerProps(mockConfigs.audioOnly);
|
||||
|
||||
// Open file attachments dropdown and verify it works
|
||||
const fileUploadButton = canvas.getByText('Attach files');
|
||||
await userEvent.click(fileUploadButton);
|
||||
|
||||
// Verify dropdown menu items exist
|
||||
const imagesButton = document.querySelector('.images-button');
|
||||
const audioButton = document.querySelector('.audio-button');
|
||||
|
||||
await expect(imagesButton).toBeInTheDocument();
|
||||
await expect(audioButton).toBeInTheDocument();
|
||||
|
||||
// Close dropdown by pressing Escape
|
||||
await userEvent.keyboard('{Escape}');
|
||||
|
||||
console.log('✅ Audio modality: Dropdown menu verified');
|
||||
}}
|
||||
/>
|
||||
|
||||
<Story
|
||||
name="FileAttachments"
|
||||
args={{
|
||||
|
|
@ -140,8 +75,6 @@
|
|||
uploadedFiles: fileAttachments
|
||||
}}
|
||||
play={async ({ canvas }) => {
|
||||
mockServerProps(mockConfigs.bothModalities);
|
||||
|
||||
const jpgAttachment = canvas.getByAltText('1.jpg');
|
||||
const svgAttachment = canvas.getByAltText('hf-logo.svg');
|
||||
const pdfFileExtension = canvas.getByText('PDF');
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ if (LLAMA_BUILD_BORINGSSL)
|
|||
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")
|
||||
|
||||
set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
|
||||
set(BORINGSSL_VERSION "0.20260204.0" CACHE STRING "BoringSSL version")
|
||||
set(BORINGSSL_VERSION "0.20260211.0" CACHE STRING "BoringSSL version")
|
||||
|
||||
message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue