diff --git a/common/arg.cpp b/common/arg.cpp index 9c85696ebd..18f953a38e 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1301,7 +1301,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, bool value) { params.kv_unified = value; } - ).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH})); + ).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL})); add_opt(common_arg( {"--context-shift"}, {"--no-context-shift"}, diff --git a/common/common.cpp b/common/common.cpp index 93474f88c7..ec15804c91 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1,7 +1,3 @@ -#if defined(_MSC_VER) -#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING -#endif - #include "ggml.h" #include "gguf.h" @@ -9,12 +5,12 @@ #include "log.h" #include "llama.h" #include "sampling.h" +#include "unicode.h" #include #include #include #include -#include #include #include #include @@ -706,45 +702,28 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) { return false; } - std::u32string filename_utf32; - try { -#if defined(__clang__) - // disable C++17 deprecation warning for std::codecvt_utf8 -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-declarations" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif + size_t offset = 0; + while (offset < filename.size()) { + utf8_parse_result result = parse_utf8_codepoint(filename, offset); - std::wstring_convert, char32_t> converter; - -#if defined(__clang__) -# pragma clang diagnostic pop -#elif defined(__GNUC__) -# pragma GCC diagnostic pop -#endif - - filename_utf32 = converter.from_bytes(filename); - - // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used, - // or invalid encodings were encountered. Reject such attempts - std::string filename_reencoded = converter.to_bytes(filename_utf32); - if (filename_reencoded != filename) { + if (result.status != utf8_parse_result::SUCCESS) { return false; } - } catch (const std::exception &) { - return false; - } + uint32_t c = result.codepoint; - // Check for forbidden codepoints: - // - Control characters - // - Unicode equivalents of illegal characters - // - UTF-16 surrogate pairs - // - UTF-8 replacement character - // - Byte order mark (BOM) - // - Illegal characters: / \ : * ? " < > | - for (char32_t c : filename_utf32) { + if ((result.bytes_consumed == 2 && c < 0x80) || + (result.bytes_consumed == 3 && c < 0x800) || + (result.bytes_consumed == 4 && c < 0x10000)) { + return false; + } + + // Check for forbidden codepoints: + // - Control characters + // - Unicode equivalents of illegal characters + // - UTF-16 surrogate pairs + // - UTF-8 replacement character + // - Byte order mark (BOM) + // - Illegal characters: / \ : * ? " < > | if (c <= 0x1F // Control characters (C0) || c == 0x7F // Control characters (DEL) || (c >= 0x80 && c <= 0x9F) // Control characters (C1) @@ -752,6 +731,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) { || c == 0x2215 // Division Slash (forward slash equivalent) || c == 0x2216 // Set Minus (backslash equivalent) || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs + || c > 0x10FFFF // Max Unicode limit || c == 0xFFFD // Replacement Character (UTF-8) || c == 0xFEFF // Byte Order Mark (BOM) || c == ':' || c == '*' // Illegal characters @@ -762,6 +742,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) { // Subdirectories not allowed, reject path separators return false; } + offset += result.bytes_consumed; } // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename diff --git a/docs/backend/snapdragon/README.md b/docs/backend/snapdragon/README.md index 8e1f37b206..2c3f88e91a 100644 --- a/docs/backend/snapdragon/README.md +++ b/docs/backend/snapdragon/README.md @@ -35,7 +35,7 @@ Adapt below build commands accordingly. Let's build llama.cpp with CPU, OpenCL, and Hexagon backends via CMake presets: ``` -[d]/workspace> cp docs/backend/hexagon/CMakeUserPresets.json . +[d]/workspace> cp docs/backend/snapdragon/CMakeUserPresets.json . [d]/workspace> cmake --preset arm64-android-snapdragon-release -B build-snapdragon Preset CMake variables: diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu index 8694fd06c7..35735d48b2 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -63,11 +63,19 @@ static __global__ void flash_attn_ext_f16( constexpr int frag_m = ncols == 8 ? 32 : 16; constexpr int frag_n = ncols == 8 ? 8 : 16; static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); +#if defined(GGML_USE_HIP) + typedef wmma::fragment frag_a_K; + typedef wmma::fragment frag_a_V; + typedef wmma::fragment frag_b; + typedef wmma::fragment frag_c_KQ; + typedef wmma::fragment frag_c_VKQ; +#else typedef wmma::fragment frag_a_K; typedef wmma::fragment frag_a_V; typedef wmma::fragment frag_b; typedef wmma::fragment frag_c_KQ; typedef wmma::fragment frag_c_VKQ; +#endif constexpr int KQ_stride_tc = nwarps*frag_m; // Number of KQ rows calculated in parallel. constexpr int VKQ_ratio = KQ_stride_tc/VKQ_stride; // Number of parallel VKQ accumulators needed to keep all warps busy. @@ -126,6 +134,19 @@ static __global__ void flash_attn_ext_f16( __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. half2 * VKQ2 = (half2 *) VKQ; + +#if defined(GGML_USE_HIP) + const _Float16 * K_h_f16 = reinterpret_cast(K_h); + const _Float16 * V_h_f16 = reinterpret_cast(V_h); + _Float16 * KQ_f16 = reinterpret_cast<_Float16 *>(KQ); + _Float16 * VKQ_f16 = reinterpret_cast<_Float16 *>(VKQ); +#else + const half * K_h_f16 = K_h; + const half * V_h_f16 = V_h; + half * KQ_f16 = KQ; + half * VKQ_f16 = VKQ; +#endif + #pragma unroll for (int j0 = 0; j0 < ncols; j0 += nwarps) { const int j = j0 + threadIdx.y; @@ -160,7 +181,7 @@ static __global__ void flash_attn_ext_f16( for (int i0 = 0; i0 < D; i0 += 16) { #pragma unroll for (int j0 = 0; j0 < ncols; j0 += frag_n) { - wmma::load_matrix_sync(Q_b[i0/16][j0/frag_n], KQ + j0*D_padded + i0, D_padded); + wmma::load_matrix_sync(Q_b[i0/16][j0/frag_n], KQ_f16 + j0*D_padded + i0, D_padded); } } @@ -180,7 +201,7 @@ static __global__ void flash_attn_ext_f16( #pragma unroll for (int k_KQ_0 = 0; k_KQ_0 < D; k_KQ_0 += 16) { frag_a_K K_a; - wmma::load_matrix_sync(K_a, K_h + int64_t(k_VKQ_0 + i_KQ_0 + frag_m*threadIdx.y)*stride_KV + k_KQ_0, stride_KV); + wmma::load_matrix_sync(K_a, K_h_f16 + int64_t(k_VKQ_0 + i_KQ_0 + frag_m*threadIdx.y)*stride_KV + k_KQ_0, stride_KV); #pragma unroll for (int j = 0; j < ncols/frag_n; ++j) { wmma::mma_sync(KQ_c[j], K_a, Q_b[k_KQ_0/16][j], KQ_c[j]); @@ -310,7 +331,7 @@ static __global__ void flash_attn_ext_f16( const int k = k0 + (threadIdx.y % VKQ_ratio)*16; wmma::load_matrix_sync( KQ_b[k0/(VKQ_ratio*16)][j0/frag_n], - KQ + j0*(kqar*kqs_padded) + k, + KQ_f16 + j0*(kqar*kqs_padded) + k, kqar*kqs_padded); } } @@ -328,7 +349,7 @@ static __global__ void flash_attn_ext_f16( const int k = k0 + (threadIdx.y % VKQ_ratio)*16; frag_a_V v_a; - wmma::load_matrix_sync(v_a, V_h + int64_t(k_VKQ_0 + k)*stride_KV + i_VKQ_0 + frag_m*(threadIdx.y/VKQ_ratio), stride_KV); + wmma::load_matrix_sync(v_a, V_h_f16 + int64_t(k_VKQ_0 + k)*stride_KV + i_VKQ_0 + frag_m*(threadIdx.y/VKQ_ratio), stride_KV); #pragma unroll for (int j = 0; j < ncols/frag_n; ++j) { wmma::mma_sync(VKQ_c[i_VKQ_0/VKQ_stride][j], v_a, KQ_b[k0/(VKQ_ratio*16)][j], VKQ_c[i_VKQ_0/VKQ_stride][j]); @@ -344,7 +365,7 @@ static __global__ void flash_attn_ext_f16( #pragma unroll for (int j0 = 0; j0 < ncols; j0 += frag_n) { wmma::store_matrix_sync( - KQ + offset_k + j0*D_padded + i_KQ_0 + frag_m*(threadIdx.y/VKQ_ratio), + KQ_f16 + offset_k + j0*D_padded + i_KQ_0 + frag_m*(threadIdx.y/VKQ_ratio), VKQ_c[i_KQ_0/VKQ_stride][j0/frag_n], D_padded, wmma::mem_col_major); } diff --git a/ggml/src/ggml-hexagon/htp/htp-ops.h b/ggml/src/ggml-hexagon/htp/htp-ops.h index c0d72587ce..f1ad24dbfa 100644 --- a/ggml/src/ggml-hexagon/htp/htp-ops.h +++ b/ggml/src/ggml-hexagon/htp/htp-ops.h @@ -64,25 +64,12 @@ struct htp_ops_context { struct fastdiv_values broadcast_rv2; struct fastdiv_values broadcast_rv3; - struct fastdiv_values mm_div_ne12_ne1; // fastdiv values for ne12 * ne1 - struct fastdiv_values mm_div_ne1; // fastdiv values for ne1 - struct fastdiv_values mm_div_r2; // fastdiv values for ne12 / ne02 - struct fastdiv_values mm_div_r3; // fastdiv values for ne13 / ne03 - struct fastdiv_values set_rows_div_ne12; // fastdiv values for ne12 struct fastdiv_values set_rows_div_ne11; // fastdiv values for ne11 struct fastdiv_values get_rows_div_ne10; // fastdiv values for ne10 struct fastdiv_values get_rows_div_ne10_ne11; // fastdiv values for ne10 * ne11 - struct fastdiv_values cpy_div_ne01; // fastdiv values for ne01 - struct fastdiv_values cpy_div_ne02; // fastdiv values for ne02 - struct fastdiv_values cpy_div_ne03; // fastdiv values for ne03 - - struct fastdiv_values cpy_rshp_div_n0; // fastdiv values for ne00 - struct fastdiv_values cpy_rshp_div_n1n0; // fastdiv values for ne00*ne01 - struct fastdiv_values cpy_rshp_div_n2n1n0; // fastdiv values for ne00*ne01*ne02 - uint32_t flags; }; diff --git a/ggml/src/ggml-hexagon/htp/main.c b/ggml/src/ggml-hexagon/htp/main.c index 62708eee5c..92a1422896 100644 --- a/ggml/src/ggml-hexagon/htp/main.c +++ b/ggml/src/ggml-hexagon/htp/main.c @@ -189,7 +189,7 @@ static int vtcm_release_callback(unsigned int rctx, void * state) { // otherwise we'll release it once we're done with the current Op. if (ctx->vtcm_inuse) { - ctx->vtcm_needs_release = false; + ctx->vtcm_needs_release = true; return 0; } diff --git a/ggml/src/ggml-hexagon/htp/matmul-ops.c b/ggml/src/ggml-hexagon/htp/matmul-ops.c index d251eeed33..c360abe8da 100644 --- a/ggml/src/ggml-hexagon/htp/matmul-ops.c +++ b/ggml/src/ggml-hexagon/htp/matmul-ops.c @@ -23,10 +23,30 @@ #define MM_SPAD_SRC1_NROWS 16 #define MM_SPAD_DST_NROWS 2 -struct htp_matmul_type { +struct htp_matmul_context { const char * type; - void (*vec_dot)(const int n, float * restrict s, const void * restrict vx, const void * restrict vy); - void (*vec_dot_rx2)(const int n, float * restrict s, const void * restrict vx, uint32_t vx_row_size, const void * restrict vy); + struct htp_ops_context * octx; + + void (*vec_dot_1x1)(const int n, float * restrict s0, + const void * restrict vx0, + const void * restrict vy0); + + void (*vec_dot_2x1)(const int n, float * restrict s0, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0); + + void (*vec_dot_2x2)(const int n, float * restrict s0, float * restrict s1, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0, const void * restrict vy1); + + // Precomputed values + uint32_t src0_nrows_per_thread; + uint32_t src1_nrows_per_thread; + + struct fastdiv_values mm_div_ne12_ne1; + struct fastdiv_values mm_div_ne1; + struct fastdiv_values mm_div_r2; + struct fastdiv_values mm_div_r3; }; // vdelta control to replicate first 4x fp32 values across lanes @@ -122,6 +142,7 @@ static inline HVX_Vector_x8 hvx_vec_load_q4x4x8(const uint8_t * restrict ptr) { HVX_Vector v6_7 = vptr[3]; // ... const HVX_Vector mask_h4 = Q6_Vb_vsplat_R(0x0F); + const HVX_Vector i8 = Q6_Vb_vsplat_R(8); HVX_Vector v0 = Q6_V_vand_VV(v0_1, mask_h4); // & 0x0F HVX_Vector v1 = Q6_Vub_vlsr_VubR(v0_1, 4); // >> 4 @@ -133,15 +154,14 @@ static inline HVX_Vector_x8 hvx_vec_load_q4x4x8(const uint8_t * restrict ptr) { HVX_Vector v7 = Q6_Vub_vlsr_VubR(v6_7, 4); // >> 4 // Convert uint4 to int4 (i.e. x - 8) - const HVX_Vector i8 = Q6_Vb_vsplat_R(8); - v0 = Q6_Vb_vsub_VbVb(v0, i8); - v1 = Q6_Vb_vsub_VbVb(v1, i8); - v2 = Q6_Vb_vsub_VbVb(v2, i8); - v3 = Q6_Vb_vsub_VbVb(v3, i8); - v4 = Q6_Vb_vsub_VbVb(v4, i8); - v5 = Q6_Vb_vsub_VbVb(v5, i8); - v6 = Q6_Vb_vsub_VbVb(v6, i8); - v7 = Q6_Vb_vsub_VbVb(v7, i8); + v0 = Q6_Vb_vsub_VbVb(v0, i8); + v1 = Q6_Vb_vsub_VbVb(v1, i8); + v2 = Q6_Vb_vsub_VbVb(v2, i8); + v3 = Q6_Vb_vsub_VbVb(v3, i8); + v4 = Q6_Vb_vsub_VbVb(v4, i8); + v5 = Q6_Vb_vsub_VbVb(v5, i8); + v6 = Q6_Vb_vsub_VbVb(v6, i8); + v7 = Q6_Vb_vsub_VbVb(v7, i8); HVX_Vector_x8 r = { v0, v1, v2, v3, v4, v5, v6, v7 }; return r; @@ -156,6 +176,7 @@ static inline HVX_Vector_x8 hvx_vec_load_mxfp4x4x8(const uint8_t * restrict ptr) HVX_Vector v6_7 = vptr[3]; // ... const HVX_Vector mask_h4 = Q6_Vb_vsplat_R(0x0F); + const HVX_Vector lut = *(const HVX_Vector *) kvalues_mxfp4_lut; HVX_Vector v0 = Q6_V_vand_VV(v0_1, mask_h4); // & 0x0F HVX_Vector v1 = Q6_Vub_vlsr_VubR(v0_1, 4); // >> 4 @@ -166,15 +187,14 @@ static inline HVX_Vector_x8 hvx_vec_load_mxfp4x4x8(const uint8_t * restrict ptr) HVX_Vector v6 = Q6_V_vand_VV(v6_7, mask_h4); // & 0x0F HVX_Vector v7 = Q6_Vub_vlsr_VubR(v6_7, 4); // >> 4 - HVX_Vector lut = *(const HVX_Vector *) kvalues_mxfp4_lut; - v0 = Q6_Vb_vlut32_VbVbI(v0, lut, 0); - v1 = Q6_Vb_vlut32_VbVbI(v1, lut, 0); - v2 = Q6_Vb_vlut32_VbVbI(v2, lut, 0); - v3 = Q6_Vb_vlut32_VbVbI(v3, lut, 0); - v4 = Q6_Vb_vlut32_VbVbI(v4, lut, 0); - v5 = Q6_Vb_vlut32_VbVbI(v5, lut, 0); - v6 = Q6_Vb_vlut32_VbVbI(v6, lut, 0); - v7 = Q6_Vb_vlut32_VbVbI(v7, lut, 0); + v0 = Q6_Vb_vlut32_VbVbI(v0, lut, 0); + v1 = Q6_Vb_vlut32_VbVbI(v1, lut, 0); + v2 = Q6_Vb_vlut32_VbVbI(v2, lut, 0); + v3 = Q6_Vb_vlut32_VbVbI(v3, lut, 0); + v4 = Q6_Vb_vlut32_VbVbI(v4, lut, 0); + v5 = Q6_Vb_vlut32_VbVbI(v5, lut, 0); + v6 = Q6_Vb_vlut32_VbVbI(v6, lut, 0); + v7 = Q6_Vb_vlut32_VbVbI(v7, lut, 0); HVX_Vector_x8 r = { v0, v1, v2, v3, v4, v5, v6, v7 }; return r; @@ -196,46 +216,6 @@ static inline HVX_Vector_x8 hvx_vec_load_q8x4x8(const uint8_t * restrict ptr) { return r; } -static inline HVX_Vector_x4 hvx_vec_load_x4_f16(const uint8_t * restrict ptr) { - const HVX_Vector * restrict vptr = (const HVX_Vector *) ptr; - - HVX_Vector v0 = vptr[0]; // first 64 vals - HVX_Vector v1 = vptr[1]; // second 64 vals - HVX_Vector v2 = vptr[2]; // third 64 vals - HVX_Vector v3 = vptr[3]; // forth 64 vals - - HVX_Vector_x4 r = { v0, v1, v2, v3 }; - return r; -} - -static inline HVX_Vector_x4 hvx_vec_load_x4_f32_as_f16(const uint8_t * restrict ptr) { - const HVX_VectorPair * restrict vptr = (const HVX_VectorPair *) ptr; - - HVX_VectorPair v0 = vptr[0]; // first 64 vals - HVX_VectorPair v1 = vptr[1]; // second 64 vals - HVX_VectorPair v2 = vptr[2]; // third 64 vals - HVX_VectorPair v3 = vptr[3]; // forth 64 vals - - HVX_Vector vq0_lo = Q6_Vqf32_vsub_VsfVsf(Q6_V_lo_W(v0), Q6_V_vzero()); - HVX_Vector vq0_hi = Q6_Vqf32_vsub_VsfVsf(Q6_V_hi_W(v0), Q6_V_vzero()); - HVX_Vector vq1_lo = Q6_Vqf32_vsub_VsfVsf(Q6_V_lo_W(v1), Q6_V_vzero()); - HVX_Vector vq1_hi = Q6_Vqf32_vsub_VsfVsf(Q6_V_hi_W(v1), Q6_V_vzero()); - HVX_Vector vq2_lo = Q6_Vqf32_vsub_VsfVsf(Q6_V_lo_W(v2), Q6_V_vzero()); - HVX_Vector vq2_hi = Q6_Vqf32_vsub_VsfVsf(Q6_V_hi_W(v2), Q6_V_vzero()); - HVX_Vector vq3_lo = Q6_Vqf32_vsub_VsfVsf(Q6_V_lo_W(v3), Q6_V_vzero()); - HVX_Vector vq3_hi = Q6_Vqf32_vsub_VsfVsf(Q6_V_hi_W(v3), Q6_V_vzero()); - - HVX_Vector vh0 = Q6_Vhf_equals_Wqf32(Q6_W_vcombine_VV(vq0_hi, vq0_lo)); - HVX_Vector vh1 = Q6_Vhf_equals_Wqf32(Q6_W_vcombine_VV(vq1_hi, vq1_lo)); - HVX_Vector vh2 = Q6_Vhf_equals_Wqf32(Q6_W_vcombine_VV(vq2_hi, vq2_lo)); - HVX_Vector vh3 = Q6_Vhf_equals_Wqf32(Q6_W_vcombine_VV(vq3_hi, vq3_lo)); - - // vcombine does a shuffle, use vdeal to undo - - HVX_Vector_x4 r = { Q6_Vh_vdeal_Vh(vh0), Q6_Vh_vdeal_Vh(vh1), Q6_Vh_vdeal_Vh(vh2), Q6_Vh_vdeal_Vh(vh3) }; - return r; -} - // Reduce multiply 1024 x 1024 int8 elements (32x q4/8 blocks in 8x HVX vectors). // Accumulate each block into a single int32 value. // Return a single HVX vector with 32x int32 accumulators. @@ -300,26 +280,26 @@ static inline HVX_Vector hvx_vec_rmpy_x8_nloe(HVX_Vector_x8 x, HVX_Vector_x8 y, return hvx_vec_rmpy_x8_n(x, y, 1024); } -static void vec_dot_q4x4x2_q8x4x2(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +static void vec_dot_q4x4x2_q8x4x2_1x1(const int n, float * restrict s0, const void * restrict vx0, const void * restrict vy0) { assert(n % 32 == 0); // min sub-block size - assert((unsigned long) vx % 128 == 0); - assert((unsigned long) vy % 128 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); const uint32_t qk = QK_Q4_0x4x2 * 4; - const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t x_qblk_size = qk / 2; // int4 - const uint32_t x_qrow_size = n / 2; // int4 (not padded) + const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t x_qblk_size = qk / 2; // int4 + const uint32_t x_qrow_size = n / 2; // int4 (not padded) - const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t y_qblk_size = qk; // int8 - const uint32_t y_qrow_size = n; // int8 (not padded) + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) - const uint8_t * restrict r0_x_q = ((const uint8_t *) vx + 0); // quants first - const uint8_t * restrict r0_x_d = ((const uint8_t *) vx + x_qrow_size); // then scales + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0 + 0); // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0 + x_qrow_size); // then scales - const uint8_t * restrict y_q = ((const uint8_t *) vy + 0); // quants first - const uint8_t * restrict y_d = ((const uint8_t *) vy + y_qrow_size); // then scales + const uint8_t * restrict y_q = ((const uint8_t *) vy0 + 0); // quants first + const uint8_t * restrict y_d = ((const uint8_t *) vy0 + y_qrow_size); // then scales // Row sum (sf) HVX_Vector r0_sum = Q6_V_vsplat_R(0); @@ -372,36 +352,34 @@ static void vec_dot_q4x4x2_q8x4x2(const int n, float * restrict s, const void * r0_sum = hvx_vec_reduce_sum_f32(r0_sum); - hvx_vec_store_u(&s[0], 4, r0_sum); + hvx_vec_store_u(s0, 4, r0_sum); } -static void vec_dot_q4x4x2_q8x4x2_rx2(const int n, - float * restrict s, - const void * restrict vx, - uint32_t vx_row_size, - const void * restrict vy) { +static void vec_dot_q4x4x2_q8x4x2_2x1(const int n, float * restrict s0, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0) { assert(n % 32 == 0); // min sub-block size - assert((unsigned long) vx % 128 == 0); - assert((unsigned long) vy % 128 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vx1 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); const uint32_t qk = QK_Q4_0x4x2 * 4; - const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t x_qblk_size = qk / 2; // int4 - const uint32_t x_qrow_size = n / 2; // int4 (not padded) + const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t x_qblk_size = qk / 2; // int4 + const uint32_t x_qrow_size = n / 2; // int4 (not padded) - const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t y_qblk_size = qk; // int8 - const uint32_t y_qrow_size = n; // int8 (not padded) + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) - const uint8_t * restrict r0_x_q = ((const uint8_t *) (vx + (0 * vx_row_size)) + 0); // quants first - const uint8_t * restrict r0_x_d = ((const uint8_t *) (vx + (0 * vx_row_size)) + x_qrow_size); // then scales + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0) + 0; // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0) + x_qrow_size; // then scales + const uint8_t * restrict r1_x_q = ((const uint8_t *) vx1) + 0; // quants first + const uint8_t * restrict r1_x_d = ((const uint8_t *) vx1) + x_qrow_size; // then scales - const uint8_t * restrict r1_x_q = ((const uint8_t *) (vx + (1 * vx_row_size)) + 0); // quants first - const uint8_t * restrict r1_x_d = ((const uint8_t *) (vx + (1 * vx_row_size)) + x_qrow_size); // then scales - - const uint8_t * restrict y_q = ((const uint8_t *) vy + 0); // quants first - const uint8_t * restrict y_d = ((const uint8_t *) vy + y_qrow_size); // then scales + const uint8_t * restrict y_q = ((const uint8_t *) vy0 + 0); // quants first + const uint8_t * restrict y_d = ((const uint8_t *) vy0 + y_qrow_size); // then scales // Row sum (sf) HVX_Vector r0_sum = Q6_V_vsplat_R(0); @@ -468,13 +446,143 @@ static void vec_dot_q4x4x2_q8x4x2_rx2(const int n, } HVX_Vector rsum = hvx_vec_reduce_sum_f32x2(r0_sum, r1_sum); - hvx_vec_store_u(&s[0], 8, rsum); + hvx_vec_store_u(s0, 8, rsum); } -static void vec_dot_q8x4x2_q8x4x2(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +static void vec_dot_q4x4x2_q8x4x2_2x2(const int n, float * restrict s0, float * restrict s1, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0, const void * restrict vy1) { + assert(n % 32 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vx1 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); + assert((unsigned long) vy1 % 128 == 0); + + const uint32_t qk = QK_Q4_0x4x2 * 4; + + const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t x_qblk_size = qk / 2; // int4 + const uint32_t x_qrow_size = n / 2; // int4 (not padded) + + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) + + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0) + 0; // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0) + x_qrow_size; // then scales + const uint8_t * restrict r1_x_q = ((const uint8_t *) vx1) + 0; // quants first + const uint8_t * restrict r1_x_d = ((const uint8_t *) vx1) + x_qrow_size; // then scales + + const uint8_t * restrict y0_q = ((const uint8_t *) vy0) + 0; // quants first + const uint8_t * restrict y0_d = ((const uint8_t *) vy0) + y_qrow_size; // then scales + const uint8_t * restrict y1_q = ((const uint8_t *) vy1) + 0; // quants first + const uint8_t * restrict y1_d = ((const uint8_t *) vy1) + y_qrow_size; // then scales + + // Row sums (sf) - 4 accumulators for 2×2 tile + HVX_Vector r0_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r0_c1_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c1_sum = Q6_V_vsplat_R(0); + + const uint32_t nb = n / qk; // num full blocks + const uint32_t nloe = n % qk; // num leftover elements + + uint32_t i = 0; + for (; i < nb; i++) { + // Load src1 columns (reused across both src0 rows) + HVX_Vector_x8 vy0_q = hvx_vec_load_q8x4x8(y0_q + i * y_qblk_size); + HVX_Vector_x8 vy1_q = hvx_vec_load_q8x4x8(y1_q + i * y_qblk_size); + + // Load src0 rows (reused across both src1 columns) + HVX_Vector_x8 r0_q = hvx_vec_load_q4x4x8(r0_x_q + i * x_qblk_size); + HVX_Vector_x8 r1_q = hvx_vec_load_q4x4x8(r1_x_q + i * x_qblk_size); + + // Compute 4 dot products: r0×c0, r0×c1, r1×c0, r1×c1 + HVX_Vector r0_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r0_q, vy0_q)); + HVX_Vector r0_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r0_q, vy1_q)); + HVX_Vector r1_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r1_q, vy0_q)); + HVX_Vector r1_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r1_q, vy1_q)); + + // Load scales + HVX_Vector vy0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y0_d + i * y_dblk_size)); + HVX_Vector vy1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y1_d + i * y_dblk_size)); + HVX_Vector r0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r0_x_d + i * x_dblk_size)); + HVX_Vector r1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r1_x_d + i * x_dblk_size)); + + // Compute combined scales + HVX_Vector r0_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy0_d))); + HVX_Vector r0_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy1_d))); + HVX_Vector r1_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy0_d))); + HVX_Vector r1_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy1_d))); + + // Apply scales and accumulate + HVX_Vector r0_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c0_ia, r0_c0_dd); + HVX_Vector r0_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c1_ia, r0_c1_dd); + HVX_Vector r1_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c0_ia, r1_c0_dd); + HVX_Vector r1_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c1_ia, r1_c1_dd); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_fa, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_fa, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_fa, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_fa, r1_c1_sum)); + } + + // Process leftovers + if (nloe) { + HVX_Vector_x8 vy0_q = hvx_vec_load_q8x4x8(y0_q + i * y_qblk_size); + HVX_Vector_x8 vy1_q = hvx_vec_load_q8x4x8(y1_q + i * y_qblk_size); + HVX_Vector_x8 r0_q = hvx_vec_load_q4x4x8(r0_x_q + i * x_qblk_size); + HVX_Vector_x8 r1_q = hvx_vec_load_q4x4x8(r1_x_q + i * x_qblk_size); + + HVX_Vector r0_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r0_q, vy0_q, nloe)); + HVX_Vector r0_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r0_q, vy1_q, nloe)); + HVX_Vector r1_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r1_q, vy0_q, nloe)); + HVX_Vector r1_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r1_q, vy1_q, nloe)); + + HVX_Vector vy0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y0_d + i * y_dblk_size)); + HVX_Vector vy1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y1_d + i * y_dblk_size)); + HVX_Vector r0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r0_x_d + i * x_dblk_size)); + HVX_Vector r1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r1_x_d + i * x_dblk_size)); + + HVX_Vector r0_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy0_d))); + HVX_Vector r0_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy1_d))); + HVX_Vector r1_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy0_d))); + HVX_Vector r1_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy1_d))); + + // Zero out unused scales + HVX_VectorPred bmask = Q6_Q_vsetq_R(nloe / 8); + r0_c0_dd = Q6_V_vand_QV(bmask, r0_c0_dd); + r0_c1_dd = Q6_V_vand_QV(bmask, r0_c1_dd); + r1_c0_dd = Q6_V_vand_QV(bmask, r1_c0_dd); + r1_c1_dd = Q6_V_vand_QV(bmask, r1_c1_dd); + r0_c0_ia = Q6_V_vand_QV(bmask, r0_c0_ia); + r0_c1_ia = Q6_V_vand_QV(bmask, r0_c1_ia); + r1_c0_ia = Q6_V_vand_QV(bmask, r1_c0_ia); + r1_c1_ia = Q6_V_vand_QV(bmask, r1_c1_ia); + + HVX_Vector r0_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c0_ia, r0_c0_dd); + HVX_Vector r0_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c1_ia, r0_c1_dd); + HVX_Vector r1_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c0_ia, r1_c0_dd); + HVX_Vector r1_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c1_ia, r1_c1_dd); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_fa, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_fa, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_fa, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_fa, r1_c1_sum)); + } + + // Reduce and store results + HVX_Vector r0_r1_c0_sum = hvx_vec_reduce_sum_f32x2(r0_c0_sum, r1_c0_sum); + HVX_Vector r0_r1_c1_sum = hvx_vec_reduce_sum_f32x2(r0_c1_sum, r1_c1_sum); + + hvx_vec_store_u(s0, 8, r0_r1_c0_sum); // row0,col0 row1,col0 + hvx_vec_store_u(s1, 8, r0_r1_c1_sum); // row0,col1 row1,col1 +} + +static void vec_dot_q8x4x2_q8x4x2_1x1(const int n, float * restrict s0, const void * restrict vx0, const void * restrict vy0) { assert(n % 32 == 0); // min sub-block size - assert((unsigned long) vx % 128 == 0); - assert((unsigned long) vy % 128 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); const uint32_t qk = QK_Q4_0x4x2 * 4; @@ -486,11 +594,11 @@ static void vec_dot_q8x4x2_q8x4x2(const int n, float * restrict s, const void * const uint32_t y_qblk_size = qk; // int8 const uint32_t y_qrow_size = n; // int8 (not padded) - const uint8_t * restrict r0_x_q = ((const uint8_t *) vx + 0); // quants first - const uint8_t * restrict r0_x_d = ((const uint8_t *) vx + x_qrow_size); // then scales + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0 + 0); // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0 + x_qrow_size); // then scales - const uint8_t * restrict y_q = ((const uint8_t *) vy + 0); // quants first - const uint8_t * restrict y_d = ((const uint8_t *) vy + y_qrow_size); // then scales + const uint8_t * restrict y_q = ((const uint8_t *) vy0 + 0); // quants first + const uint8_t * restrict y_d = ((const uint8_t *) vy0 + y_qrow_size); // then scales // Row sum (sf) HVX_Vector r0_sum = Q6_V_vsplat_R(0); @@ -543,36 +651,34 @@ static void vec_dot_q8x4x2_q8x4x2(const int n, float * restrict s, const void * r0_sum = hvx_vec_reduce_sum_f32(r0_sum); - hvx_vec_store_u(&s[0], 4, r0_sum); + hvx_vec_store_u(s0, 4, r0_sum); } -static void vec_dot_q8x4x2_q8x4x2_rx2(const int n, - float * restrict s, - const void * restrict vx, - uint32_t vx_row_size, - const void * restrict vy) { +static void vec_dot_q8x4x2_q8x4x2_2x1(const int n, float * restrict s0, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0) { assert(n % 32 == 0); // min sub-block size - assert((unsigned long) vx % 128 == 0); - assert((unsigned long) vy % 128 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vx1 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); const uint32_t qk = QK_Q4_0x4x2 * 4; - const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t x_qblk_size = qk; // int8 - const uint32_t x_qrow_size = n; // int8 (not padded) + const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t x_qblk_size = qk; // int8 + const uint32_t x_qrow_size = n; // int8 (not padded) - const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t y_qblk_size = qk; // int8 - const uint32_t y_qrow_size = n; // int8 (not padded) + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) - const uint8_t * restrict r0_x_q = ((const uint8_t *) (vx + (0 * vx_row_size)) + 0); // quants first - const uint8_t * restrict r0_x_d = ((const uint8_t *) (vx + (0 * vx_row_size)) + x_qrow_size); // then scales + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0) + 0; // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0) + x_qrow_size; // then scales + const uint8_t * restrict r1_x_q = ((const uint8_t *) vx1) + 0; // quants first + const uint8_t * restrict r1_x_d = ((const uint8_t *) vx1) + x_qrow_size; // then scales - const uint8_t * restrict r1_x_q = ((const uint8_t *) (vx + (1 * vx_row_size)) + 0); // quants first - const uint8_t * restrict r1_x_d = ((const uint8_t *) (vx + (1 * vx_row_size)) + x_qrow_size); // then scales - - const uint8_t * restrict y_q = ((const uint8_t *) vy + 0); // quants first - const uint8_t * restrict y_d = ((const uint8_t *) vy + y_qrow_size); // then scales + const uint8_t * restrict y_q = ((const uint8_t *) vy0 + 0); // quants first + const uint8_t * restrict y_d = ((const uint8_t *) vy0 + y_qrow_size); // then scales // Row sum (qf32) HVX_Vector r0_sum = Q6_V_vsplat_R(0); @@ -639,16 +745,143 @@ static void vec_dot_q8x4x2_q8x4x2_rx2(const int n, } HVX_Vector rsum = hvx_vec_reduce_sum_f32x2(r0_sum, r1_sum); - hvx_vec_store_u(&s[0], 8, rsum); + hvx_vec_store_u(s0, 8, rsum); } -static void vec_dot_mxfp4x4x2_q8x4x2(const int n, - float * restrict s, - const void * restrict vx, - const void * restrict vy) { +static void vec_dot_q8x4x2_q8x4x2_2x2(const int n, float * restrict s0, float * restrict s1, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0, const void * restrict vy1) { + assert(n % 32 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vx1 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); + assert((unsigned long) vy1 % 128 == 0); + + const uint32_t qk = QK_Q8_0x4x2 * 4; + + const uint32_t x_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t x_qblk_size = qk; // int8 + const uint32_t x_qrow_size = n; // int8 (not padded) + + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) + + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0) + 0; // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0) + x_qrow_size; // then scales + const uint8_t * restrict r1_x_q = ((const uint8_t *) vx1) + 0; // quants first + const uint8_t * restrict r1_x_d = ((const uint8_t *) vx1) + x_qrow_size; // then scales + + const uint8_t * restrict y0_q = ((const uint8_t *) vy0) + 0; // quants first + const uint8_t * restrict y0_d = ((const uint8_t *) vy0) + y_qrow_size; // then scales + const uint8_t * restrict y1_q = ((const uint8_t *) vy1) + 0; // quants first + const uint8_t * restrict y1_d = ((const uint8_t *) vy1) + y_qrow_size; // then scales + + // Row sums (sf) - 4 accumulators for 2×2 tile + HVX_Vector r0_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r0_c1_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c1_sum = Q6_V_vsplat_R(0); + + const uint32_t nb = n / qk; // num full blocks + const uint32_t nloe = n % qk; // num leftover elements + + uint32_t i = 0; + for (; i < nb; i++) { + // Load src1 columns (reused across both src0 rows) + HVX_Vector_x8 vy0_q = hvx_vec_load_q8x4x8(y0_q + i * y_qblk_size); + HVX_Vector_x8 vy1_q = hvx_vec_load_q8x4x8(y1_q + i * y_qblk_size); + + // Load src0 rows (reused across both src1 columns) + HVX_Vector_x8 r0_q = hvx_vec_load_q8x4x8(r0_x_q + i * x_qblk_size); + HVX_Vector_x8 r1_q = hvx_vec_load_q8x4x8(r1_x_q + i * x_qblk_size); + + // Compute 4 dot products: r0×c0, r0×c1, r1×c0, r1×c1 + HVX_Vector r0_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r0_q, vy0_q)); + HVX_Vector r0_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r0_q, vy1_q)); + HVX_Vector r1_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r1_q, vy0_q)); + HVX_Vector r1_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r1_q, vy1_q)); + + // Load scales + HVX_Vector vy0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y0_d + i * y_dblk_size)); + HVX_Vector vy1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y1_d + i * y_dblk_size)); + HVX_Vector r0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r0_x_d + i * x_dblk_size)); + HVX_Vector r1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r1_x_d + i * x_dblk_size)); + + // Compute combined scales + HVX_Vector r0_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy0_d))); + HVX_Vector r0_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy1_d))); + HVX_Vector r1_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy0_d))); + HVX_Vector r1_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy1_d))); + + // Apply scales and accumulate + HVX_Vector r0_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c0_ia, r0_c0_dd); + HVX_Vector r0_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c1_ia, r0_c1_dd); + HVX_Vector r1_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c0_ia, r1_c0_dd); + HVX_Vector r1_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c1_ia, r1_c1_dd); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_fa, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_fa, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_fa, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_fa, r1_c1_sum)); + } + + // Process leftovers + if (nloe) { + HVX_Vector_x8 vy0_q = hvx_vec_load_q8x4x8(y0_q + i * y_qblk_size); + HVX_Vector_x8 vy1_q = hvx_vec_load_q8x4x8(y1_q + i * y_qblk_size); + HVX_Vector_x8 r0_q = hvx_vec_load_q8x4x8(r0_x_q + i * x_qblk_size); + HVX_Vector_x8 r1_q = hvx_vec_load_q8x4x8(r1_x_q + i * x_qblk_size); + + HVX_Vector r0_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r0_q, vy0_q, nloe)); + HVX_Vector r0_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r0_q, vy1_q, nloe)); + HVX_Vector r1_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r1_q, vy0_q, nloe)); + HVX_Vector r1_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r1_q, vy1_q, nloe)); + + HVX_Vector vy0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y0_d + i * y_dblk_size)); + HVX_Vector vy1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (y1_d + i * y_dblk_size)); + HVX_Vector r0_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r0_x_d + i * x_dblk_size)); + HVX_Vector r1_d = Q6_Vh_vshuff_Vh(*(const HVX_UVector *) (r1_x_d + i * x_dblk_size)); + + HVX_Vector r0_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy0_d))); + HVX_Vector r0_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r0_d, vy1_d))); + HVX_Vector r1_c0_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy0_d))); + HVX_Vector r1_c1_dd = Q6_Vsf_equals_Vqf32(Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(r1_d, vy1_d))); + + // Zero out unused scales + HVX_VectorPred bmask = Q6_Q_vsetq_R(nloe / 8); + r0_c0_dd = Q6_V_vand_QV(bmask, r0_c0_dd); + r0_c1_dd = Q6_V_vand_QV(bmask, r0_c1_dd); + r1_c0_dd = Q6_V_vand_QV(bmask, r1_c0_dd); + r1_c1_dd = Q6_V_vand_QV(bmask, r1_c1_dd); + r0_c0_ia = Q6_V_vand_QV(bmask, r0_c0_ia); + r0_c1_ia = Q6_V_vand_QV(bmask, r0_c1_ia); + r1_c0_ia = Q6_V_vand_QV(bmask, r1_c0_ia); + r1_c1_ia = Q6_V_vand_QV(bmask, r1_c1_ia); + + HVX_Vector r0_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c0_ia, r0_c0_dd); + HVX_Vector r0_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c1_ia, r0_c1_dd); + HVX_Vector r1_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c0_ia, r1_c0_dd); + HVX_Vector r1_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c1_ia, r1_c1_dd); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_fa, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_fa, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_fa, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_fa, r1_c1_sum)); + } + + // Reduce and store results + HVX_Vector r0_r1_c0_sum = hvx_vec_reduce_sum_f32x2(r0_c0_sum, r1_c0_sum); + HVX_Vector r0_r1_c1_sum = hvx_vec_reduce_sum_f32x2(r0_c1_sum, r1_c1_sum); + + hvx_vec_store_u(&s0[0], 8, r0_r1_c0_sum); // row0,col0 row1,col0 + hvx_vec_store_u(&s1[0], 8, r0_r1_c1_sum); // row0,col1 row1,col1 +} + +static void vec_dot_mxfp4x4x2_q8x4x2_1x1(const int n, float * restrict s0, const void * restrict vx0, const void * restrict vy0) { assert(n % 32 == 0); // min sub-block size - assert((unsigned long) vx % 128 == 0); - assert((unsigned long) vy % 128 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); const uint32_t qk = QK_MXFP4x4x2 * 4; @@ -660,11 +893,11 @@ static void vec_dot_mxfp4x4x2_q8x4x2(const int n, const uint32_t y_qblk_size = qk; // int8 const uint32_t y_qrow_size = n; // int8 (not padded) - const uint8_t * restrict r0_x_q = ((const uint8_t *) vx + 0); // quants first - const uint8_t * restrict r0_x_d = ((const uint8_t *) vx + x_qrow_size); // then scales + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0 + 0); // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0 + x_qrow_size); // then scales - const uint8_t * restrict y_q = ((const uint8_t *) vy + 0); // quants first - const uint8_t * restrict y_d = ((const uint8_t *) vy + y_qrow_size); // then scales + const uint8_t * restrict y_q = ((const uint8_t *) vy0 + 0); // quants first + const uint8_t * restrict y_d = ((const uint8_t *) vy0 + y_qrow_size); // then scales // Row sum (sf) HVX_Vector r0_sum = Q6_V_vsplat_R(0); @@ -747,36 +980,34 @@ static void vec_dot_mxfp4x4x2_q8x4x2(const int n, r0_sum = hvx_vec_reduce_sum_f32(r0_sum); - hvx_vec_store_u(&s[0], 4, r0_sum); + hvx_vec_store_u(s0, 4, r0_sum); } -static void vec_dot_mxfp4x4x2_q8x4x2_rx2(const int n, - float * restrict s, - const void * restrict vx, - uint32_t vx_row_size, - const void * restrict vy) { +static void vec_dot_mxfp4x4x2_q8x4x2_2x1(const int n, float * restrict s0, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0) { assert(n % 32 == 0); // min sub-block size - assert((unsigned long) vx % 128 == 0); - assert((unsigned long) vy % 128 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vx1 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); const uint32_t qk = QK_MXFP4x4x2 * 4; - const uint32_t x_dblk_size = 8 * 4 * 1; // 32x e8m0 - const uint32_t x_qblk_size = qk / 2; // fp4 - const uint32_t x_qrow_size = n / 2; // fp4 (not padded) + const uint32_t x_dblk_size = 8 * 4 * 1; // 32x e8m0 + const uint32_t x_qblk_size = qk / 2; // fp4 + const uint32_t x_qrow_size = n / 2; // fp4 (not padded) - const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 - const uint32_t y_qblk_size = qk; // int8 - const uint32_t y_qrow_size = n; // int8 (not padded) + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) - const uint8_t * restrict r0_x_q = ((const uint8_t *) (vx + (0 * vx_row_size)) + 0); // quants first - const uint8_t * restrict r0_x_d = ((const uint8_t *) (vx + (0 * vx_row_size)) + x_qrow_size); // then scales + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0) + 0; // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0) + x_qrow_size; // then scales + const uint8_t * restrict r1_x_q = ((const uint8_t *) vx1) + 0; // quants first + const uint8_t * restrict r1_x_d = ((const uint8_t *) vx1) + x_qrow_size; // then scales - const uint8_t * restrict r1_x_q = ((const uint8_t *) (vx + (1 * vx_row_size)) + 0); // quants first - const uint8_t * restrict r1_x_d = ((const uint8_t *) (vx + (1 * vx_row_size)) + x_qrow_size); // then scales - - const uint8_t * restrict y_q = ((const uint8_t *) vy + 0); // quants first - const uint8_t * restrict y_d = ((const uint8_t *) vy + y_qrow_size); // then scales + const uint8_t * restrict y_q = ((const uint8_t *) vy0) + 0; // quants first + const uint8_t * restrict y_d = ((const uint8_t *) vy0) + y_qrow_size; // then scales // Row sum (sf) HVX_Vector r0_sum = Q6_V_vsplat_R(0); @@ -879,10 +1110,180 @@ static void vec_dot_mxfp4x4x2_q8x4x2_rx2(const int n, } HVX_Vector rsum = hvx_vec_reduce_sum_f32x2(r0_sum, r1_sum); - hvx_vec_store_u(&s[0], 8, rsum); + hvx_vec_store_u(s0, 8, rsum); } -static void vec_dot_f16_f16_aa(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +static void vec_dot_mxfp4x4x2_q8x4x2_2x2(const int n, float * restrict s0, float * restrict s1, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0, const void * restrict vy1) { + assert(n % 32 == 0); + assert((unsigned long) vx0 % 128 == 0); + assert((unsigned long) vx1 % 128 == 0); + assert((unsigned long) vy0 % 128 == 0); + assert((unsigned long) vy1 % 128 == 0); + + const uint32_t qk = QK_MXFP4x4x2 * 4; + + const uint32_t x_dblk_size = 8 * 4 * 1; // 32x e8m0 + const uint32_t x_qblk_size = qk / 2; // fp4 + const uint32_t x_qrow_size = n / 2; // fp4 (not padded) + + const uint32_t y_dblk_size = 8 * 4 * 2; // 32x __fp16 + const uint32_t y_qblk_size = qk; // int8 + const uint32_t y_qrow_size = n; // int8 (not padded) + + const uint8_t * restrict r0_x_q = ((const uint8_t *) vx0) + 0; // quants first + const uint8_t * restrict r0_x_d = ((const uint8_t *) vx0) + x_qrow_size; // then scales + const uint8_t * restrict r1_x_q = ((const uint8_t *) vx1) + 0; // quants first + const uint8_t * restrict r1_x_d = ((const uint8_t *) vx1) + x_qrow_size; // then scales + + const uint8_t * restrict y0_q = ((const uint8_t *) vy0) + 0; // quants first + const uint8_t * restrict y0_d = ((const uint8_t *) vy0) + y_qrow_size; // then scales + const uint8_t * restrict y1_q = ((const uint8_t *) vy1) + 0; // quants first + const uint8_t * restrict y1_d = ((const uint8_t *) vy1) + y_qrow_size; // then scales + + // Row sums (sf) - 4 accumulators for 2×2 tile + HVX_Vector r0_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r0_c1_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c1_sum = Q6_V_vsplat_R(0); + + const uint32_t nb = n / qk; // num full blocks + const uint32_t nloe = n % qk; // num leftover elements + + uint32_t i = 0; + for (; i < nb; i++) { + // Load src1 columns (reused across both src0 rows) + HVX_Vector_x8 vy0_q = hvx_vec_load_q8x4x8(y0_q + i * y_qblk_size); + HVX_Vector_x8 vy1_q = hvx_vec_load_q8x4x8(y1_q + i * y_qblk_size); + + // Load src0 rows (reused across both src1 columns) + HVX_Vector_x8 r0_q = hvx_vec_load_mxfp4x4x8(r0_x_q + i * x_qblk_size); + HVX_Vector_x8 r1_q = hvx_vec_load_mxfp4x4x8(r1_x_q + i * x_qblk_size); + + // Compute 4 dot products: r0×c0, r0×c1, r1×c0, r1×c1 + HVX_Vector r0_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r0_q, vy0_q)); + HVX_Vector r0_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r0_q, vy1_q)); + HVX_Vector r1_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r1_q, vy0_q)); + HVX_Vector r1_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_full(r1_q, vy1_q)); + + // Load scales + HVX_Vector vy0_d = *(const HVX_UVector *) (y0_d + i * y_dblk_size); + HVX_Vector vy1_d = *(const HVX_UVector *) (y1_d + i * y_dblk_size); + HVX_Vector r0_d = *(const HVX_UVector *) (r0_x_d + i * x_dblk_size); + HVX_Vector r1_d = *(const HVX_UVector *) (r1_x_d + i * x_dblk_size); + + // Convert vy_d from fp16 to fp32 while applying 0.5 scaling which is used for e8m0 halving + HVX_Vector half = Q6_Vh_vsplat_R(0x3800); // 0.5 in fp16 + vy0_d = Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(vy0_d), half)); + vy0_d = Q6_Vsf_equals_Vqf32(vy0_d); + vy1_d = Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(vy1_d), half)); + vy1_d = Q6_Vsf_equals_Vqf32(vy1_d); + + // Convert rX_d scales from e8m0 to fp32 + // Expand and zero-pad 32x uint8 e8m0 values to uint32s : 0 0 0 0, 0 0 0 1, 0 0 0 2, ... + // Left shift with zero fill to create FP32 + // FIXME: might need to handle zero as a special case (see ggml-cpu code) + HVX_Vector expand = *(const HVX_Vector *) expand_x32_e8m0; + HVX_Vector e8m0_mask = Q6_V_vsplat_R(0x000000ff); + r0_d = Q6_V_vdelta_VV(r0_d, expand); + r0_d = Q6_V_vand_VV(r0_d, e8m0_mask); + r0_d = Q6_Vw_vasl_VwR(r0_d, 23); + r1_d = Q6_V_vdelta_VV(r1_d, expand); + r1_d = Q6_V_vand_VV(r1_d, e8m0_mask); + r1_d = Q6_Vw_vasl_VwR(r1_d, 23); + + // Compute combined scales + HVX_Vector r0_c0_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r0_d, vy0_d)); + HVX_Vector r0_c1_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r0_d, vy1_d)); + HVX_Vector r1_c0_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r1_d, vy0_d)); + HVX_Vector r1_c1_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r1_d, vy1_d)); + + // Apply scales and accumulate + HVX_Vector r0_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c0_ia, r0_c0_dd); + HVX_Vector r0_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c1_ia, r0_c1_dd); + HVX_Vector r1_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c0_ia, r1_c0_dd); + HVX_Vector r1_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c1_ia, r1_c1_dd); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_fa, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_fa, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_fa, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_fa, r1_c1_sum)); + } + + // Process leftovers + if (nloe) { + HVX_Vector_x8 vy0_q = hvx_vec_load_q8x4x8(y0_q + i * y_qblk_size); + HVX_Vector_x8 vy1_q = hvx_vec_load_q8x4x8(y1_q + i * y_qblk_size); + HVX_Vector_x8 r0_q = hvx_vec_load_mxfp4x4x8(r0_x_q + i * x_qblk_size); + HVX_Vector_x8 r1_q = hvx_vec_load_mxfp4x4x8(r1_x_q + i * x_qblk_size); + + HVX_Vector r0_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r0_q, vy0_q, nloe)); + HVX_Vector r0_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r0_q, vy1_q, nloe)); + HVX_Vector r1_c0_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r1_q, vy0_q, nloe)); + HVX_Vector r1_c1_ia = Q6_Vsf_equals_Vw(hvx_vec_rmpy_x8_nloe(r1_q, vy1_q, nloe)); + + HVX_Vector vy0_d = *(const HVX_UVector *) (y0_d + i * y_dblk_size); + HVX_Vector vy1_d = *(const HVX_UVector *) (y1_d + i * y_dblk_size); + HVX_Vector r0_d = *(const HVX_UVector *) (r0_x_d + i * x_dblk_size); + HVX_Vector r1_d = *(const HVX_UVector *) (r1_x_d + i * x_dblk_size); + + // Convert vy_d from fp16 to fp32 while applying 0.5 scaling which is used for e8m0 halving + HVX_Vector half = Q6_Vh_vsplat_R(0x3800); // 0.5 in fp16 + vy0_d = Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(vy0_d), half)); + vy0_d = Q6_Vsf_equals_Vqf32(vy0_d); + vy1_d = Q6_V_lo_W(Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(vy1_d), half)); + vy1_d = Q6_Vsf_equals_Vqf32(vy1_d); + + // Convert rX_d scales from e8m0 to fp32 + // Expand and zero-pad 32x uint8 e8m0 values to uint32s : 0 0 0 0, 0 0 0 1, 0 0 0 2, ... + // Left shift with zero fill to create FP32 + // FIXME: might need to handle zero as a special case (see ggml-cpu code) + HVX_Vector expand = *(const HVX_Vector *) expand_x32_e8m0; + HVX_Vector e8m0_mask = Q6_V_vsplat_R(0x000000ff); + r0_d = Q6_V_vdelta_VV(r0_d, expand); + r0_d = Q6_V_vand_VV(r0_d, e8m0_mask); + r0_d = Q6_Vw_vasl_VwR(r0_d, 23); + r1_d = Q6_V_vdelta_VV(r1_d, expand); + r1_d = Q6_V_vand_VV(r1_d, e8m0_mask); + r1_d = Q6_Vw_vasl_VwR(r1_d, 23); + + HVX_Vector r0_c0_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r0_d, vy0_d)); + HVX_Vector r0_c1_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r0_d, vy1_d)); + HVX_Vector r1_c0_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r1_d, vy0_d)); + HVX_Vector r1_c1_dd = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(r1_d, vy1_d)); + + // Zero out unused scales + HVX_VectorPred bmask = Q6_Q_vsetq_R(nloe / 8); + r0_c0_dd = Q6_V_vand_QV(bmask, r0_c0_dd); + r0_c1_dd = Q6_V_vand_QV(bmask, r0_c1_dd); + r1_c0_dd = Q6_V_vand_QV(bmask, r1_c0_dd); + r1_c1_dd = Q6_V_vand_QV(bmask, r1_c1_dd); + r0_c0_ia = Q6_V_vand_QV(bmask, r0_c0_ia); + r0_c1_ia = Q6_V_vand_QV(bmask, r0_c1_ia); + r1_c0_ia = Q6_V_vand_QV(bmask, r1_c0_ia); + r1_c1_ia = Q6_V_vand_QV(bmask, r1_c1_ia); + + HVX_Vector r0_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c0_ia, r0_c0_dd); + HVX_Vector r0_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r0_c1_ia, r0_c1_dd); + HVX_Vector r1_c0_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c0_ia, r1_c0_dd); + HVX_Vector r1_c1_fa = Q6_Vqf32_vmpy_VsfVsf(r1_c1_ia, r1_c1_dd); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_fa, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_fa, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_fa, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_fa, r1_c1_sum)); + } + + // Reduce and store results + HVX_Vector r0_r1_c0_sum = hvx_vec_reduce_sum_f32x2(r0_c0_sum, r1_c0_sum); + HVX_Vector r0_r1_c1_sum = hvx_vec_reduce_sum_f32x2(r0_c1_sum, r1_c1_sum); + + hvx_vec_store_u(&s0[0], 8, r0_r1_c0_sum); // row0,col0 row1,col0 + hvx_vec_store_u(&s1[0], 8, r0_r1_c1_sum); // row0,col1 row1,col1 +} + +static void vec_dot_f16_f16_aa_1x1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { const HVX_Vector * restrict x = (const HVX_Vector *) vx; const HVX_Vector * restrict y = (const HVX_Vector *) vy; @@ -912,14 +1313,12 @@ static void vec_dot_f16_f16_aa(const int n, float * restrict s, const void * res hvx_vec_store_u(&s[0], 4, rsum); } -static void vec_dot_f16_f16_aa_rx2(const int n, - float * restrict s, - const void * restrict vx, - uint32_t vx_row_size, - const void * restrict vy) { - const HVX_Vector * restrict x0 = (const HVX_Vector *) vx; - const HVX_Vector * restrict x1 = (const HVX_Vector *) ((const uint8_t *) vx + vx_row_size); - const HVX_Vector * restrict y = (const HVX_Vector *) vy; +static void vec_dot_f16_f16_aa_2x1(const int n, float * restrict s0, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0) { + const HVX_Vector * restrict x0 = (const HVX_Vector *) vx0; + const HVX_Vector * restrict x1 = (const HVX_Vector *) vx1; + const HVX_Vector * restrict y = (const HVX_Vector *) vy0; uint32_t nvec = n / VLEN_FP16; uint32_t nloe = n % VLEN_FP16; @@ -953,10 +1352,86 @@ static void vec_dot_f16_f16_aa_rx2(const int n, } HVX_Vector rsum = hvx_vec_reduce_sum_f32x2(Q6_Vsf_equals_Vqf32(rsum0), Q6_Vsf_equals_Vqf32(rsum1)); - hvx_vec_store_u(&s[0], 8, rsum); + hvx_vec_store_u(s0, 8, rsum); } -static void vec_dot_f16_f16_uu(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { +static void vec_dot_f16_f16_aa_2x2(const int n, float * restrict s0, float * restrict s1, + const void * restrict vx0, const void * restrict vx1, + const void * restrict vy0, const void * restrict vy1) { + const HVX_Vector * restrict x0 = (const HVX_Vector *) vx0; + const HVX_Vector * restrict x1 = (const HVX_Vector *) vx1; + const HVX_Vector * restrict y0 = (const HVX_Vector *) vy0; + const HVX_Vector * restrict y1 = (const HVX_Vector *) vy1; + + uint32_t nvec = n / VLEN_FP16; + uint32_t nloe = n % VLEN_FP16; + + // Row sums (sf) - 4 accumulators for 2×2 tile + HVX_Vector r0_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r0_c1_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c0_sum = Q6_V_vsplat_R(0); + HVX_Vector r1_c1_sum = Q6_V_vsplat_R(0); + + uint32_t i = 0; + + #pragma unroll(2) + for (i = 0; i < nvec; i++) { + HVX_Vector r0_hf = x0[i]; + HVX_Vector r1_hf = x1[i]; + HVX_Vector c0_hf = y0[i]; + HVX_Vector c1_hf = y1[i]; + + // Compute 4 dot products: r0×c0, r0×c1, r1×c0, r1×c1 + HVX_VectorPair r0_c0_qf_p = Q6_Wqf32_vmpy_VhfVhf(r0_hf, c0_hf); + HVX_VectorPair r0_c1_qf_p = Q6_Wqf32_vmpy_VhfVhf(r0_hf, c1_hf); + HVX_VectorPair r1_c0_qf_p = Q6_Wqf32_vmpy_VhfVhf(r1_hf, c0_hf); + HVX_VectorPair r1_c1_qf_p = Q6_Wqf32_vmpy_VhfVhf(r1_hf, c1_hf); + + HVX_Vector r0_c0_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r0_c0_qf_p), Q6_V_hi_W(r0_c0_qf_p)); + HVX_Vector r0_c1_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r0_c1_qf_p), Q6_V_hi_W(r0_c1_qf_p)); + HVX_Vector r1_c0_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r1_c0_qf_p), Q6_V_hi_W(r1_c0_qf_p)); + HVX_Vector r1_c1_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r1_c1_qf_p), Q6_V_hi_W(r1_c1_qf_p)); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_qf, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_qf, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_qf, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_qf, r1_c1_sum)); + } + + if (nloe) { + HVX_VectorPred bmask = Q6_Q_vsetq_R(nloe * 2); + + HVX_Vector r0_hf = Q6_V_vand_QV(bmask, x0[i]); + HVX_Vector r1_hf = Q6_V_vand_QV(bmask, x1[i]); + HVX_Vector c0_hf = Q6_V_vand_QV(bmask, y0[i]); + HVX_Vector c1_hf = Q6_V_vand_QV(bmask, y1[i]); + + HVX_VectorPair r0_c0_qf_p = Q6_Wqf32_vmpy_VhfVhf(r0_hf, c0_hf); + HVX_VectorPair r0_c1_qf_p = Q6_Wqf32_vmpy_VhfVhf(r0_hf, c1_hf); + HVX_VectorPair r1_c0_qf_p = Q6_Wqf32_vmpy_VhfVhf(r1_hf, c0_hf); + HVX_VectorPair r1_c1_qf_p = Q6_Wqf32_vmpy_VhfVhf(r1_hf, c1_hf); + + HVX_Vector r0_c0_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r0_c0_qf_p), Q6_V_hi_W(r0_c0_qf_p)); + HVX_Vector r0_c1_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r0_c1_qf_p), Q6_V_hi_W(r0_c1_qf_p)); + HVX_Vector r1_c0_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r1_c0_qf_p), Q6_V_hi_W(r1_c0_qf_p)); + HVX_Vector r1_c1_qf = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_V_lo_W(r1_c1_qf_p), Q6_V_hi_W(r1_c1_qf_p)); + + r0_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c0_qf, r0_c0_sum)); + r0_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r0_c1_qf, r0_c1_sum)); + r1_c0_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c0_qf, r1_c0_sum)); + r1_c1_sum = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vadd_Vqf32Vsf(r1_c1_qf, r1_c1_sum)); + + } + + // Reduce and store results + HVX_Vector r0_r1_c0_sum = hvx_vec_reduce_sum_f32x2(r0_c0_sum, r1_c0_sum); + HVX_Vector r0_r1_c1_sum = hvx_vec_reduce_sum_f32x2(r0_c1_sum, r1_c1_sum); + + hvx_vec_store_u(&s0[0], 8, r0_r1_c0_sum); // row0,col0 row1,col0 + hvx_vec_store_u(&s1[0], 8, r0_r1_c1_sum); // row0,col1 row1,col1 +} + +static void vec_dot_f16_f16_uu_1x1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { const HVX_UVector * restrict x = (const HVX_UVector *) vx; const HVX_UVector * restrict y = (const HVX_UVector *) vy; @@ -986,7 +1461,7 @@ static void vec_dot_f16_f16_uu(const int n, float * restrict s, const void * res hvx_vec_store_u(&s[0], 4, rsum); } -static void vec_dot_f16_f32_uu(const int n, float * restrict s, const void * restrict x, const void * restrict y) { +static void vec_dot_f16_f32_uu_1x1(const int n, float * restrict s, const void * restrict x, const void * restrict y) { const HVX_UVector * restrict vx = (const HVX_UVector * restrict) x; const HVX_UVector * restrict vy = (const HVX_UVector * restrict) y; @@ -1083,14 +1558,16 @@ static void vec_dot_f16_f32_uu(const int n, float * restrict s, const void * res const uint32_t nb2 = dst->nb[2]; \ const uint32_t nb3 = dst->nb[3]; -#define htp_matmul_preamble \ - htp_matmul_tensors_preamble; \ - dma_queue *dma_queue = octx->ctx->dma[ith]; \ - uint32_t src0_nrows_per_thread = octx->src0_nrows_per_thread; +#define htp_matmul_preamble \ + struct htp_matmul_context * mmctx = data; \ + struct htp_ops_context * octx = mmctx->octx; \ + htp_matmul_tensors_preamble; \ + dma_queue *dma_queue = octx->ctx->dma[ith]; \ + uint32_t src0_nrows_per_thread = mmctx->src0_nrows_per_thread; // *** matmul with support for 4d tensors and full broadcasting -static void matmul_4d(struct htp_matmul_type * mt, struct htp_ops_context * octx, uint32_t nth, uint32_t ith) { +static void matmul_4d(unsigned int nth, unsigned int ith, void * data) { htp_matmul_preamble; uint64_t t1, t2; @@ -1136,13 +1613,13 @@ static void matmul_4d(struct htp_matmul_type * mt, struct htp_ops_context * octx for (uint32_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) { for (uint32_t iir0 = ir0_start; iir0 < ir0_end; iir0 += blck_0) { for (uint32_t ir1 = iir1; ir1 < MIN(iir1 + blck_1, ir1_end); ir1++) { - const uint32_t i13 = fastdiv(ir1, &octx->mm_div_ne12_ne1); - const uint32_t i12 = fastdiv(ir1 - i13 * ne12 * ne1, &octx->mm_div_ne1); + const uint32_t i13 = fastdiv(ir1, &mmctx->mm_div_ne12_ne1); + const uint32_t i12 = fastdiv(ir1 - i13 * ne12 * ne1, &mmctx->mm_div_ne1); const uint32_t i11 = (ir1 - i13 * ne12 * ne1 - i12 * ne1); // broadcast src0 into src1 - const uint32_t i03 = fastdiv(i13, &octx->mm_div_r3); - const uint32_t i02 = fastdiv(i12, &octx->mm_div_r2); + const uint32_t i03 = fastdiv(i13, &mmctx->mm_div_r3); + const uint32_t i02 = fastdiv(i12, &mmctx->mm_div_r2); const uint32_t i1 = i11; const uint32_t i2 = i12; @@ -1155,7 +1632,7 @@ static void matmul_4d(struct htp_matmul_type * mt, struct htp_ops_context * octx const uint32_t ir0_block_end = MIN(iir0 + blck_0, ir0_end); for (uint32_t ir0 = iir0; ir0 < ir0_block_end; ir0++) { const uint8_t * restrict src0_row = src0_base + ir0 * nb01; - mt->vec_dot(ne00, &dst_col[ir0], src0_row, src1_col); + mmctx->vec_dot_1x1(ne00, &dst_col[ir0], src0_row, src1_col); } } } @@ -1170,7 +1647,7 @@ static void matmul_4d(struct htp_matmul_type * mt, struct htp_ops_context * octx } // src1 tensor is already in VTCM spad -static void matmul_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx, uint32_t nth, uint32_t ith) { +static void matmul_2d(unsigned int nth, unsigned int ith, void * data) { htp_matmul_preamble; const uint32_t src0_nrows = ne01 * ne02 * ne03; // src0 rows @@ -1195,7 +1672,7 @@ static void matmul_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx // Per-thread VTCM scratchpads for all tensors // Note that the entire src1 tensor is already in VTCM // For other tensors we allocate N rows per thread, padded to HVX vector size - uint8_t * restrict spad_dst = dst_spad->data + dst_spad->size_per_thread * ith; + uint8_t * restrict spad_dst = dst_spad->data + dst_spad->size_per_thread * ith; uint8_t * restrict spad_src0 = src0_spad->data + src0_spad->size_per_thread * ith; uint8_t * restrict src1_data = src1_spad->data; @@ -1219,11 +1696,21 @@ static void matmul_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx for (uint32_t ir0 = src0_start_row; ir0 < src0_end_row_x2; ir0 += 2) { const uint8_t * ss0 = dma_queue_pop(dma_queue).dst; - #pragma unroll(2) - for (uint32_t ir1 = 0; ir1 < src1_nrows; ++ir1) { + // Process src1 columns in pairs (2×2 tiling) + uint32_t ir1 = 0; + for (; ir1 + 1 < src1_nrows; ir1 += 2) { + const uint8_t * restrict src1_col0 = (const uint8_t *) (src1_data + (ir1+0) * src1_stride); + const uint8_t * restrict src1_col1 = (const uint8_t *) (src1_data + (ir1+1) * src1_stride); + float * restrict dst_row0 = (float *) (dst->data + ((ir1+0) * dst_row_size)); + float * restrict dst_row1 = (float *) (dst->data + ((ir1+1) * dst_row_size)); + mmctx->vec_dot_2x2(ne00, &dst_row0[ir0], &dst_row1[ir0], ss0, ss0 + src0_stride, src1_col0, src1_col1); + } + + // Handle remaining src1 rows (fallback to 2×1) + for (; ir1 < src1_nrows; ++ir1) { const uint8_t * restrict src1_col = (const uint8_t *) (src1_data + ir1 * src1_stride); float * restrict dst_row = (float *) (dst->data + (ir1 * dst_row_size)); - mt->vec_dot_rx2(ne00, &dst_row[ir0], ss0, src0_stride, src1_col); + mmctx->vec_dot_2x1(ne00, &dst_row[ir0], ss0, ss0 + src0_stride, src1_col); } // Prefetch next (n + spad_nrows) row @@ -1247,20 +1734,20 @@ static void matmul_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx for (uint32_t ir1 = 0; ir1 < src1_nrows; ++ir1) { const uint8_t * restrict src1_col = (const uint8_t *) (src1_data + ir1 * src1_stride); float * restrict dst_row = (float *) (dst->data + (ir1 * dst_row_size)); - mt->vec_dot(ne00, &dst_row[ir0], ss0, src1_col); + mmctx->vec_dot_1x1(ne00, &dst_row[ir0], ss0, src1_col); } } t2 = HAP_perf_get_qtimer_count(); - FARF(HIGH, "matmul-%s %d/%d: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u -> %ux%ux%ux%u usec %u\n", mt->type, ith, nth, + FARF(HIGH, "matmul-%s %d/%d: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u -> %ux%ux%ux%u usec %u\n", mmctx->type, ith, nth, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0_start_row, src0_end_row, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1)); } // q8x4x2 src1 tensor is already in VTCM spad -static void matvec_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx, uint32_t nth, uint32_t ith) { +static void matvec_2d(unsigned int nth, unsigned int ith, void * data) { htp_matmul_preamble; const uint32_t src0_nrows = ne01; @@ -1311,7 +1798,7 @@ static void matvec_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx // Process src0 rows for (uint32_t ir0 = src0_start_row; ir0 < src0_end_row_x2; ir0 += 2) { const uint8_t * ss0 = dma_queue_pop(dma_queue).dst; - mt->vec_dot_rx2(ne00, &tmp[ir0 - src0_start_row], ss0, src0_stride, src1_col); + mmctx->vec_dot_2x1(ne00, &tmp[ir0 - src0_start_row], ss0, ss0 + src0_stride, src1_col); // Prefetch next (n + spad_nrows) row const uint32_t pr0 = (ir0 + MM_SPAD_SRC0_NROWS); @@ -1329,14 +1816,14 @@ static void matvec_2d(struct htp_matmul_type * mt, struct htp_ops_context * octx dma_queue_push_ddr_to_vtcm(dma_queue, dma_make_ptr(spad_src0 + is0 * src0_stride, src0_row + ir0 * src0_row_size), src0_stride, src0_row_size, 1); const uint8_t * ss0 = dma_queue_pop(dma_queue).dst; - mt->vec_dot(ne00, &tmp[ir0 - src0_start_row], ss0, src1_col); + mmctx->vec_dot_1x1(ne00, &tmp[ir0 - src0_start_row], ss0, src1_col); } hvx_copy_f32_ua((uint8_t *) &dst_col[src0_start_row], (uint8_t *) tmp, src0_end_row - src0_start_row); t2 = HAP_perf_get_qtimer_count(); - FARF(HIGH, "matvec-%s %u/%u: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u -> %ux%ux%ux%u usec %u\n", mt->type, ith, nth, + FARF(HIGH, "matvec-%s %u/%u: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u -> %ux%ux%ux%u usec %u\n", mmctx->type, ith, nth, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0_start_row, src0_end_row, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1)); @@ -1350,7 +1837,7 @@ struct mmid_row_mapping { }; // src1 tensor is already in VTCM spad -static void matmul_id(struct htp_matmul_type * mt, struct htp_ops_context * octx, uint32_t nth, uint32_t ith) { +static void matmul_id(unsigned int nth, unsigned int ith, void * data) { htp_matmul_preamble; struct htp_tensor * restrict ids = &octx->src2; @@ -1423,11 +1910,10 @@ static void matmul_id(struct htp_matmul_type * mt, struct htp_ops_context * octx const int rm2 = row_mapping.i2; // token idx const uint32_t ir1 = src1_nrows == 1 ? 0 : rm1; // src1 row idx - const uint8_t * restrict src1_col = - (const uint8_t *) (src1_data + (ir1 + rm2 * ne11 + 0) * src1_row_size); + const uint8_t * restrict src1_col = (const uint8_t *) (src1_data + (ir1 + rm2 * ne11 + 0) * src1_row_size); float * dst_row = (float *) (dst->data + (rm1 * nb1 + rm2 * nb2 + 0)); - mt->vec_dot_rx2(ne00, &dst_row[ir0], ss0, src0_row_size_padded, src1_col); + mmctx->vec_dot_2x1(ne00, &dst_row[ir0], ss0, ss0 + src0_row_size_padded, src1_col); } // Prefetch next (n + spad_nrows) row @@ -1453,25 +1939,24 @@ static void matmul_id(struct htp_matmul_type * mt, struct htp_ops_context * octx const int rm2 = row_mapping.i2; // token idx const uint32_t ir1 = src1_nrows == 1 ? 0 : rm1; // src1 row idx - const uint8_t * restrict src1_col = - (const uint8_t *) (src1_data + (ir1 + rm2 * ne11 + 0) * src1_row_size); + const uint8_t * restrict src1_col = (const uint8_t *) (src1_data + (ir1 + rm2 * ne11 + 0) * src1_row_size); float * dst_row = (float *) (dst->data + (rm1 * nb1 + rm2 * nb2 + 0)); - mt->vec_dot(ne00, &dst_row[ir0], ss0, src1_col); + mmctx->vec_dot_1x1(ne00, &dst_row[ir0], ss0, src1_col); } } } t2 = HAP_perf_get_qtimer_count(); - FARF(HIGH, "matmul-id-%s %d/%d: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u (%ux%ux%ux%u) -> %ux%ux%ux%u usec %u\n", mt->type, + FARF(HIGH, "matmul-id-%s %d/%d: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u (%ux%ux%ux%u) -> %ux%ux%ux%u usec %u\n", mmctx->type, ith, nth, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0_start_row, src0_end_row, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], ids->ne[0], ids->ne[1], ids->ne[2], ids->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1)); } // src1 tensor is already in VTCM spad -static void matvec_id(struct htp_matmul_type * mt, struct htp_ops_context * octx, uint32_t nth, uint32_t ith) { +static void matvec_id(unsigned int nth, unsigned int ith, void * data) { htp_matmul_preamble; struct htp_tensor * restrict ids = &octx->src2; @@ -1531,7 +2016,7 @@ static void matvec_id(struct htp_matmul_type * mt, struct htp_ops_context * octx // Process src0 rows for (uint32_t ir0 = src0_start_row; ir0 < src0_end_row_x2; ir0 += 2) { const uint8_t * ss0 = dma_queue_pop(dma_queue).dst; - mt->vec_dot_rx2(ne00, &dst_row[ir0], ss0, src0_row_size_padded, src1_col); + mmctx->vec_dot_2x1(ne00, &dst_row[ir0], ss0, ss0 + src0_row_size_padded, src1_col); // Prefetch next (n + spad_nrows) row const int pr0 = (ir0 + MM_SPAD_SRC0_NROWS); @@ -1549,13 +2034,13 @@ static void matvec_id(struct htp_matmul_type * mt, struct htp_ops_context * octx dma_queue_push_ddr_to_vtcm(dma_queue, dma_make_ptr(spad_src0 + is0 * src0_row_size_padded, src0_row + ir0 * src0_row_size), src0_row_size_padded, src0_row_size, 1); const uint8_t * ss0 = dma_queue_pop(dma_queue).dst; - mt->vec_dot(ne00, &dst_row[ir0], ss0, src1_col); + mmctx->vec_dot_1x1(ne00, &dst_row[ir0], ss0, src1_col); } } t2 = HAP_perf_get_qtimer_count(); - FARF(HIGH, "matvec-id-%s %d/%d: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u (%ux%ux%ux%u) -> %ux%ux%ux%u usec %u\n", mt->type, + FARF(HIGH, "matvec-id-%s %d/%d: %ux%ux%ux%u (%u:%u) * %ux%ux%ux%u (%ux%ux%ux%u) -> %ux%ux%ux%u usec %u\n", mmctx->type, ith, nth, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0_start_row, src0_end_row, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], src2->ne[0], src2->ne[1], src2->ne[2], src2->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1)); @@ -1754,12 +2239,14 @@ static void quantize_row_f32_q8x4x2(float * restrict x, uint8_t * restrict y, ui hvx_copy_f16_ua(y_d, t_d, nb * 8); } -static void quantize_f32_q8x4x2(const struct htp_tensor * src, - uint8_t * restrict dst, - struct htp_spad * spad, - uint32_t nth, - uint32_t ith, - uint32_t nrows_per_thread) { +static void quantize_f32_q8x4x2(unsigned int nth, unsigned int ith, void * data) { + struct htp_matmul_context * mmctx = data; + struct htp_ops_context * octx = mmctx->octx; + + const struct htp_tensor * src = &octx->src1; + uint8_t * restrict dst = octx->src1_spad.data; + struct htp_spad * spad = &octx->src0_spad; + uint32_t nrows_per_thread = mmctx->src1_nrows_per_thread; uint64_t t1 = HAP_perf_get_qtimer_count(); @@ -1799,8 +2286,14 @@ static void quantize_f32_q8x4x2(const struct htp_tensor * src, ir_last, src_row_size, dst_row_size, (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1)); } -static void quantize_f32_f16(const struct htp_tensor * src, uint8_t * restrict dst, uint32_t nth, uint32_t ith, - uint32_t nrows_per_thread, uint32_t dst_stride) { +static void quantize_f32_f16(unsigned int nth, unsigned int ith, void * data) { + struct htp_matmul_context * mmctx = data; + struct htp_ops_context * octx = mmctx->octx; + + const struct htp_tensor * src = &octx->src1; + uint8_t * restrict dst = octx->src1_spad.data; + uint32_t nrows_per_thread = mmctx->src1_nrows_per_thread; + uint32_t dst_stride = octx->src1_spad.stride; uint64_t t1 = HAP_perf_get_qtimer_count(); @@ -1835,8 +2328,14 @@ static void quantize_f32_f16(const struct htp_tensor * src, uint8_t * restrict d } // TODO just a plain copy that should be done via the DMA during the Op setup -static void quantize_f16_f16(const struct htp_tensor * src, uint8_t * restrict dst, uint32_t nth, uint32_t ith, - uint32_t nrows_per_thread, uint32_t dst_stride) { +static void quantize_f16_f16(unsigned int nth, unsigned int ith, void * data) { + struct htp_matmul_context * mmctx = data; + struct htp_ops_context * octx = mmctx->octx; + + const struct htp_tensor * src = &octx->src1; + uint8_t * restrict dst = octx->src1_spad.data; + uint32_t nrows_per_thread = mmctx->src1_nrows_per_thread; + uint32_t dst_stride = octx->src1_spad.stride; uint64_t t1 = HAP_perf_get_qtimer_count(); @@ -1870,213 +2369,76 @@ static void quantize_f16_f16(const struct htp_tensor * src, uint8_t * restrict d ir_last, src_row_size, src_stride, dst_stride, (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1)); } -static void htp_quantize_f32_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - quantize_f32_q8x4x2(&octx->src1, octx->src1_spad.data, &octx->src0_spad, n, i, octx->src1_nrows_per_thread); -} - -static void htp_quantize_f32_f16(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - quantize_f32_f16(&octx->src1, octx->src1_spad.data, n, i, octx->src1_nrows_per_thread, octx->src1_spad.stride); -} - -static void htp_quantize_f16_f16(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - quantize_f16_f16(&octx->src1, octx->src1_spad.data, n, i, octx->src1_nrows_per_thread, octx->src1_spad.stride); -} - -// ** matmul/matvec callbacks for worker_pool - -static void htp_matvec_2d_q4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q4x4x2_q8x4x2_rx2; - - matvec_2d(&mt, octx, n, i); -} - -static void htp_matmul_2d_q4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q4x4x2_q8x4x2_rx2; - - matmul_2d(&mt, octx, n, i); -} - -static void htp_matvec_2d_q8x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q8x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q8x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q8x4x2_q8x4x2_rx2; - - matvec_2d(&mt, octx, n, i); -} - -static void htp_matmul_2d_q8x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q8x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q8x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q8x4x2_q8x4x2_rx2; - - matmul_2d(&mt, octx, n, i); -} - -static void htp_matvec_2d_mxfp4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "mxfp4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_mxfp4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_mxfp4x4x2_q8x4x2_rx2; - - matvec_2d(&mt, octx, n, i); -} - -static void htp_matmul_2d_mxfp4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "mxfp4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_mxfp4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_mxfp4x4x2_q8x4x2_rx2; - - matmul_2d(&mt, octx, n, i); -} - -static void htp_matvec_2d_f16_f16(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "f16-f16"; - mt.vec_dot = vec_dot_f16_f16_aa; - mt.vec_dot_rx2 = vec_dot_f16_f16_aa_rx2; - - matvec_2d(&mt, octx, n, i); -} - -static void htp_matmul_2d_f16_f16(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "f16-f16"; - mt.vec_dot = vec_dot_f16_f16_aa; - mt.vec_dot_rx2 = vec_dot_f16_f16_aa_rx2; - - matmul_2d(&mt, octx, n, i); -} - -static void htp_matmul_4d_f16_f32(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "f16-f32"; - mt.vec_dot = vec_dot_f16_f32_uu; - - matmul_4d(&mt, octx, n, i); -} - -static void htp_matmul_4d_f16_f16(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "f16-f16"; - mt.vec_dot = vec_dot_f16_f16_uu; - - matmul_4d(&mt, octx, n, i); -} - -// ** matmul-id callbacks for worker_pool - -static void htp_matvec_id_q4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q4x4x2_q8x4x2_rx2; - - matvec_id(&mt, octx, n, i); -} - -static void htp_matmul_id_q4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q4x4x2_q8x4x2_rx2; - - matmul_id(&mt, octx, n, i); -} - -static void htp_matvec_id_q8x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q8x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q8x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q8x4x2_q8x4x2_rx2; - - matvec_id(&mt, octx, n, i); -} - -static void htp_matmul_id_q8x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "q8x4x2-q8x4x2"; - mt.vec_dot = vec_dot_q8x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_q8x4x2_q8x4x2_rx2; - - matmul_id(&mt, octx, n, i); -} - -static void htp_matvec_id_mxfp4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "mxfp4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_mxfp4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_mxfp4x4x2_q8x4x2_rx2; - - matvec_id(&mt, octx, n, i); -} - -static void htp_matmul_id_mxfp4x4x2_q8x4x2(unsigned int n, unsigned int i, void * data) { - struct htp_ops_context * octx = data; - - struct htp_matmul_type mt; - mt.type = "mxfp4x4x2-q8x4x2"; - mt.vec_dot = vec_dot_mxfp4x4x2_q8x4x2; - mt.vec_dot_rx2 = vec_dot_mxfp4x4x2_q8x4x2_rx2; - - matmul_id(&mt, octx, n, i); -} - -// ** main matmul entry point static inline bool htp_is_permuted(const struct htp_tensor * t) { return t->nb[0] > t->nb[1] || t->nb[1] > t->nb[2] || t->nb[2] > t->nb[3]; } +static int htp_mminit_vec_dot(struct htp_matmul_context * mmctx, enum htp_data_type type) { + switch (type) { + case HTP_TYPE_Q4_0: + mmctx->type = "q4x4x2-f32"; + mmctx->vec_dot_1x1 = vec_dot_q4x4x2_q8x4x2_1x1; + mmctx->vec_dot_2x1 = vec_dot_q4x4x2_q8x4x2_2x1; + mmctx->vec_dot_2x2 = vec_dot_q4x4x2_q8x4x2_2x2; + return 0; + case HTP_TYPE_Q8_0: + mmctx->type = "q8x4x2-f32"; + mmctx->vec_dot_1x1 = vec_dot_q8x4x2_q8x4x2_1x1; + mmctx->vec_dot_2x1 = vec_dot_q8x4x2_q8x4x2_2x1; + mmctx->vec_dot_2x2 = vec_dot_q8x4x2_q8x4x2_2x2; + return 0; + case HTP_TYPE_MXFP4: + mmctx->type = "mxfp4x4x2-f32"; + mmctx->vec_dot_1x1 = vec_dot_mxfp4x4x2_q8x4x2_1x1; + mmctx->vec_dot_2x1 = vec_dot_mxfp4x4x2_q8x4x2_2x1; + mmctx->vec_dot_2x2 = vec_dot_mxfp4x4x2_q8x4x2_2x2; + return 0; + default: + return -1; + } +} + +static void htp_mminit_spad(struct htp_ops_context * octx, + size_t dst_row_size, + size_t src0_row_size_padded, + size_t src1_row_size, + uint32_t src1_nrows, + size_t src2_spad_size_per_thread) { + octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); + octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); + octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); + + if (src2_spad_size_per_thread > 0) { + octx->src2_spad.size_per_thread = src2_spad_size_per_thread; + octx->src2_spad.size = octx->src2_spad.size_per_thread; + } + + // src0 spad is also used in dynamic quantizer to store padded src1 rows + size_t src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); + if (octx->src0_spad.size_per_thread < src1_row_size_padded) { + octx->src0_spad.size_per_thread = src1_row_size_padded; + } + + octx->src1_spad.size = octx->src1_spad.size_per_thread; + octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; + octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; +} + int op_matmul(struct htp_ops_context * octx) { htp_matmul_tensors_preamble; - const char * op_type; + struct htp_matmul_context mmctx_struct = {0}; + struct htp_matmul_context * mmctx = &mmctx_struct; + mmctx->octx = octx; const uint32_t src0_nrows = ne01 * ne02 * ne03; const uint32_t src1_nrows = ne11 * ne12 * ne13; + // Compute src0_nrows_per_thread + mmctx->src0_nrows_per_thread = (src0_nrows + octx->n_threads - 1) / octx->n_threads; + mmctx->src0_nrows_per_thread += (mmctx->src0_nrows_per_thread & 1); // round up to even + const size_t src0_row_size = nb01; const size_t dst_row_size = nb1; size_t src1_row_size = nb11; @@ -2085,181 +2447,95 @@ int op_matmul(struct htp_ops_context * octx) { size_t src1_row_size_padded; worker_callback_t quant_job_func; - worker_callback_t matmul_job_func; + worker_callback_t matmul_job_func = src1_nrows > 1 ? matmul_2d : matvec_2d; bool need_quant = !(octx->flags & HTP_OPFLAGS_SKIP_QUANTIZE); - switch (src0->type) { - case HTP_TYPE_Q4_0: - op_type = "q4x4x2-f32"; - quant_job_func = htp_quantize_f32_q8x4x2; - if (src1_nrows > 1) { - matmul_job_func = htp_matmul_2d_q4x4x2_q8x4x2; - } else { - matmul_job_func = htp_matvec_2d_q4x4x2_q8x4x2; - } + if (src0->type == HTP_TYPE_F16) { + // Try optimized f16-f16 path first (src1 in VTCM) + const size_t f16_src1_row_size = hex_round_up(ne10 * 2, 128); + const size_t f16_src1_spad_size = hex_round_up(f16_src1_row_size * src1_nrows, 256); + const size_t f16_src0_spad_size = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256) * octx->n_threads; + const size_t f16_dst_spad_size = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256) * octx->n_threads; - src1_row_size = q8x4x2_row_size(ne10); // row size post quantization + const size_t f16_total_size = f16_src1_spad_size + f16_src0_spad_size + f16_dst_spad_size; - // Entire src1 tensor is placed into the VTCM - // For other tensors we allocate N rows per thread, padded to HVX vector size + // Default matmul implementation does not support multi-batch src0 (N-vs-N broadcasting). + // It only supports 1-vs-N broadcasting (src0 is 2D) or standard 2D matmul. + const bool is_batched = (ne02 > 1) || (ne03 > 1); + const bool is_permuted = htp_is_permuted(&octx->src0) || htp_is_permuted(&octx->src1); + + if (!is_batched && !is_permuted && f16_total_size <= octx->ctx->vtcm_size) { + // Optimized path + quant_job_func = (src1->type == HTP_TYPE_F32) ? quantize_f32_f16 : quantize_f16_f16; + mmctx->type = "f16-f16"; + mmctx->vec_dot_1x1 = vec_dot_f16_f16_aa_1x1; + mmctx->vec_dot_2x1 = vec_dot_f16_f16_aa_2x1; + mmctx->vec_dot_2x2 = vec_dot_f16_f16_aa_2x2; + + src1_row_size = f16_src1_row_size; // row size post quantization octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); - // src0 spad is also used in dynamic quantizer to store padded src1 rows - src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); - if (octx->src0_spad.size_per_thread < src1_row_size_padded) { - octx->src0_spad.size_per_thread = src1_row_size_padded; - } - octx->src1_spad.size = octx->src1_spad.size_per_thread; octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - break; - - case HTP_TYPE_Q8_0: - op_type = "q8x4x2-f32"; - quant_job_func = htp_quantize_f32_q8x4x2; - if (src1_nrows > 1) { - matmul_job_func = htp_matmul_2d_q8x4x2_q8x4x2; + } else { + // Fallback to f16/f32 (DDR) if src1 doesn't fit in VTCM or broadcasting is required + quant_job_func = NULL; + if (src1->type == HTP_TYPE_F32) { + mmctx->type = "f16-f32"; + mmctx->vec_dot_1x1 = vec_dot_f16_f32_uu_1x1; + matmul_job_func = matmul_4d; } else { - matmul_job_func = htp_matvec_2d_q8x4x2_q8x4x2; + mmctx->type = "f16-f16"; + mmctx->vec_dot_1x1 = vec_dot_f16_f16_uu_1x1; + matmul_job_func = matmul_4d; } - src1_row_size = q8x4x2_row_size(ne10); // row size post quantization - - // Entire src1 tensor is placed into the VTCM - // For other tensors we allocate N rows per thread, padded to HVX vector size + src1_row_size = nb11; // original row size in DDR octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); - octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); + octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size, 256); + octx->src1_spad.size_per_thread = hex_round_up(MM_SPAD_SRC1_NROWS * src1_row_size, 256); - // src0 spad is also used in dynamic quantizer to store padded src1 rows - src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); - if (octx->src0_spad.size_per_thread < src1_row_size_padded) { - octx->src0_spad.size_per_thread = src1_row_size_padded; - } - - octx->src1_spad.size = octx->src1_spad.size_per_thread; octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; + octx->src1_spad.size = octx->src1_spad.size_per_thread * octx->n_threads; octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - break; - case HTP_TYPE_MXFP4: - op_type = "mxfp4x4x2-f32"; - quant_job_func = htp_quantize_f32_q8x4x2; - if (src1_nrows > 1) { - matmul_job_func = htp_matmul_2d_mxfp4x4x2_q8x4x2; - } else { - matmul_job_func = htp_matvec_2d_mxfp4x4x2_q8x4x2; - } + // Init fastdiv for matmul_4d (supports broadcasting) + mmctx->mm_div_ne12_ne1 = init_fastdiv_values(src1->ne[2] * dst->ne[1]); + mmctx->mm_div_ne1 = init_fastdiv_values(dst->ne[1]); + mmctx->mm_div_r2 = init_fastdiv_values(src1->ne[2] / src0->ne[2]); + mmctx->mm_div_r3 = init_fastdiv_values(src1->ne[3] / src0->ne[3]); - src1_row_size = q8x4x2_row_size(ne10); // row size post quantization - - // Entire src1 tensor is placed into the VTCM - // For other tensors we allocate N rows per thread, padded to HVX vector size - - octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); - octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); - - // src0 spad is also used in dynamic quantizer to store padded src1 rows - src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); - if (octx->src0_spad.size_per_thread < src1_row_size_padded) { - octx->src0_spad.size_per_thread = src1_row_size_padded; - } - - octx->src1_spad.size = octx->src1_spad.size_per_thread; - octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; - octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - break; - - case HTP_TYPE_F16: - { - // Try optimized f16-f16 path first (src1 in VTCM) - const size_t f16_src1_row_size = hex_round_up(ne10 * 2, 128); - const size_t f16_src1_spad_size = hex_round_up(f16_src1_row_size * src1_nrows, 256); - const size_t f16_src0_spad_size = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256) * octx->n_threads; - const size_t f16_dst_spad_size = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256) * octx->n_threads; - - const size_t f16_total_size = f16_src1_spad_size + f16_src0_spad_size + f16_dst_spad_size; - - // Default matmul implementation does not support multi-batch src0 (N-vs-N broadcasting). - // It only supports 1-vs-N broadcasting (src0 is 2D) or standard 2D matmul. - const bool is_batched = (ne02 > 1) || (ne03 > 1); - const bool is_permuted = htp_is_permuted(&octx->src0) || htp_is_permuted(&octx->src1); - - if (!is_batched && !is_permuted && f16_total_size <= octx->ctx->vtcm_size) { - // Optimized path - op_type = "f16-f16"; - quant_job_func = (src1->type == HTP_TYPE_F32) ? htp_quantize_f32_f16 : htp_quantize_f16_f16; - if (src1_nrows > 1) { - matmul_job_func = htp_matmul_2d_f16_f16; - } else { - matmul_job_func = htp_matvec_2d_f16_f16; - } - - src1_row_size = f16_src1_row_size; // row size post quantization - - octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); - octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); - - octx->src1_spad.size = octx->src1_spad.size_per_thread; - octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; - octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - } else { - // Fallback to f16/f32 (DDR) if src1 doesn't fit in VTCM or broadcasting is required - quant_job_func = NULL; - if (src1->type == HTP_TYPE_F32) { - op_type = "f16-f32"; - matmul_job_func = htp_matmul_4d_f16_f32; - } else { - op_type = "f16-f16"; - matmul_job_func = htp_matmul_4d_f16_f16; - } - - src1_row_size = nb11; // original row size in DDR - - octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size, 256); - octx->src1_spad.size_per_thread = hex_round_up(MM_SPAD_SRC1_NROWS * src1_row_size, 256); - - octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; - octx->src1_spad.size = octx->src1_spad.size_per_thread * octx->n_threads; - octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - - // Init fastdiv for matmul_4d (supports broadcasting) - octx->mm_div_ne12_ne1 = init_fastdiv_values(src1->ne[2] * dst->ne[1]); - octx->mm_div_ne1 = init_fastdiv_values(dst->ne[1]); - octx->mm_div_r2 = init_fastdiv_values(src1->ne[2] / src0->ne[2]); - octx->mm_div_r3 = init_fastdiv_values(src1->ne[3] / src0->ne[3]); - - need_quant = false; - } - } - break; - - default: + need_quant = false; + } + } else { + if (htp_mminit_vec_dot(mmctx, src0->type) != 0) { return HTP_STATUS_NO_SUPPORT; + } + + quant_job_func = quantize_f32_q8x4x2; + src1_row_size = q8x4x2_row_size(ne10); + htp_mminit_spad(octx, dst_row_size, src0_row_size_padded, src1_row_size, src1_nrows, 0); } // VTCM scratchpads for all tensors size_t spad_size = octx->src1_spad.size + octx->src0_spad.size + octx->dst_spad.size; - FARF(HIGH, "matmul-%s : src0-spad-size %u src1-spad-size %u dst-spad-size %u (%zu)\n", op_type, + FARF(HIGH, "matmul-%s : src0-spad-size %u src1-spad-size %u dst-spad-size %u (%zu)\n", mmctx->type, octx->src0_spad.size, octx->src1_spad.size, octx->dst_spad.size, spad_size); - FARF(HIGH, "matmul-%s : %ux%ux%ux%u * %ux%ux%ux%u-> %ux%ux%ux%u (0x%p, 0x%p, 0x%p)\n", op_type, src0->ne[0], + FARF(HIGH, "matmul-%s : %ux%ux%ux%u * %ux%ux%ux%u-> %ux%ux%ux%u (0x%p, 0x%p, 0x%p)\n", mmctx->type, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], src0->data, src1->data, dst->data); // Make sure the reserved vtcm size is sufficient if (octx->ctx->vtcm_size < spad_size) { - FARF(ERROR, "matmul-%s : current VTCM reservation %zu is too small, needed %zu\n", op_type, + FARF(ERROR, "matmul-%s : current VTCM reservation %zu is too small, needed %zu\n", mmctx->type, octx->ctx->vtcm_size, spad_size); return HTP_STATUS_VTCM_TOO_SMALL; } @@ -2268,40 +2544,32 @@ int op_matmul(struct htp_ops_context * octx) { octx->src1_spad.data = octx->src0_spad.data + octx->src0_spad.size; octx->dst_spad.data = octx->src1_spad.data + octx->src1_spad.size; - octx->src0_nrows_per_thread = (src0_nrows + octx->n_threads - 1) / octx->n_threads; - octx->src0_nrows_per_thread += (octx->src0_nrows_per_thread & 1); // round up to even - octx->src0_spad.stride = src0_row_size_padded; octx->src1_spad.stride = src1_row_size; if (need_quant) { - // Run quant jobs - const uint32_t n_quant_jobs = MIN(src1_nrows, octx->n_threads); - octx->src1_nrows_per_thread = (src1_nrows + n_quant_jobs - 1) / n_quant_jobs; - worker_pool_run_func(octx->ctx->worker_pool, quant_job_func, octx, n_quant_jobs); + const uint32_t n_quant_jobs = MIN(src1_nrows, octx->n_threads); + mmctx->src1_nrows_per_thread = (src1_nrows + n_quant_jobs - 1) / n_quant_jobs; + worker_pool_run_func(octx->ctx->worker_pool, quant_job_func, mmctx, n_quant_jobs); } if (!(octx->flags & HTP_OPFLAGS_SKIP_COMPUTE)) { - // Run matmul jobs const uint32_t n_matmul_jobs = octx->n_threads; - worker_pool_run_func(octx->ctx->worker_pool, matmul_job_func, octx, n_matmul_jobs); + worker_pool_run_func(octx->ctx->worker_pool, matmul_job_func, mmctx, n_matmul_jobs); } return HTP_STATUS_OK; } -// ** main matmul-id entry point - int op_matmul_id(struct htp_ops_context * octx) { htp_matmul_tensors_preamble; + struct htp_matmul_context mmctx_struct = {0}; + struct htp_matmul_context * mmctx = &mmctx_struct; + mmctx->octx = octx; + struct htp_tensor * restrict ids = &octx->src2; - const char * op_type; - - worker_callback_t quant_job_func; - worker_callback_t matmul_id_job_func; - const size_t src0_row_size = nb01; const size_t dst_row_size = nb1; @@ -2310,6 +2578,13 @@ int op_matmul_id(struct htp_ops_context * octx) { const uint32_t src0_nrows = ne01; // per expert const uint32_t src1_nrows = ne11 * ne12 * ne13; + worker_callback_t quant_job_func; + worker_callback_t matmul_id_job_func = src1_nrows > 1 ? matmul_id : matvec_id; + + // Compute src0_nrows_per_thread + mmctx->src0_nrows_per_thread = (src0_nrows + octx->n_threads - 1) / octx->n_threads; + mmctx->src0_nrows_per_thread += (mmctx->src0_nrows_per_thread & 1); // round up to even + size_t src1_row_size; size_t src1_row_size_padded; @@ -2320,112 +2595,29 @@ int op_matmul_id(struct htp_ops_context * octx) { size_t matrix_row_counts_size = n_as * sizeof(uint32_t); size_t matrix_row_map_size = n_as * ids->ne[0] * ids->ne[1] * sizeof(struct mmid_row_mapping); - switch (src0->type) { - case HTP_TYPE_Q4_0: - op_type = "q4x2x2-f32"; - quant_job_func = htp_quantize_f32_q8x4x2; - src1_row_size = q8x4x2_row_size(ne10); // row size post quantization - if (src1_nrows > 1) { - matmul_id_job_func = htp_matmul_id_q4x4x2_q8x4x2; - } else { - matmul_id_job_func = htp_matvec_id_q4x4x2_q8x4x2; - } - - // Entire src1 tensor is placed into the VTCM - // For other tensors we allocate N rows per thread, padded to HVX vector size - octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); - octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); - octx->src2_spad.size_per_thread = hex_round_up(matrix_row_counts_size + matrix_row_map_size, 256); - - // src0 spad is also used in dynamic quantizer to store padded src1 rows - src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); - if (octx->src0_spad.size_per_thread < src1_row_size_padded) { - octx->src0_spad.size_per_thread = src1_row_size_padded; - } - - octx->src2_spad.size = octx->src2_spad.size_per_thread; - octx->src1_spad.size = octx->src1_spad.size_per_thread; - octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; - octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - break; - - case HTP_TYPE_Q8_0: - op_type = "q8x2x2-f32"; - quant_job_func = htp_quantize_f32_q8x4x2; - src1_row_size = q8x4x2_row_size(ne10); // row size post quantization - if (src1_nrows > 1) { - matmul_id_job_func = htp_matmul_id_q8x4x2_q8x4x2; - } else { - matmul_id_job_func = htp_matvec_id_q8x4x2_q8x4x2; - } - - // Entire src1 tensor is placed into the VTCM - // For other tensors we allocate N rows per thread, padded to HVX vector size - octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); - octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); - octx->src2_spad.size_per_thread = hex_round_up(matrix_row_counts_size + matrix_row_map_size, 256); - - // src0 spad is also used in dynamic quantizer to store padded src1 rows - src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); - if (octx->src0_spad.size_per_thread < src1_row_size_padded) { - octx->src0_spad.size_per_thread = src1_row_size_padded; - } - - octx->src2_spad.size = octx->src2_spad.size_per_thread; - octx->src1_spad.size = octx->src1_spad.size_per_thread; - octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; - octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - break; - - case HTP_TYPE_MXFP4: - op_type = "mxfp4x2x2-f32"; - quant_job_func = htp_quantize_f32_q8x4x2; - src1_row_size = q8x4x2_row_size(ne10); // row size post quantization - if (src1_nrows > 1) { - matmul_id_job_func = htp_matmul_id_mxfp4x4x2_q8x4x2; - } else { - matmul_id_job_func = htp_matvec_id_mxfp4x4x2_q8x4x2; - } - - // Entire src1 tensor is placed into the VTCM - // For other tensors we allocate N rows per thread, padded to HVX vector size - octx->dst_spad.size_per_thread = hex_round_up(MM_SPAD_DST_NROWS * dst_row_size, 256); - octx->src0_spad.size_per_thread = hex_round_up(MM_SPAD_SRC0_NROWS * src0_row_size_padded, 256); - octx->src1_spad.size_per_thread = hex_round_up(src1_row_size * src1_nrows, 256); - octx->src2_spad.size_per_thread = hex_round_up(matrix_row_counts_size + matrix_row_map_size, 256); - - // src0 spad is also used in dynamic quantizer to store padded src1 rows - src1_row_size_padded = hex_round_up(src1_row_size, QK_Q8_0x4x2 * sizeof(float)); - if (octx->src0_spad.size_per_thread < src1_row_size_padded) { - octx->src0_spad.size_per_thread = src1_row_size_padded; - } - - octx->src2_spad.size = octx->src2_spad.size_per_thread; - octx->src1_spad.size = octx->src1_spad.size_per_thread; - octx->src0_spad.size = octx->src0_spad.size_per_thread * octx->n_threads; - octx->dst_spad.size = octx->dst_spad.size_per_thread * octx->n_threads; - break; - - default: - return HTP_STATUS_NO_SUPPORT; + if (htp_mminit_vec_dot(mmctx, src0->type) != 0) { + return HTP_STATUS_NO_SUPPORT; } + quant_job_func = quantize_f32_q8x4x2; + src1_row_size = q8x4x2_row_size(ne10); + + const size_t src2_spad_size_per_thread = hex_round_up(matrix_row_counts_size + matrix_row_map_size, 256); + htp_mminit_spad(octx, dst_row_size, src0_row_size_padded, src1_row_size, src1_nrows, src2_spad_size_per_thread); + size_t spad_size = octx->src2_spad.size + octx->src1_spad.size + octx->src0_spad.size + octx->dst_spad.size; - FARF(HIGH, "matmul-id-%s : src0-spad-size %u src1-spad-size %u src2-spad-size %u dst-spad-size %u (%zu)\n", op_type, + FARF(HIGH, "matmul-id-%s : src0-spad-size %u src1-spad-size %u src2-spad-size %u dst-spad-size %u (%zu)\n", mmctx->type, octx->src0_spad.size, octx->src1_spad.size, octx->src2_spad.size, octx->dst_spad.size, spad_size); - FARF(HIGH, "matmul-id-%s : %ux%ux%ux%u * %ux%ux%ux%u (%ux%ux%ux%u) -> %ux%ux%ux%u (0x%p, 0x%p, 0x%p)\n", op_type, + FARF(HIGH, "matmul-id-%s : %ux%ux%ux%u * %ux%ux%ux%u (%ux%ux%ux%u) -> %ux%ux%ux%u (0x%p, 0x%p, 0x%p)\n", mmctx->type, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], ids->ne[0], ids->ne[1], ids->ne[2], ids->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], src0->data, src1->data, dst->data); // Make sure the reserved vtcm size is sufficient if (octx->ctx->vtcm_size < spad_size) { - FARF(ERROR, "matmul-id-%s : current VTCM reservation %zu is too small, needed %zu\n", op_type, - octx->ctx->vtcm_size, spad_size); + FARF(ERROR, "matmul-id-%s : current VTCM reservation %zu is too small, needed %zu\n", mmctx->type, octx->ctx->vtcm_size, spad_size); return HTP_STATUS_VTCM_TOO_SMALL; } @@ -2434,8 +2626,8 @@ int op_matmul_id(struct htp_ops_context * octx) { octx->src2_spad.data = octx->src1_spad.data + octx->src1_spad.size; octx->dst_spad.data = octx->src2_spad.data + octx->src2_spad.size; - octx->src0_nrows_per_thread = (src0_nrows + octx->n_threads - 1) / octx->n_threads; - octx->src0_nrows_per_thread += (octx->src0_nrows_per_thread & 1); // round up to even + octx->src0_spad.stride = src0_row_size_padded; + octx->src1_spad.stride = src1_row_size; if (src1_nrows > 1) { // initialize matrix_row_counts and map @@ -2447,8 +2639,7 @@ int op_matmul_id(struct htp_ops_context * octx) { // group rows by src0 matrix for (uint32_t iid1 = 0; iid1 < ids->ne[1]; ++iid1) { // token idx for (uint32_t id = 0; id < n_ids; ++id) { // expert idx - const uint32_t i02 = - *(const uint32_t *) ((const uint8_t *) ids->data + iid1 * ids->nb[1] + id * ids->nb[0]); + const uint32_t i02 = *(const uint32_t *) ((const uint8_t *) ids->data + iid1 * ids->nb[1] + id * ids->nb[0]); assert(i02 >= 0 && i02 < n_as); @@ -2460,16 +2651,14 @@ int op_matmul_id(struct htp_ops_context * octx) { // Setup worker pool callbacks if (!(octx->flags & HTP_OPFLAGS_SKIP_QUANTIZE)) { - // Run quant jobs const uint32_t n_quant_jobs = MIN(src1_nrows, octx->n_threads); - octx->src1_nrows_per_thread = (src1_nrows + n_quant_jobs - 1) / n_quant_jobs; - worker_pool_run_func(octx->ctx->worker_pool, quant_job_func, octx, n_quant_jobs); + mmctx->src1_nrows_per_thread = (src1_nrows + n_quant_jobs - 1) / n_quant_jobs; + worker_pool_run_func(octx->ctx->worker_pool, quant_job_func, mmctx, n_quant_jobs); } if (!(octx->flags & HTP_OPFLAGS_SKIP_COMPUTE)) { - // Run matmul-id jobs const uint32_t n_matmul_jobs = octx->n_threads; - worker_pool_run_func(octx->ctx->worker_pool, matmul_id_job_func, octx, n_matmul_jobs); + worker_pool_run_func(octx->ctx->worker_pool, matmul_id_job_func, mmctx, n_matmul_jobs); } return HTP_STATUS_OK; diff --git a/ggml/src/ggml-metal/ggml-metal-common.cpp b/ggml/src/ggml-metal/ggml-metal-common.cpp index 95627d3866..87e1378684 100644 --- a/ggml/src/ggml-metal/ggml-metal-common.cpp +++ b/ggml/src/ggml-metal/ggml-metal-common.cpp @@ -264,15 +264,25 @@ static std::vector ggml_metal_graph_optimize_reorder(const std::vector ggml_metal_graph_optimize_reorder(const std::vectorsrc[0]->nb[0] == ggml_type_size(op->src[0]->type)); + GGML_ASSERT(ggml_is_contiguous_rows(op->src[0])); char base[256]; char name[256]; - const char * op_str = "undefined"; + int op_num = -1; + switch (op->op) { - case GGML_OP_SUM_ROWS: - op_str = "sum_rows"; break; - case GGML_OP_MEAN: - op_str = "mean"; break; + case GGML_OP_SUM_ROWS: op_num = OP_SUM_ROWS_NUM_SUM_ROWS; break; + case GGML_OP_MEAN: op_num = OP_SUM_ROWS_NUM_MEAN; break; default: GGML_ABORT("fatal error"); }; - snprintf(base, 256, "kernel_%s_%s", op_str, ggml_type_name(op->src[0]->type)); + const char * t0_str = ggml_type_name(op->src[0]->type); + const char * t_str = ggml_type_name(op->type); - snprintf(name, 256, "%s", base); + const bool is_c4 = op->src[0]->ne[0] % 4 == 0; + + snprintf(base, 256, "kernel_sum_rows_%s_%s%s", t0_str, t_str, is_c4 ? "_4" : ""); + snprintf(name, 256, "%s_op=%d", base, op_num); ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name); if (!res.pipeline) { - res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr); + ggml_metal_cv_t cv = ggml_metal_cv_init(); + + ggml_metal_cv_set_int16(cv, op_num, FC_SUM_ROWS + 0); + + res = ggml_metal_library_compile_pipeline(lib, base, name, cv); + + ggml_metal_cv_free(cv); } res.smem = 32*sizeof(float); + if (is_c4) { + res.smem *= 4; + } + + res.c4 = is_c4; + return res; } diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 4ea0bfb94d..b4ca9c5dd6 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -1159,6 +1159,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: return has_simdgroup_reduction; + case GGML_OP_SET: case GGML_OP_CPY: case GGML_OP_DUP: case GGML_OP_CONT: diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h index 952e1be076..383e0d6e93 100644 --- a/ggml/src/ggml-metal/ggml-metal-impl.h +++ b/ggml/src/ggml-metal/ggml-metal-impl.h @@ -82,6 +82,7 @@ #define FC_COUNT_EQUAL 1100 #define FC_UNARY 1200 #define FC_BIN 1300 +#define FC_SUM_ROWS 1400 // op-specific constants #define OP_FLASH_ATTN_EXT_NQPSG 8 @@ -118,6 +119,8 @@ #define OP_UNARY_NUM_SOFTPLUS 115 #define OP_UNARY_NUM_EXPM1 116 +#define OP_SUM_ROWS_NUM_SUM_ROWS 10 +#define OP_SUM_ROWS_NUM_MEAN 11 // kernel argument structs // diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp index 7db95d1c84..c04e9fc7ff 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.cpp +++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp @@ -426,6 +426,10 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) { { n_fuse = ggml_metal_op_flash_attn_ext(ctx, idx); } break; + case GGML_OP_SET: + { + n_fuse = ggml_metal_op_set(ctx, idx); + } break; case GGML_OP_DUP: case GGML_OP_CPY: case GGML_OP_CONT: @@ -904,6 +908,11 @@ int ggml_metal_op_sum_rows(ggml_metal_op_t ctx, int idx) { GGML_TENSOR_LOCALS( int32_t, ne, op, ne); GGML_TENSOR_LOCALS(uint64_t, nb, op, nb); + GGML_ASSERT(ggml_is_contiguous_rows(op->src[0])); + + ggml_metal_buffer_id bid_src0 = ggml_metal_get_buffer_id(op->src[0]); + ggml_metal_buffer_id bid_dst = ggml_metal_get_buffer_id(op); + ggml_metal_kargs_sum_rows args = { /*.ne00 =*/ ne00, /*.ne01 =*/ ne01, @@ -925,21 +934,26 @@ int ggml_metal_op_sum_rows(ggml_metal_op_t ctx, int idx) { auto pipeline = ggml_metal_library_get_pipeline_sum_rows(lib, op); + if (pipeline.c4) { + args.ne00 = ne00/4; + args.ne0 = ne0/4; + } + int nth = 32; // SIMD width - while (nth < ne00 && nth < ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) { + while (nth < args.ne00 && nth < ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) { nth *= 2; } nth = std::min(nth, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)); - nth = std::min(nth, ne00); + nth = std::min(nth, (int) args.ne00); const size_t smem = pipeline.smem; ggml_metal_encoder_set_pipeline(enc, pipeline); ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0); - ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[0]), 1); - ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op), 2); + ggml_metal_encoder_set_buffer (enc, bid_src0, 1); + ggml_metal_encoder_set_buffer (enc, bid_dst, 2); ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0); @@ -1599,6 +1613,134 @@ int ggml_metal_op_solve_tri(ggml_metal_op_t ctx, int idx) { return 1; } +int ggml_metal_op_set(ggml_metal_op_t ctx, int idx) { + ggml_tensor * op = ctx->node(idx); + + ggml_metal_library_t lib = ctx->lib; + ggml_metal_encoder_t enc = ctx->enc; + + GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne); + GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb); + GGML_TENSOR_LOCALS( int32_t, ne1, op->src[1], ne); + GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb); + GGML_TENSOR_LOCALS( int32_t, ne, op, ne); + GGML_TENSOR_LOCALS(uint64_t, nb, op, nb); + + ggml_metal_buffer_id bid_src0 = ggml_metal_get_buffer_id(op->src[0]); + ggml_metal_buffer_id bid_src1 = ggml_metal_get_buffer_id(op->src[1]); + ggml_metal_buffer_id bid_dst = ggml_metal_get_buffer_id(op); + + const size_t pnb1 = ((const int32_t *) op->op_params)[0]; + const size_t pnb2 = ((const int32_t *) op->op_params)[1]; + const size_t pnb3 = ((const int32_t *) op->op_params)[2]; + const size_t offs = ((const int32_t *) op->op_params)[3]; + + const bool inplace = (bool) ((const int32_t *) op->op_params)[4]; + + if (!inplace) { + // run a separete kernel to cpy src->dst + // not sure how to avoid this + // TODO: make a simpler cpy_bytes kernel + + //const id pipeline = ctx->pipelines[GGML_METAL_PIPELINE_TYPE_CPY_F32_F32].obj; + auto pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[0]->type, op->type); + + ggml_metal_kargs_cpy args = { + /*.nk0 =*/ ne00, + /*.ne00 =*/ ne00, + /*.ne01 =*/ ne01, + /*.ne02 =*/ ne02, + /*.ne03 =*/ ne03, + /*.nb00 =*/ nb00, + /*.nb01 =*/ nb01, + /*.nb02 =*/ nb02, + /*.nb03 =*/ nb03, + /*.ne0 =*/ ne0, + /*.ne1 =*/ ne1, + /*.ne2 =*/ ne2, + /*.ne3 =*/ ne3, + /*.nb0 =*/ nb0, + /*.nb1 =*/ nb1, + /*.nb2 =*/ nb2, + /*.nb3 =*/ nb3, + }; + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer (enc, bid_src0, 1); + ggml_metal_encoder_set_buffer (enc, bid_dst, 2); + + const int nth = std::min(ggml_metal_pipeline_max_theads_per_threadgroup(pipeline), ne00); + + ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1); + + ggml_metal_op_concurrency_reset(ctx); + } + + auto pipeline = ggml_metal_library_get_pipeline_cpy(lib, op->src[1]->type, op->type); + + GGML_ASSERT(ne10 % ggml_blck_size(op->src[1]->type) == 0); + + int64_t nk0 = ne10; + if (ggml_is_quantized(op->src[1]->type)) { + nk0 = ne10/16; + } else if (ggml_is_quantized(op->type)) { + nk0 = ne10/ggml_blck_size(op->type); + } + + int nth = std::min(nk0, ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)); + + // when rows are small, we can batch them together in a single threadgroup + int nrptg = 1; + + // TODO: relax this constraint in the future + if (ggml_blck_size(op->src[1]->type) == 1 && ggml_blck_size(op->type) == 1) { + if (nth > nk0) { + nrptg = (nth + nk0 - 1)/nk0; + nth = nk0; + + if (nrptg*nth > ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) { + nrptg--; + } + } + } + + nth = std::min(nth, nk0); + + ggml_metal_kargs_cpy args = { + /*.nk0 =*/ nk0, + /*.ne00 =*/ ne10, + /*.ne01 =*/ ne11, + /*.ne02 =*/ ne12, + /*.ne03 =*/ ne13, + /*.nb00 =*/ nb10, + /*.nb01 =*/ nb11, + /*.nb02 =*/ nb12, + /*.nb03 =*/ nb13, + /*.ne0 =*/ ne10, + /*.ne1 =*/ ne11, + /*.ne2 =*/ ne12, + /*.ne3 =*/ ne13, + /*.nb0 =*/ ggml_element_size(op), + /*.nb1 =*/ pnb1, + /*.nb2 =*/ pnb2, + /*.nb3 =*/ pnb3, + }; + + const int nw0 = nrptg == 1 ? (nk0 + nth - 1)/nth : 1; + + bid_dst.offs += offs; + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer (enc, bid_src1, 1); + ggml_metal_encoder_set_buffer (enc, bid_dst, 2); + + ggml_metal_encoder_dispatch_threadgroups(enc, nw0*(ne11 + nrptg - 1)/nrptg, ne12, ne13, nth, nrptg, 1); + + return 1; +} + int ggml_metal_op_cpy(ggml_metal_op_t ctx, int idx) { ggml_tensor * op = ctx->node(idx); diff --git a/ggml/src/ggml-metal/ggml-metal-ops.h b/ggml/src/ggml-metal/ggml-metal-ops.h index 29456d70d5..f3e38c7aa9 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.h +++ b/ggml/src/ggml-metal/ggml-metal-ops.h @@ -59,6 +59,7 @@ int ggml_metal_op_ssm_conv (ggml_metal_op_t ctx, int idx); int ggml_metal_op_ssm_scan (ggml_metal_op_t ctx, int idx); int ggml_metal_op_rwkv (ggml_metal_op_t ctx, int idx); int ggml_metal_op_solve_tri (ggml_metal_op_t ctx, int idx); +int ggml_metal_op_set (ggml_metal_op_t ctx, int idx); int ggml_metal_op_cpy (ggml_metal_op_t ctx, int idx); int ggml_metal_op_pool_1d (ggml_metal_op_t ctx, int idx); int ggml_metal_op_pool_2d (ggml_metal_op_t ctx, int idx); diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 0036ba90ec..6c349aa0c9 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -77,6 +77,14 @@ static inline float dot(float x, float y) { return x*y; } +static inline float sum(float x) { + return x; +} + +static inline float sum(float4 x) { + return x[0] + x[1] + x[2] + x[3]; +} + // NOTE: this is not dequantizing - we are simply fitting the template template void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) { @@ -1501,33 +1509,35 @@ kernel void kernel_op_sum_f32( } } -template -kernel void kernel_sum_rows( +constant short FC_sum_rows_op [[function_constant(FC_SUM_ROWS + 0)]]; + +template +kernel void kernel_sum_rows_impl( constant ggml_metal_kargs_sum_rows & args, - device const float * src0, - device float * dst, - threadgroup float * shmem_f32 [[threadgroup(0)]], + device const char * src0, + device char * dst, + threadgroup char * shmem [[threadgroup(0)]], uint3 tgpig[[threadgroup_position_in_grid]], ushort3 tpitg[[thread_position_in_threadgroup]], ushort sgitg[[simdgroup_index_in_threadgroup]], ushort tiisg[[thread_index_in_simdgroup]], ushort3 ntg[[threads_per_threadgroup]]) { - int64_t i3 = tgpig.z; - int64_t i2 = tgpig.y; - int64_t i1 = tgpig.x; +#define FC_OP FC_sum_rows_op - if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) { - return; - } + const int i3 = tgpig.z; + const int i2 = tgpig.y; + const int i1 = tgpig.x; + + threadgroup T0 * shmem_t = (threadgroup T0 *) shmem; if (sgitg == 0) { - shmem_f32[tiisg] = 0.0f; + shmem_t[tiisg] = 0.0f; } - device const float * src_row = (device const float *) ((device const char *) src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03); - device float * dst_row = (device float *) ((device char *) dst + i1*args.nb1 + i2*args.nb2 + i3*args.nb3); + device const T0 * src_row = (device const T0 *) (src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03); + device T * dst_row = (device T *) (dst + i1*args.nb1 + i2*args.nb2 + i3*args.nb3); - float sumf = 0; + T0 sumf = T0(0.0f); for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) { sumf += src_row[i0]; @@ -1538,23 +1548,33 @@ kernel void kernel_sum_rows( threadgroup_barrier(mem_flags::mem_threadgroup); if (tiisg == 0) { - shmem_f32[sgitg] = sumf; + shmem_t[sgitg] = sumf; } threadgroup_barrier(mem_flags::mem_threadgroup); - sumf = shmem_f32[tiisg]; + sumf = shmem_t[tiisg]; sumf = simd_sum(sumf); if (tpitg.x == 0) { - dst_row[0] = norm ? sumf / args.ne00 : sumf; + if (FC_OP == OP_SUM_ROWS_NUM_MEAN) { + if (is_same::value) { + dst_row[0] = sum(sumf) / (4*args.ne00); + } else { + dst_row[0] = sum(sumf) / args.ne00; + } + } else { + dst_row[0] = sum(sumf); + } } + +#undef FC_OP } -typedef decltype(kernel_sum_rows) kernel_sum_rows_t; +typedef decltype(kernel_sum_rows_impl) kernel_sum_rows_t; -template [[host_name("kernel_sum_rows_f32")]] kernel kernel_sum_rows_t kernel_sum_rows; -template [[host_name("kernel_mean_f32")]] kernel kernel_sum_rows_t kernel_sum_rows; +template [[host_name("kernel_sum_rows_f32_f32")]] kernel kernel_sum_rows_t kernel_sum_rows_impl; +template [[host_name("kernel_sum_rows_f32_f32_4")]] kernel kernel_sum_rows_t kernel_sum_rows_impl; template kernel void kernel_cumsum_blk( @@ -2435,9 +2455,6 @@ kernel void kernel_solve_tri_f32( const short K = FC_solve_tri_k; const short NP = PAD2(N, NW); - const int32_t ne02 = args.ne02; - const int32_t ne03 = args.ne03; - const int32_t i03 = tgpig.z; const int32_t i02 = tgpig.y; const int32_t i01 = tgpig.x*NSG + sgitg; @@ -5949,7 +5966,7 @@ kernel void kernel_flash_attn_ext_vec( static_assert(DK4 % NL == 0, "DK4 must be divisible by NL"); static_assert(DV4 % NL == 0, "DV4 must be divisible by NL"); - const short T = PK + NSG*SH; // shared memory size per query in (half) + //const short T = PK + NSG*SH; // shared memory size per query in (half) //threadgroup q_t * sq = (threadgroup q_t *) (shmem_f16 + 0*PK); // holds the query data threadgroup q4_t * sq4 = (threadgroup q4_t *) (shmem_f16 + 0*PK); // same as above but in q4_t @@ -8537,7 +8554,9 @@ kernel void kernel_mul_mm( threadgroup S0 * sa = (threadgroup S0 *)(shmem); threadgroup S1 * sb = (threadgroup S1 *)(shmem + 4096); +#ifdef GGML_METAL_HAS_TENSOR threadgroup float * sc = (threadgroup float *)(shmem); +#endif constexpr int NR0 = 64; constexpr int NR1 = 32; @@ -8660,8 +8679,8 @@ kernel void kernel_mul_mm( const short sx = (tiitg%NL1); const short sy = (tiitg/NL1)/8; - const short dx = sx; - const short dy = sy; + //const short dx = sx; + //const short dy = sy; const short ly = (tiitg/NL1)%8; @@ -8910,7 +8929,9 @@ kernel void kernel_mul_mm_id( threadgroup S0 * sa = (threadgroup S0 *)(shmem); threadgroup S1 * sb = (threadgroup S1 *)(shmem + 4096); +#ifdef GGML_METAL_HAS_TENSOR threadgroup float * sc = (threadgroup float *)(shmem); +#endif constexpr int NR0 = 64; constexpr int NR1 = 32; @@ -9045,8 +9066,8 @@ kernel void kernel_mul_mm_id( const short sx = (tiitg%NL1); const short sy = (tiitg/NL1)/8; - const short dx = sx; - const short dy = sy; + //const short dx = sx; + //const short dy = sy; const short ly = (tiitg/NL1)%8; diff --git a/ggml/src/ggml-opencl/CMakeLists.txt b/ggml/src/ggml-opencl/CMakeLists.txt index b6094fb68b..f389193691 100644 --- a/ggml/src/ggml-opencl/CMakeLists.txt +++ b/ggml/src/ggml-opencl/CMakeLists.txt @@ -85,6 +85,8 @@ set(GGML_OPENCL_KERNELS mul_mv_q4_0_f32_8x_flat mul_mv_q4_0_f32_1d_8x_flat mul_mv_q4_0_f32_1d_16x_flat + mul_mv_q4_1_f32 + mul_mv_q4_1_f32_flat mul_mv_q4_k_f32 mul_mv_q6_k_f32 mul_mv_q6_k_f32_flat @@ -101,6 +103,8 @@ set(GGML_OPENCL_KERNELS gemv_moe_mxfp4_f32 mul_mm_f32_f32_l4_lm mul_mm_f16_f32_l4_lm + mul_mm_q4_0_f32_l4_lm + mul_mm_q4_1_f32_l4_lm mul_mm_q8_0_f32_l4_lm mul_mm_q6_k_f32_l4_lm mul_mm_q8_0_f32_8x4 diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index 40474c193b..ae3f79fd0d 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -525,6 +525,7 @@ struct ggml_backend_opencl_context { cl_kernel kernel_mul_mm_f16_f32_kq; cl_kernel kernel_mul_mat_q4_0_f32, kernel_mul_mat_q4_0_f32_v; cl_kernel kernel_convert_block_q4_0, kernel_restore_block_q4_0; + cl_kernel kernel_convert_block_q4_1, kernel_restore_block_q4_1; cl_kernel kernel_convert_block_mxfp4, kernel_convert_block_mxfp4_trans, kernel_restore_block_mxfp4, kernel_restore_block_mxfp4_trans; cl_kernel kernel_convert_block_q8_0, kernel_restore_block_q8_0, kernel_restore_block_q8_0_trans; cl_kernel kernel_mul_mat_q4_0_f32_8x_flat; @@ -532,6 +533,8 @@ struct ggml_backend_opencl_context { cl_kernel kernel_restore_block_q4_0_noshuffle; cl_kernel kernel_convert_block_q6_K, kernel_restore_block_q6_K; cl_kernel kernel_mul_mat_q4_0_f32_1d_8x_flat, kernel_mul_mat_q4_0_f32_1d_16x_flat; + cl_kernel kernel_mul_mv_q4_1_f32; + cl_kernel kernel_mul_mv_q4_1_f32_flat; cl_kernel kernel_mul_mv_q4_K_f32; cl_kernel kernel_mul_mv_q6_K_f32; cl_kernel kernel_mul_mv_q6_K_f32_flat; @@ -564,6 +567,8 @@ struct ggml_backend_opencl_context { cl_kernel kernel_mul_mv_id_mxfp4_f32_flat; cl_kernel kernel_mul_mm_f32_f32_l4_lm; cl_kernel kernel_mul_mm_f16_f32_l4_lm; + cl_kernel kernel_mul_mm_q4_0_f32_l4_lm; + cl_kernel kernel_mul_mm_q4_1_f32_l4_lm; cl_kernel kernel_mul_mm_q8_0_f32_l4_lm; cl_kernel kernel_mul_mm_q6_k_f32_l4_lm; @@ -888,6 +893,8 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve CL_CHECK((backend_ctx->kernel_restore_block_q4_0_noshuffle = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q4_0_noshuffle", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_q4_0 = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q4_0", &err), err)); CL_CHECK((backend_ctx->kernel_restore_block_q4_0 = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q4_0", &err), err)); + CL_CHECK((backend_ctx->kernel_convert_block_q4_1 = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q4_1", &err), err)); + CL_CHECK((backend_ctx->kernel_restore_block_q4_1 = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q4_1", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_mxfp4 = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_mxfp4", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_mxfp4_trans = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_mxfp4_trans", &err), err)); CL_CHECK((backend_ctx->kernel_restore_block_mxfp4_trans = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_mxfp4_trans", &err), err)); @@ -1119,6 +1126,40 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve GGML_LOG_CONT("."); } + // mul_mv_q4_1_f32 + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "mul_mv_q4_1_f32.cl.h" + }; +#else + const std::string kernel_src = read_file("mul_mv_q4_1_f32.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_mul_mv_q4_1_f32 = clCreateKernel(prog, "kernel_mul_mv_q4_1_f32", &err), err)); + CL_CHECK(clReleaseProgram(prog)); + GGML_LOG_CONT("."); + } + + // mul_mv_q4_1_f32_flat + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "mul_mv_q4_1_f32_flat.cl.h" + }; +#else + const std::string kernel_src = read_file("mul_mv_q4_1_f32_flat.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_mul_mv_q4_1_f32_flat = clCreateKernel(prog, "kernel_mul_mv_q4_1_f32_flat", &err), err)); + CL_CHECK(clReleaseProgram(prog)); + GGML_LOG_CONT("."); + } + // mul_mv_q4_k_f32 { #ifdef GGML_OPENCL_EMBED_KERNELS @@ -1361,6 +1402,38 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve GGML_LOG_CONT("."); } + // mul_mm_q4_0_f32_l4_lm + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "mul_mm_q4_0_f32_l4_lm.cl.h" + }; +#else + const std::string kernel_src = read_file("mul_mm_q4_0_f32_l4_lm.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_mul_mm_q4_0_f32_l4_lm = clCreateKernel(prog, "kernel_mul_mm_q4_0_f32_l4_lm", &err), err)); + GGML_LOG_CONT("."); + } + + // mul_mm_q4_1_f32_l4_lm + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "mul_mm_q4_1_f32_l4_lm.cl.h" + }; +#else + const std::string kernel_src = read_file("mul_mm_q4_1_f32_l4_lm.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_mul_mm_q4_1_f32_l4_lm = clCreateKernel(prog, "kernel_mul_mm_q4_1_f32_l4_lm", &err), err)); + GGML_LOG_CONT("."); + } + // mul_mm_q8_0_f32_l4_lm { #ifdef GGML_OPENCL_EMBED_KERNELS @@ -2923,6 +2996,59 @@ struct ggml_tensor_extra_cl_q4_0 { } }; +struct ggml_tensor_extra_cl_q4_1 { + // Quantized values. + cl_mem q = nullptr; + // Quantized values in image1d_buffer_t. + cl_mem q_img = nullptr; + // Scales. + cl_mem d = nullptr; + // Scales in image1d_buffer_t. + cl_mem d_img = nullptr; + // Min + cl_mem m = nullptr; + // Min in image1d_buffer_t. + cl_mem m_img = nullptr; + // Size of quantized values. + size_t size_q = 0; + // Size of scales. + size_t size_d = 0; + // Size of min values. + size_t size_m = 0; + + ~ggml_tensor_extra_cl_q4_1() { + reset(); + } + + void reset() { + // q and d are subbuffers into the bigger buffer allocated in ggml_backend_buffer. + // They must be properly released so that the original buffer can be + // properly released to avoid memory leak. + if (q != nullptr) { + CL_CHECK(clReleaseMemObject(q)); + q = nullptr; + } + if (d != nullptr) { + CL_CHECK(clReleaseMemObject(d)); + d = nullptr; + } + if (m != nullptr) { + CL_CHECK(clReleaseMemObject(m)); + m = nullptr; + } + // Currently, q_img and d_img are only initialized when SMALL_ALLOC is + // enabled. They point to the images in ggml_backend_opencl_buffer_context. + // So, there is no need to release them here. + // TODO: initialize them for non SMALL_PATH path, or remove them. + q_img = nullptr; + d_img = nullptr; + m_img = nullptr; + size_q = 0; + size_d = 0; + size_m = 0; + } +}; + struct ggml_tensor_extra_cl_mxfp4 { // Quantized values. cl_mem q = nullptr; @@ -3399,8 +3525,9 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te return true; } else if (op->src[0]->type == GGML_TYPE_F32) { return op->src[1]->type == GGML_TYPE_F32; - } else if (op->src[0]->type == GGML_TYPE_Q4_0 || op->src[0]->type == GGML_TYPE_MXFP4 || - op->src[0]->type == GGML_TYPE_Q4_K || + } else if (op->src[0]->type == GGML_TYPE_Q4_0 || op->src[0]->type == GGML_TYPE_Q4_1 || + op->src[0]->type == GGML_TYPE_MXFP4 || + op->src[0]->type == GGML_TYPE_Q4_K || op->src[0]->type == GGML_TYPE_Q6_K) { return op->src[1]->type == GGML_TYPE_F32 && ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]); } else if (op->src[0]->type == GGML_TYPE_Q8_0) { @@ -3629,6 +3756,21 @@ struct ggml_backend_opencl_buffer_context { return extra; } + ggml_tensor_extra_cl_q4_1 * ggml_opencl_alloc_temp_tensor_extra_q4_1() { + ggml_tensor_extra_cl_q4_1 * extra; + if (temp_tensor_extras_q4_1.empty()) { + extra = new ggml_tensor_extra_cl_q4_1(); + } else { + extra = temp_tensor_extras_q4_1.back(); + temp_tensor_extras_q4_1.pop_back(); + } + + temp_tensor_extras_q4_1_in_use.push_back(extra); + + extra->reset(); + return extra; + } + ggml_tensor_extra_cl_mxfp4 * ggml_opencl_alloc_temp_tensor_extra_mxfp4() { ggml_tensor_extra_cl_mxfp4 * extra; if (temp_tensor_extras_mxfp4.empty()) { @@ -3685,6 +3827,11 @@ struct ggml_backend_opencl_buffer_context { } temp_tensor_extras_q4_0_in_use.clear(); + for (ggml_tensor_extra_cl_q4_1 * e : temp_tensor_extras_q4_1_in_use) { + temp_tensor_extras_q4_1.push_back(e); + } + temp_tensor_extras_q4_1_in_use.clear(); + for (ggml_tensor_extra_cl_mxfp4 * e : temp_tensor_extras_mxfp4_in_use) { temp_tensor_extras_mxfp4.push_back(e); } @@ -3710,6 +3857,8 @@ struct ggml_backend_opencl_buffer_context { std::vector temp_tensor_extras_in_use; std::vector temp_tensor_extras_q4_0; std::vector temp_tensor_extras_q4_0_in_use; + std::vector temp_tensor_extras_q4_1; + std::vector temp_tensor_extras_q4_1_in_use; std::vector temp_tensor_extras_mxfp4; std::vector temp_tensor_extras_mxfp4_in_use; std::vector temp_tensor_extras_q8_0; @@ -4079,6 +4228,75 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, return; } + if (tensor->type == GGML_TYPE_Q4_1) { + ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra; + GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized"); + + // Allocate the new extra and create aliases from the original. + ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context; + ggml_tensor_extra_cl_q4_1 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_q4_1(); + + size_t size_d = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(ggml_fp16_t); + size_t size_m = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(ggml_fp16_t); + size_t size_q = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*ggml_blck_size(tensor->type)/2; + GGML_ASSERT(size_d + size_m + size_q == ggml_nbytes(tensor) && "Incorrect tensor size"); + + cl_int err; + cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, + ggml_nbytes(tensor), NULL, &err); + CL_CHECK(err); + CL_CHECK(clEnqueueWriteBuffer( + queue, data_device, CL_TRUE, 0, + ggml_nbytes(tensor), data, 0, NULL, NULL)); + + cl_buffer_region region; + + // The original tensor memory is divided into scales and quants, i.e., + // we first store scales, mins, then quants. + // Create subbuffer for scales. + region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment); + region.size = size_d; + extra->d = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + auto previous_origin = region.origin; + + // Create subbuffer for mins. + region.origin = align_to(previous_origin + size_d, backend_ctx->alignment); + region.size = size_m; + extra->m = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + previous_origin = region.origin; + + // Create subbuffer for quants. + region.origin = align_to(previous_origin + size_m, backend_ctx->alignment); + region.size = size_q; + extra->q = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + + cl_kernel kernel = backend_ctx->kernel_convert_block_q4_1; + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->d)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra->m)); + + size_t global_work_size[] = {(size_t)ggml_nelements(tensor)/ggml_blck_size(tensor->type), 1, 1}; + size_t local_work_size[] = {64, 1, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clReleaseMemObject(data_device)); + + tensor->extra = extra; + + return; + } if (tensor->type == GGML_TYPE_MXFP4) { ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra; GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized"); @@ -4581,7 +4799,35 @@ static void ggml_backend_opencl_buffer_get_tensor(ggml_backend_buffer_t buffer, size, data, 0, NULL, NULL)); CL_CHECK(clReleaseMemObject(data_device)); return; - } else if (tensor->type == GGML_TYPE_MXFP4) { + } + if (tensor->type == GGML_TYPE_Q4_1) { + ggml_tensor_extra_cl_q4_1 * extra = (ggml_tensor_extra_cl_q4_1 *)tensor->extra; + + cl_int err; + cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, + ggml_nbytes(tensor), NULL, &err); + CL_CHECK(err); + + cl_kernel kernel = backend_ctx->kernel_restore_block_q4_1; + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra->q)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->d)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->m)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &data_device)); + + size_t global_work_size[] = {(size_t)ggml_nelements(tensor)/ggml_blck_size(tensor->type), 1, 1}; + size_t local_work_size[] = {1, 1, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, + global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clEnqueueReadBuffer( + queue, data_device, CL_TRUE, offset, + size, data, 0, NULL, NULL)); + CL_CHECK(clReleaseMemObject(data_device)); + return; + } + if (tensor->type == GGML_TYPE_MXFP4) { ggml_tensor_extra_cl_mxfp4 * extra = (ggml_tensor_extra_cl_mxfp4 *)tensor->extra; cl_int err; @@ -8409,6 +8655,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co #ifdef GGML_OPENCL_SOA_Q ggml_tensor_extra_cl_q4_0 * extra0_q4_0 = (ggml_tensor_extra_cl_q4_0 *)src0->extra; + ggml_tensor_extra_cl_q4_1 * extra0_q4_1 = (ggml_tensor_extra_cl_q4_1 *)src0->extra; ggml_tensor_extra_cl_mxfp4 * extra0_mxfp4 = (ggml_tensor_extra_cl_mxfp4 *)src0->extra; ggml_tensor_extra_cl_q8_0 * extra0_q8_0 = (ggml_tensor_extra_cl_q8_0 *)src0->extra; ggml_tensor_extra_cl_q6_K * extra0_q6_K = (ggml_tensor_extra_cl_q6_K *)src0->extra; @@ -8922,6 +9169,91 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); return; } + case GGML_TYPE_Q4_0: { + if (ne11 < 32) { + break; + } + if (!ggml_is_contiguous(src0) || !ggml_is_contiguous(src1)) { + break; + } + + kernel = backend_ctx->kernel_mul_mm_q4_0_f32_l4_lm; + nth0 = 128; // calculated as (BM*BN)/(TM*TN) + + int batch_stride_a = ne00*ne01; + int batch_stride_b = ne10*ne11; + int batch_stride_d = ne0*ne1; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_q4_0->q)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_q4_0->d)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra1->data_device)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne02)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne11)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne10)); // stride_a + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne10)); // stride_b + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne01)); // stride_d + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &batch_stride_a)); + CL_CHECK(clSetKernelArg(kernel, 15, sizeof(int), &batch_stride_b)); + CL_CHECK(clSetKernelArg(kernel, 16, sizeof(int), &batch_stride_d)); + CL_CHECK(clSetKernelArg(kernel, 17, sizeof(int), &r2)); + CL_CHECK(clSetKernelArg(kernel, 18, sizeof(int), &r3)); + + // 64 is block tile size BM and BN - change here when BM and BN in the kernel are changed. + size_t global_work_size[] = {(size_t)(CEIL_DIV(ne01, 64)*nth0), (size_t)(CEIL_DIV(ne11, 64)), (size_t)ne12*ne13}; + size_t local_work_size[] = {(size_t)nth0, 1, 1}; + + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); + return; + } + case GGML_TYPE_Q4_1: { + if (ne11 < 32) { + break; + } + if (!ggml_is_contiguous(src0) || !ggml_is_contiguous(src1)) { + break; + } + + kernel = backend_ctx->kernel_mul_mm_q4_1_f32_l4_lm; + nth0 = 128; // calculated as (BM*BN)/(TM*TN) + + int batch_stride_a = ne00*ne01; + int batch_stride_b = ne10*ne11; + int batch_stride_d = ne0*ne1; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_q4_1->q)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_q4_1->d)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra0_q4_1->m)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra1->data_device)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne02)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne11)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne10)); // stride_a + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne10)); // stride_b + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &ne01)); // stride_d + CL_CHECK(clSetKernelArg(kernel, 15, sizeof(int), &batch_stride_a)); + CL_CHECK(clSetKernelArg(kernel, 16, sizeof(int), &batch_stride_b)); + CL_CHECK(clSetKernelArg(kernel, 17, sizeof(int), &batch_stride_d)); + CL_CHECK(clSetKernelArg(kernel, 18, sizeof(int), &r2)); + CL_CHECK(clSetKernelArg(kernel, 19, sizeof(int), &r3)); + + // 64 is block tile size BM and BN - change here when BM and BN in the kernel are changed. + size_t global_work_size[] = {(size_t)(CEIL_DIV(ne01, 64)*nth0), (size_t)(CEIL_DIV(ne11, 64)), (size_t)ne12*ne13}; + size_t local_work_size[] = {(size_t)nth0, 1, 1}; + + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); + return; + } case GGML_TYPE_Q8_0: { if (ne11 < 32) { break; @@ -9262,7 +9594,71 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &r3)); #endif // GGML_OPENCL_SOA_Q break; - case GGML_TYPE_Q4_1: + case GGML_TYPE_Q4_1: { +#ifdef GGML_OPENCL_SOA_Q + if (backend_ctx->gpu_family == INTEL) { + nth0 = 16; + nth1 = 1; + ndst = 4; + } else if (backend_ctx->gpu_family == ADRENO) { + nth0 = 64; + nth1 = 1; + ndst = 4; + } else { + GGML_ASSERT(false && "TODO: Unknown GPU"); + } + + kernel = backend_ctx->kernel_mul_mv_q4_1_f32_flat; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_q4_1->q)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_q4_1->d)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra0_q4_1->m)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra1->data_device)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne02)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne10)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne1)); + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &r2)); + CL_CHECK(clSetKernelArg(kernel, 15, sizeof(int), &r3)); +#else + if (backend_ctx->gpu_family == INTEL) { + nth0 = 16; + nth1 = 1; + ndst = 4; + } else if (backend_ctx->gpu_family == ADRENO) { + nth0 = 64; + nth1 = 1; + ndst = 4; + } else { + GGML_ASSERT(false && "TODO: Unknown GPU"); + } + + kernel = backend_ctx->kernel_mul_mv_q4_1_f32; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0->data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &offset0)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra1->data_device)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne02)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne10)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne1)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &r2)); + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &r3)); +#endif // GGML_OPENCL_SOA_Q + break; + } case GGML_TYPE_Q8_0: { #ifdef GGML_OPENCL_SOA_Q kernel = backend_ctx->kernel_mul_mv_q8_0_f32_flat; diff --git a/ggml/src/ggml-opencl/kernels/cvt.cl b/ggml/src/ggml-opencl/kernels/cvt.cl index 9fb434713d..2c244ce321 100644 --- a/ggml/src/ggml-opencl/kernels/cvt.cl +++ b/ggml/src/ggml-opencl/kernels/cvt.cl @@ -46,6 +46,15 @@ struct block_q4_0 uint8_t qs[QK4_0 / 2]; }; +//------------------------------------------------------------------------------ +// block_q4_1 +//------------------------------------------------------------------------------ +struct block_q4_1 { + half d; // delta + half m; // min + uchar qs[QK4_1 / 2]; // nibbles / quants +}; + //------------------------------------------------------------------------------ // block_q6_K //------------------------------------------------------------------------------ @@ -148,6 +157,48 @@ kernel void kernel_restore_block_q4_0_noshuffle( } } +//------------------------------------------------------------------------------ +// kernel_convert_block_q4_1 +// Convert the block_q4_1 format to 2 separate arrays (AOS -> SOA). +// This kernel does not deshuffle the bits. +//------------------------------------------------------------------------------ +kernel void kernel_convert_block_q4_1( + global struct block_q4_1 * src0, + global uchar * dst_q, + global half * dst_d, + global half * dst_m +) { + global struct block_q4_1 * b = (global struct block_q4_1 *) src0 + get_global_id(0); + global uchar * q = (global uchar *) dst_q + QK4_1/2*get_global_id(0); + global half * d = (global half *) dst_d + get_global_id(0); + global half * m = (global half *) dst_m + get_global_id(0); + + *d = b->d; + *m = b->m; + + for (int i = 0; i < QK4_1/2; ++i) { + q[i] = b->qs[i]; + } +} + +kernel void kernel_restore_block_q4_1( + global uchar * src_q, + global half * src_d, + global half * src_m, + global struct block_q4_1 * dst +) { + global struct block_q4_1 * b = (global struct block_q4_1 *) dst + get_global_id(0); + global uchar * q = (global uchar *) src_q + QK4_1/2*get_global_id(0); + global half * d = (global half *) src_d + get_global_id(0); + global half * m = (global half *) src_m + get_global_id(0); + + b->d = *d; + b->m = *m; + for (int i = 0; i < QK4_1/2; ++i) { + b->qs[i] = q[i]; + } +} + //------------------------------------------------------------------------------ // block_mxfp4 //------------------------------------------------------------------------------ diff --git a/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl b/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl new file mode 100644 index 0000000000..4100e3080a --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl @@ -0,0 +1,163 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#define LOAD_VEC_A 8 +#define LOAD_VEC_B 4 + +#define BM 64 +#define BN 64 +#define BK 32 +#define TM 4 +#define TN 8 + +kernel void kernel_mul_mm_q4_0_f32_l4_lm( + global uchar4 * src0_q, + global half * src0_d, + global float4 * src1, + ulong offset1, + global float * dst, + ulong offsetd, + + int ne00, + int ne01, + int ne02, + int ne11, + int ne12, + + int stride_a, + int stride_b, + int stride_d, + + int batch_stride_a, + int batch_stride_b, + int batch_stride_d, + + int r2, + int r3 +) { + src1 = (global float4*)((global char*)src1 + offset1); + dst = (global float *)((global char*)dst + offsetd); + + local float buf_a[BM * BK]; + local float buf_b[BN * BK]; + + const int batch_idx = get_global_id(2); + + const int i13 = batch_idx / ne12; + const int i12 = batch_idx % ne12; + + const int i03 = i13 / r3; + const int i02 = i12 / r2; + + const int batch_idx_a = i03 * ne02 + i02; + + const int ir = get_group_id(0); + const int ic = get_group_id(1); + + const int tid = get_local_id(0); + const int th_r = tid % (BM / TM); + const int th_c = tid / (BM / TM); + + const int loadr_a = get_local_id(0) % (BK / LOAD_VEC_A); + const int loadc_a = get_local_id(0) / (BK / LOAD_VEC_A); + const int loadr_b = get_local_id(0) % (BK / LOAD_VEC_B); + const int loadc_b = get_local_id(0) / (BK / LOAD_VEC_B); + + const int loadstride_a = get_local_size(0) * LOAD_VEC_A / BK; + const int loadstride_b = get_local_size(0) * LOAD_VEC_B / BK; + + int pos_a = (batch_idx_a * batch_stride_a + ir * BM * stride_a) / LOAD_VEC_A; + int pos_b = (batch_idx * batch_stride_b + ic * BN * stride_b) / LOAD_VEC_B; + + float sums[TM * TN]; + float cache_a[TM]; + float cache_b[TN]; + + for (int i = 0; i < TM * TN; i++) { + sums[i] = 0.0f; + } + + for (int block = 0; block < ne00; block += BK) { + for (int l = 0; l < BM; l += loadstride_a) { + if (ir*BM + loadc_a + l < ne01) { + int idx = pos_a + (loadc_a + l) * stride_a / LOAD_VEC_A + loadr_a; + int ib = idx / 4; + int iqs = idx % 4; + + float d = (float)src0_d[ib]; + global uchar4 * qs = src0_q + ib*4 + iqs; + uchar4 q = *qs; + float4 v1 = (convert_float4((uchar4)((q.s0 )&0x0F, (q.s1 )&0x0F, (q.s2 )&0x0F, (q.s3 )&0x0F)) - 8.0f)*d; + float4 v2 = (convert_float4((uchar4)((q.s0>>4)&0x0F, (q.s1>>4)&0x0F, (q.s2>>4)&0x0F, (q.s3>>4)&0x0F)) - 8.0f)*d; + + buf_a[(loadr_a * 4 + 0) * BM + loadc_a + l] = v1.s0; + buf_a[(loadr_a * 4 + 1) * BM + loadc_a + l] = v1.s1; + buf_a[(loadr_a * 4 + 2) * BM + loadc_a + l] = v1.s2; + buf_a[(loadr_a * 4 + 3) * BM + loadc_a + l] = v1.s3; + buf_a[(loadr_a * 4 + 16) * BM + loadc_a + l] = v2.s0; + buf_a[(loadr_a * 4 + 17) * BM + loadc_a + l] = v2.s1; + buf_a[(loadr_a * 4 + 18) * BM + loadc_a + l] = v2.s2; + buf_a[(loadr_a * 4 + 19) * BM + loadc_a + l] = v2.s3; + } else { + buf_a[(loadr_a * 4 + 0) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 1) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 2) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 3) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 16) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 17) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 18) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 19) * BM + loadc_a + l] = 0.0f; + } + } + + for (int l = 0; l < BN; l += loadstride_b) { + if (ic*BN + loadc_b + l < ne11) { + int idx = pos_b + (loadc_b + l) * stride_b / LOAD_VEC_B + loadr_b; + buf_b[(loadr_b * LOAD_VEC_B + 0) * BN + loadc_b + l] = src1[idx].s0; + buf_b[(loadr_b * LOAD_VEC_B + 1) * BN + loadc_b + l] = src1[idx].s1; + buf_b[(loadr_b * LOAD_VEC_B + 2) * BN + loadc_b + l] = src1[idx].s2; + buf_b[(loadr_b * LOAD_VEC_B + 3) * BN + loadc_b + l] = src1[idx].s3; + } else { + buf_b[(loadr_b * LOAD_VEC_B + 0) * BN + loadc_b + l] = 0.0f; + buf_b[(loadr_b * LOAD_VEC_B + 1) * BN + loadc_b + l] = 0.0f; + buf_b[(loadr_b * LOAD_VEC_B + 2) * BN + loadc_b + l] = 0.0f; + buf_b[(loadr_b * LOAD_VEC_B + 3) * BN + loadc_b + l] = 0.0f; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + pos_a += BK / LOAD_VEC_A; + pos_b += BK / LOAD_VEC_B; + + for (int i = 0; i < BK; i++) { + for (int j = 0; j < TM; j++) { + cache_a[j] = buf_a[(i) * BM + th_r * TM + j]; + } + + for (int j = 0; j < TN; j++) { + cache_b[j] = buf_b[(i) * BN + th_c * TN + j]; + } + + for (int cc = 0; cc < TN; cc++) { + for (int cr = 0; cr < TM; cr++) { + const int sums_idx = cc*TM + cr; + sums[sums_idx] = mad(cache_a[cr], cache_b[cc], sums[sums_idx]); + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + const int dr = ir * BM + th_r * TM; + const int dc = ic * BN + th_c * TN; + + const int offsets = batch_idx * batch_stride_d; + + for (int cc = 0; cc < TN; cc++) { + for (int cr = 0; cr < TM; cr++) { + if (dr + cr < ne01 && dc + cc < ne11) { + dst[offsets + (dc + cc) * stride_d + dr + cr] = sums[cc * TM + cr]; + } + } + } +} diff --git a/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl b/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl new file mode 100644 index 0000000000..d0d2f08361 --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl @@ -0,0 +1,165 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#define LOAD_VEC_A 8 +#define LOAD_VEC_B 4 + +#define BM 64 +#define BN 64 +#define BK 32 +#define TM 4 +#define TN 8 + +kernel void kernel_mul_mm_q4_1_f32_l4_lm( + global uchar4 * src0_q, + global half * src0_d, + global half * src0_m, + global float4 * src1, + ulong offset1, + global float * dst, + ulong offsetd, + + int ne00, + int ne01, + int ne02, + int ne11, + int ne12, + + int stride_a, + int stride_b, + int stride_d, + + int batch_stride_a, + int batch_stride_b, + int batch_stride_d, + + int r2, + int r3 +) { + src1 = (global float4*)((global char*)src1 + offset1); + dst = (global float *)((global char*)dst + offsetd); + + local float buf_a[BM * BK]; + local float buf_b[BN * BK]; + + const int batch_idx = get_global_id(2); + + const int i13 = batch_idx / ne12; + const int i12 = batch_idx % ne12; + + const int i03 = i13 / r3; + const int i02 = i12 / r2; + + const int batch_idx_a = i03 * ne02 + i02; + + const int ir = get_group_id(0); + const int ic = get_group_id(1); + + const int tid = get_local_id(0); + const int th_r = tid % (BM / TM); + const int th_c = tid / (BM / TM); + + const int loadr_a = get_local_id(0) % (BK / LOAD_VEC_A); + const int loadc_a = get_local_id(0) / (BK / LOAD_VEC_A); + const int loadr_b = get_local_id(0) % (BK / LOAD_VEC_B); + const int loadc_b = get_local_id(0) / (BK / LOAD_VEC_B); + + const int loadstride_a = get_local_size(0) * LOAD_VEC_A / BK; + const int loadstride_b = get_local_size(0) * LOAD_VEC_B / BK; + + int pos_a = (batch_idx_a * batch_stride_a + ir * BM * stride_a) / LOAD_VEC_A; + int pos_b = (batch_idx * batch_stride_b + ic * BN * stride_b) / LOAD_VEC_B; + + float sums[TM * TN]; + float cache_a[TM]; + float cache_b[TN]; + + for (int i = 0; i < TM * TN; i++) { + sums[i] = 0.0f; + } + + for (int block = 0; block < ne00; block += BK) { + for (int l = 0; l < BM; l += loadstride_a) { + if (ir*BM + loadc_a + l < ne01) { + int idx = pos_a + (loadc_a + l) * stride_a / LOAD_VEC_A + loadr_a; + int ib = idx / 4; + int iqs = idx % 4; + + float d = (float)src0_d[ib]; + float m = (float)src0_m[ib]; + global uchar4 * qs = src0_q + ib*4 + iqs; + uchar4 q = *qs; + float4 v1 = (convert_float4((uchar4)((q.s0 )&0x0F, (q.s1 )&0x0F, (q.s2 )&0x0F, (q.s3 )&0x0F)))*d + m; + float4 v2 = (convert_float4((uchar4)((q.s0>>4)&0x0F, (q.s1>>4)&0x0F, (q.s2>>4)&0x0F, (q.s3>>4)&0x0F)))*d + m; + + buf_a[(loadr_a * 4 + 0) * BM + loadc_a + l] = v1.s0; + buf_a[(loadr_a * 4 + 1) * BM + loadc_a + l] = v1.s1; + buf_a[(loadr_a * 4 + 2) * BM + loadc_a + l] = v1.s2; + buf_a[(loadr_a * 4 + 3) * BM + loadc_a + l] = v1.s3; + buf_a[(loadr_a * 4 + 16) * BM + loadc_a + l] = v2.s0; + buf_a[(loadr_a * 4 + 17) * BM + loadc_a + l] = v2.s1; + buf_a[(loadr_a * 4 + 18) * BM + loadc_a + l] = v2.s2; + buf_a[(loadr_a * 4 + 19) * BM + loadc_a + l] = v2.s3; + } else { + buf_a[(loadr_a * 4 + 0) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 1) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 2) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 3) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 16) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 17) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 18) * BM + loadc_a + l] = 0.0f; + buf_a[(loadr_a * 4 + 19) * BM + loadc_a + l] = 0.0f; + } + } + + for (int l = 0; l < BN; l += loadstride_b) { + if (ic*BN + loadc_b + l < ne11) { + int idx = pos_b + (loadc_b + l) * stride_b / LOAD_VEC_B + loadr_b; + buf_b[(loadr_b * LOAD_VEC_B + 0) * BN + loadc_b + l] = src1[idx].s0; + buf_b[(loadr_b * LOAD_VEC_B + 1) * BN + loadc_b + l] = src1[idx].s1; + buf_b[(loadr_b * LOAD_VEC_B + 2) * BN + loadc_b + l] = src1[idx].s2; + buf_b[(loadr_b * LOAD_VEC_B + 3) * BN + loadc_b + l] = src1[idx].s3; + } else { + buf_b[(loadr_b * LOAD_VEC_B + 0) * BN + loadc_b + l] = 0.0f; + buf_b[(loadr_b * LOAD_VEC_B + 1) * BN + loadc_b + l] = 0.0f; + buf_b[(loadr_b * LOAD_VEC_B + 2) * BN + loadc_b + l] = 0.0f; + buf_b[(loadr_b * LOAD_VEC_B + 3) * BN + loadc_b + l] = 0.0f; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + pos_a += BK / LOAD_VEC_A; + pos_b += BK / LOAD_VEC_B; + + for (int i = 0; i < BK; i++) { + for (int j = 0; j < TM; j++) { + cache_a[j] = buf_a[(i) * BM + th_r * TM + j]; + } + + for (int j = 0; j < TN; j++) { + cache_b[j] = buf_b[(i) * BN + th_c * TN + j]; + } + + for (int cc = 0; cc < TN; cc++) { + for (int cr = 0; cr < TM; cr++) { + const int sums_idx = cc*TM + cr; + sums[sums_idx] = mad(cache_a[cr], cache_b[cc], sums[sums_idx]); + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + const int dr = ir * BM + th_r * TM; + const int dc = ic * BN + th_c * TN; + + const int offsets = batch_idx * batch_stride_d; + + for (int cc = 0; cc < TN; cc++) { + for (int cr = 0; cr < TM; cr++) { + if (dr + cr < ne01 && dc + cc < ne11) { + dst[offsets + (dc + cc) * stride_d + dr + cr] = sums[cc * TM + cr]; + } + } + } +} diff --git a/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl b/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl new file mode 100644 index 0000000000..6fe828f20e --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl @@ -0,0 +1,219 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#ifdef cl_intel_subgroups +#pragma OPENCL EXTENSION cl_intel_subgroups : enable +#else +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#endif + +#ifdef cl_intel_required_subgroup_size +#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable +#define INTEL_GPU 1 +#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16))) +#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32))) +#elif defined(cl_qcom_reqd_sub_group_size) +#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable +#define ADRENO_GPU 1 +#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half"))) +#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full"))) +#endif + +#define QK4_1 32 + +struct block_q4_1 { + half d; // delta + half m; // min + uchar qs[QK4_1 / 2]; // nibbles / quants +}; + +inline float block_q4_1_dot_y( + global const struct block_q4_1 * qb_curr, + float sumy, + float16 yl, + int il +) { + float d = qb_curr->d; + float m = qb_curr->m; + + float4 acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f); + + global const ushort * qs = ((global const ushort *) qb_curr + 2 + il/2); + + acc.s0 += yl.s0 * (qs[0] & 0x000F); + acc.s0 += yl.s1 * (qs[0] & 0x0F00); + acc.s0 += yl.s8 * (qs[0] & 0x00F0); + acc.s3 += yl.s9 * (qs[0] & 0xF000); + + acc.s0 += yl.s2 * (qs[1] & 0x000F); + acc.s1 += yl.s3 * (qs[1] & 0x0F00); + acc.s2 += yl.sa * (qs[1] & 0x00F0); + acc.s3 += yl.sb * (qs[1] & 0xF000); + + acc.s0 += yl.s4 * (qs[2] & 0x000F); + acc.s1 += yl.s5 * (qs[2] & 0x0F00); + acc.s2 += yl.sc * (qs[2] & 0x00F0); + acc.s3 += yl.sd * (qs[2] & 0xF000); + + acc.s0 += yl.s6 * (qs[3] & 0x000F); + acc.s1 += yl.s7 * (qs[3] & 0x0F00); + acc.s2 += yl.se * (qs[3] & 0x00F0); + acc.s3 += yl.sf * (qs[3] & 0xF000); + + return d * (acc.s0 + acc.s1 + acc.s2 + acc.s3) + sumy * m; +} + +#undef N_DST +#undef N_SIMDGROUP +#undef N_SIMDWIDTH + +#ifdef INTEL_GPU +#define N_DST 4 // each subgroup works on 4 rows +#define N_SIMDGROUP 1 // number of subgroups in a thread group +#define N_SIMDWIDTH 16 // assuming subgroup size is 16 +#elif defined (ADRENO_GPU) +#define N_DST 4 +#define N_SIMDGROUP 1 +#define N_SIMDWIDTH 64 +#endif + +inline void mul_vec_q_n_f32( + global void * src0, + global float * src1, + global float * dst, + int ne00, + int ne01, + int ne02, + int ne10, + int ne12, + int ne0, + int ne1, + int r2, + int r3 +) { + const ulong nb = ne00/QK4_1; + + int r0 = get_group_id(0); + int r1 = get_group_id(1); + int im = get_group_id(2); + + int first_row = (r0 * N_SIMDGROUP + get_sub_group_id()) * N_DST; + + int i12 = im%ne12; + int i13 = im/ne12; + + ulong offset0 = first_row * nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02); + + global struct block_q4_1 * x = (global struct block_q4_1 *) src0 + offset0; + global float * y = (global float *) src1 + r1*ne10 + im*ne00*ne1; + + float16 yl; + float4 sumf = (float4)(0.f, 0.f, 0.f, 0.f); + + int ix = get_sub_group_local_id()/2; + int il = 8*(get_sub_group_local_id()%2); + + global float * yb = y + ix * QK4_1 + il; + + for (int ib = ix; ib < nb; ib += N_SIMDWIDTH/2) { + float sumy = 0; + + sumy += yb[0]; + sumy += yb[1]; + sumy += yb[2]; + sumy += yb[3]; + sumy += yb[4]; + sumy += yb[5]; + sumy += yb[6]; + sumy += yb[7]; + + sumy += yb[16]; + sumy += yb[17]; + sumy += yb[18]; + sumy += yb[19]; + sumy += yb[20]; + sumy += yb[21]; + sumy += yb[22]; + sumy += yb[23]; + + + yl.s0 = yb[0]; + yl.s1 = yb[1]/256.f; + + yl.s2 = yb[2]; + yl.s3 = yb[3]/256.f; + + yl.s4 = yb[4]; + yl.s5 = yb[5]/256.f; + + yl.s6 = yb[6]; + yl.s7 = yb[7]/256.f; + + yl.s8 = yb[16]/16.f; + yl.s9 = yb[17]/4096.f; + + yl.sa = yb[18]/16.f; + yl.sb = yb[19]/4096.f; + + yl.sc = yb[20]/16.f; + yl.sd = yb[21]/4096.f; + + yl.se = yb[22]/16.f; + yl.sf = yb[23]/4096.f; + + sumf.s0 += block_q4_1_dot_y(x+ib+0*nb, sumy, yl, il); + sumf.s1 += block_q4_1_dot_y(x+ib+1*nb, sumy, yl, il); + sumf.s2 += block_q4_1_dot_y(x+ib+2*nb, sumy, yl, il); + sumf.s3 += block_q4_1_dot_y(x+ib+3*nb, sumy, yl, il); + + yb += QK4_1 * (N_SIMDWIDTH/2); + } + + float4 tot = (float4)( + sub_group_reduce_add(sumf.s0), sub_group_reduce_add(sumf.s1), + sub_group_reduce_add(sumf.s2), sub_group_reduce_add(sumf.s3) + ); + + if (get_sub_group_local_id() == 0) { + if (first_row + 0 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 0] = tot.s0; + } + if (first_row + 1 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 1] = tot.s1; + } + if (first_row + 2 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 2] = tot.s2; + } + if (first_row + 3 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 3] = tot.s3; + } + } +} + +#ifdef INTEL_GPU +REQD_SUBGROUP_SIZE_16 +#elif defined (ADRENO_GPU) +REQD_SUBGROUP_SIZE_64 +#endif +kernel void kernel_mul_mv_q4_1_f32( + global void * src0, + ulong offset0, + global float * src1, + ulong offset1, + global float * dst, + ulong offsetd, + int ne00, + int ne01, + int ne02, + int ne10, + int ne12, + int ne0, + int ne1, + int r2, + int r3 +) { + src0 = (global void*)((global char*)src0 + offset0); + src1 = (global float*)((global char*)src1 + offset1); + dst = (global float*)((global char*)dst + offsetd); + + mul_vec_q_n_f32(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3); +} diff --git a/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl b/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl new file mode 100644 index 0000000000..d7c4645d67 --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl @@ -0,0 +1,229 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#ifdef cl_intel_subgroups +#pragma OPENCL EXTENSION cl_intel_subgroups : enable +#else +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#endif + +#ifdef cl_intel_required_subgroup_size +#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable +#define INTEL_GPU 1 +#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16))) +#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32))) +#elif defined(cl_qcom_reqd_sub_group_size) +#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable +#define ADRENO_GPU 1 +#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half"))) +#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full"))) +#endif + +#define QK4_1 32 + +struct block_q4_1 { + half d; // delta + half m; // min + uchar qs[QK4_1 / 2]; // nibbles / quants +}; + +inline float block_q4_1_dot_y_flat( + global const uchar * x, + global const half * dh, + global const half * mh, + float sumy, + float16 yl, + int il +) { + float d = *dh; + float m = *mh; + global const ushort * qs = ((global const ushort *) x + il/2); + + float4 acc = (float4)(0.0f, 0.0f, 0.0f, 0.0f); + + acc.s0 += yl.s0 * (qs[0] & 0x000F); + acc.s0 += yl.s1 * (qs[0] & 0x0F00); + acc.s0 += yl.s8 * (qs[0] & 0x00F0); + acc.s3 += yl.s9 * (qs[0] & 0xF000); + + acc.s0 += yl.s2 * (qs[1] & 0x000F); + acc.s1 += yl.s3 * (qs[1] & 0x0F00); + acc.s2 += yl.sa * (qs[1] & 0x00F0); + acc.s3 += yl.sb * (qs[1] & 0xF000); + + acc.s0 += yl.s4 * (qs[2] & 0x000F); + acc.s1 += yl.s5 * (qs[2] & 0x0F00); + acc.s2 += yl.sc * (qs[2] & 0x00F0); + acc.s3 += yl.sd * (qs[2] & 0xF000); + + acc.s0 += yl.s6 * (qs[3] & 0x000F); + acc.s1 += yl.s7 * (qs[3] & 0x0F00); + acc.s2 += yl.se * (qs[3] & 0x00F0); + acc.s3 += yl.sf * (qs[3] & 0xF000); + + return d * (acc.s0 + acc.s1 + acc.s2 + acc.s3) + sumy * m; +} + +#undef N_DST +#undef N_SIMDGROUP +#undef N_SIMDWIDTH + +#ifdef INTEL_GPU +#define N_DST 4 // each subgroup works on 4 rows +#define N_SIMDGROUP 1 // number of subgroups in a thread group +#define N_SIMDWIDTH 16 // assuming subgroup size is 16 +#elif defined (ADRENO_GPU) +#define N_DST 4 +#define N_SIMDGROUP 1 +#define N_SIMDWIDTH 64 +#endif + +inline void mul_vec_q_n_f32_flat( + global void * src0_q, + global void * src0_d, + global void * src0_m, + global float * src1, + global float * dst, + int ne00, + int ne01, + int ne02, + int ne10, + int ne12, + int ne0, + int ne1, + int r2, + int r3 +) { + const ulong nb = ne00/QK4_1; + + int r0 = get_group_id(0); + int r1 = get_group_id(1); + int im = get_group_id(2); + + int first_row = (r0 * N_SIMDGROUP + get_sub_group_id()) * N_DST; + + int i12 = im%ne12; + int i13 = im/ne12; + + ulong offset0 = first_row * nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02); + + // The number of scales/mins is the same as the number of blocks. + ulong offset0_dm = (first_row * nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02)); + // Each block contains QK4_1/2 uchars, hence offset for qs is as follows. + ulong offset0_q = (first_row * nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02)) * QK4_1/2; + + global uchar * x = (global uchar *) src0_q + offset0_q; + global half * d = (global half *) src0_d + offset0_dm; + global half * m = (global half *) src0_m + offset0_dm; + global float * y = (global float *) src1 + r1*ne10 + im*ne00*ne1; + + float16 yl; + float4 sumf = (float4)(0.f, 0.f, 0.f, 0.f); + + int ix = get_sub_group_local_id()/2; + int il = 8*(get_sub_group_local_id()%2); + + global float * yb = y + ix * QK4_1 + il; + + for (int ib = ix; ib < nb; ib += N_SIMDWIDTH/2) { + float sumy = 0; + + sumy += yb[0]; + sumy += yb[1]; + sumy += yb[2]; + sumy += yb[3]; + sumy += yb[4]; + sumy += yb[5]; + sumy += yb[6]; + sumy += yb[7]; + + sumy += yb[16]; + sumy += yb[17]; + sumy += yb[18]; + sumy += yb[19]; + sumy += yb[20]; + sumy += yb[21]; + sumy += yb[22]; + sumy += yb[23]; + + + yl.s0 = yb[0]; + yl.s1 = yb[1]/256.f; + + yl.s2 = yb[2]; + yl.s3 = yb[3]/256.f; + + yl.s4 = yb[4]; + yl.s5 = yb[5]/256.f; + + yl.s6 = yb[6]; + yl.s7 = yb[7]/256.f; + + yl.s8 = yb[16]/16.f; + yl.s9 = yb[17]/4096.f; + + yl.sa = yb[18]/16.f; + yl.sb = yb[19]/4096.f; + + yl.sc = yb[20]/16.f; + yl.sd = yb[21]/4096.f; + + yl.se = yb[22]/16.f; + yl.sf = yb[23]/4096.f; + + sumf.s0 += block_q4_1_dot_y_flat(x + ib*QK4_1/2 + 0*nb*QK4_1/2, d + ib + 0*nb, m + ib + 0*nb, sumy, yl, il); + sumf.s1 += block_q4_1_dot_y_flat(x + ib*QK4_1/2 + 1*nb*QK4_1/2, d + ib + 1*nb, m + ib + 1*nb, sumy, yl, il); + sumf.s2 += block_q4_1_dot_y_flat(x + ib*QK4_1/2 + 2*nb*QK4_1/2, d + ib + 2*nb, m + ib + 2*nb, sumy, yl, il); + sumf.s3 += block_q4_1_dot_y_flat(x + ib*QK4_1/2 + 3*nb*QK4_1/2, d + ib + 3*nb, m + ib + 3*nb, sumy, yl, il); + + yb += QK4_1 * (N_SIMDWIDTH/2); + } + + float4 tot = (float4)( + sub_group_reduce_add(sumf.s0), sub_group_reduce_add(sumf.s1), + sub_group_reduce_add(sumf.s2), sub_group_reduce_add(sumf.s3) + ); + + if (get_sub_group_local_id() == 0) { + if (first_row + 0 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 0] = tot.s0; + } + if (first_row + 1 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 1] = tot.s1; + } + if (first_row + 2 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 2] = tot.s2; + } + if (first_row + 3 < ne01) { + dst[r1*ne0 + im*ne0*ne1 + first_row + 3] = tot.s3; + } + } +} + +#ifdef INTEL_GPU +REQD_SUBGROUP_SIZE_16 +#elif defined (ADRENO_GPU) +REQD_SUBGROUP_SIZE_64 +#endif +kernel void kernel_mul_mv_q4_1_f32_flat( + global void * src0_q, + global void * src0_d, + global void * src0_m, + global float * src1, + ulong offset1, + global float * dst, + ulong offsetd, + int ne00, + int ne01, + int ne02, + int ne10, + int ne12, + int ne0, + int ne1, + int r2, + int r3 +) { + src1 = (global float*)((global char*)src1 + offset1); + dst = (global float*)((global char*)dst + offsetd); + + mul_vec_q_n_f32_flat(src0_q, src0_d, src0_m, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3); +} diff --git a/include/llama.h b/include/llama.h index 46c3672e98..305623127c 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1150,9 +1150,9 @@ extern "C" { // /// Apply chat template. Inspired by hf apply_chat_template() on python. - /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model" + /// /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template - /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead. + /// @param tmpl A Jinja template to use for this chat. /// @param chat Pointer to a list of multiple llama_chat_message /// @param n_msg Number of llama_chat_message in this chat /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. diff --git a/scripts/pr2wt.sh b/scripts/pr2wt.sh index bd635f3b9d..067f5d466b 100755 --- a/scripts/pr2wt.sh +++ b/scripts/pr2wt.sh @@ -30,12 +30,18 @@ fi PR=$1 [[ "$PR" =~ ^[0-9]+$ ]] || { echo "error: PR number must be numeric"; exit 1; } +url_origin=$(git config --get remote.upstream.url 2>/dev/null) || \ url_origin=$(git config --get remote.origin.url) || { - echo "error: no remote named 'origin' in this repository" + echo "error: no remote named 'upstream' or 'origin' in this repository" exit 1 } -org_repo=$(echo $url_origin | cut -d/ -f4-) +# Extract org/repo from either https or ssh format. +if [[ $url_origin =~ ^git@ ]]; then + org_repo=$(echo $url_origin | cut -d: -f2) +else + org_repo=$(echo $url_origin | cut -d/ -f4-) +fi org_repo=${org_repo%.git} echo "org/repo: $org_repo" diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py index 1ff6a9a40f..68db04dea9 100755 --- a/scripts/sync_vendor.py +++ b/scripts/sync_vendor.py @@ -2,6 +2,8 @@ import urllib.request +HTTPLIB_VERSION = "f80864ca031932351abef49b74097c67f14719c6" + vendor = { "https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp", "https://github.com/nlohmann/json/releases/latest/download/json_fwd.hpp": "vendor/nlohmann/json_fwd.hpp", @@ -12,8 +14,8 @@ vendor = { # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h", "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h", - "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/httplib.h": "vendor/cpp-httplib/httplib.h", - "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/LICENSE": "vendor/cpp-httplib/LICENSE", + f"https://raw.githubusercontent.com/yhirose/cpp-httplib/{HTTPLIB_VERSION}/httplib.h": "vendor/cpp-httplib/httplib.h", + f"https://raw.githubusercontent.com/yhirose/cpp-httplib/{HTTPLIB_VERSION}/LICENSE": "vendor/cpp-httplib/LICENSE", "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h", } diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 5816e9a954..6b7da69e9d 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -7965,7 +7965,6 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params, cparams.n_seq_max, nullptr); } else if (llm_arch_is_hybrid(arch)) { - // The main difference between hybrid architectures is the // layer filters, so pick the right one here llama_memory_hybrid::layer_filter_cb filter_attn = nullptr; @@ -7990,7 +7989,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params, /* attn_type_v */ params.type_v, /* attn_v_trans */ !cparams.flash_attn, /* attn_swa_full */ params.swa_full, - /* attn_kv_size */ cparams.n_ctx, + /* attn_kv_size */ cparams.n_ctx_seq, /* attn_n_ubatch */ cparams.n_ubatch, /* attn_n_pad */ 1, /* recurrent_type_r */ GGML_TYPE_F32, @@ -8007,7 +8006,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params, /* attn_type_k */ params.type_k, /* attn_type_v */ params.type_v, /* attn_v_trans */ !cparams.flash_attn, - /* attn_kv_size */ cparams.n_ctx, + /* attn_kv_size */ cparams.n_ctx_seq, /* attn_n_pad */ 1, /* attn_n_swa */ hparams.n_swa, /* attn_swa_type */ hparams.swa_type, diff --git a/src/unicode.cpp b/src/unicode.cpp index adfc489d1f..b88d953bd2 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -1,16 +1,10 @@ -#if defined(_MSC_VER) -#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING -#endif - #include "unicode.h" #include "unicode-data.h" #include #include -#include #include #include -#include #include #include #include @@ -199,27 +193,6 @@ static std::unordered_map unicode_utf8_to_byte_map() { return map; } -static inline std::wstring unicode_wstring_from_utf8(const std::string & s) { -#if defined(__clang__) - // disable C++17 deprecation warning for std::codecvt_utf8 -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-declarations" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - - std::wstring_convert> conv; - -#if defined(__clang__) -# pragma clang diagnostic pop -#elif defined(__GNUC__) -# pragma GCC diagnostic pop -#endif - - return conv.from_bytes(s); -} - static std::vector unicode_byte_encoding_process(const std::vector & bpe_words) { std::vector bpe_encoded_words; for (const auto & word : bpe_words) { @@ -1028,10 +1001,10 @@ std::vector unicode_regex_split(const std::string & text, const std break; } } + const auto cpts_regex = unicode_cpts_from_utf8(regex_expr); if (use_collapsed) { // sanity-check that the original regex does not contain any non-ASCII characters - const auto cpts_regex = unicode_cpts_from_utf8(regex_expr); for (size_t i = 0; i < cpts_regex.size(); ++i) { if (cpts_regex[i] >= 128) { throw std::runtime_error("Regex includes both unicode categories and non-ASCII characters - not supported"); @@ -1087,7 +1060,7 @@ std::vector unicode_regex_split(const std::string & text, const std bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets); } else { // no unicode category used, we can use std::wregex directly - const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr); + std::wstring wregex_expr(cpts_regex.begin(), cpts_regex.end()); // std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback std::wstring wtext(cpts.begin(), cpts.end()); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index ed99c24516..8816f6963f 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -2786,9 +2786,10 @@ struct test_set : public test_case { const ggml_type type_dst; const std::array ne; const int dim; + const bool inplace; std::string vars() override { - return VARS_TO_STR4(type_src, type_dst, ne, dim); + return VARS_TO_STR5(type_src, type_dst, ne, dim, inplace); } size_t op_size(ggml_tensor * t) override { @@ -2796,8 +2797,8 @@ struct test_set : public test_case { } test_set(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32, - std::array ne = {6, 5, 4, 3}, int dim = 1) - : type_src(type_src), type_dst(type_dst), ne(ne), dim(dim) {} + std::array ne = {6, 5, 4, 3}, int dim = 1, bool inplace = false) + : type_src(type_src), type_dst(type_dst), ne(ne), dim(dim), inplace(inplace) {} ggml_tensor * build_graph(ggml_context * ctx) override { ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data()); @@ -2808,7 +2809,7 @@ struct test_set : public test_case { for (int i = 0; i < dim; ++i) { ne_dst[i] *= 2; } - ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data()); + ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data()); ggml_set_param(dst); ggml_set_name(dst, "dst"); @@ -2816,9 +2817,16 @@ struct test_set : public test_case { for (int i = 0; i < dim; ++i) { offset += ((ne_dst[i] - ne[i])/2)*dst->nb[i]; } - ggml_tensor * out = ggml_set(ctx, dst, src, - // The backward pass requires setting a contiguous region: - src->nb[1], src->nb[2], src->nb[3], offset); + ggml_tensor * out; + if (inplace) { + out = ggml_set_inplace(ctx, dst, src, + // The backward pass requires setting a contiguous region: + src->nb[1], src->nb[2], src->nb[3], offset); + } else { + out = ggml_set(ctx, dst, src, + // The backward pass requires setting a contiguous region: + src->nb[1], src->nb[2], src->nb[3], offset); + } ggml_set_name(out, "out"); return out; @@ -7428,11 +7436,13 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3})); for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) { - test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim)); + test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim, false)); + test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim, true)); } for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) { - test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim)); + test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim, false)); + test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim, true)); } // same-type copy @@ -8132,24 +8142,30 @@ static std::vector> make_test_cases_eval() { } test_cases.emplace_back(new test_sum()); - test_cases.emplace_back(new test_sum_rows()); test_cases.emplace_back(new test_sum(GGML_TYPE_F32, {11, 5, 6, 3}, {0, 2, 1, 3})); // row-contiguous but non-contiguous test_cases.emplace_back(new test_sum(GGML_TYPE_F32, {11, 5, 6, 3}, {0, 3, 2, 1})); test_cases.emplace_back(new test_sum(GGML_TYPE_F32, {11, 5, 6, 3}, {0, 1, 3, 2})); + test_cases.emplace_back(new test_mean()); + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 33, 1, 1, 1 })); + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 33, 256, 1, 1 })); + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 32769, 1, 1, 1 })); + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 32, 1, 1, 1 })); + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 32, 256, 1, 1 })); + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 32768, 1, 1, 1 })); + test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 1, 1, 1 })); + test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 1024, 1, 1 })); + test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 256, 1, 1 })); + test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 256, 1, 1 }, { 1, 0, 2, 3 })); // sum dst not-contiguous + test_cases.emplace_back(new test_sum_rows()); test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 11, 5, 6, 3 }, true, false)); test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 11, 5, 6, 3 }, false, true)); test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 11, 5, 6, 3 }, true, true)); - test_cases.emplace_back(new test_mean()); - test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 1, 1, 1 })); + test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 16, 5, 6, 3 }, true, false)); + test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 16, 5, 6, 3 }, false, true)); + test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 16, 5, 6, 3 }, true, true)); test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 33, 1, 1, 1 })); - test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 33, 1, 1, 1 })); - test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 1024, 1, 1 })); test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 33, 1024, 1, 1 })); - test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 256, 1, 1 })); - test_cases.emplace_back(new test_sum(GGML_TYPE_F32, { 33, 256, 1, 1 }, { 1, 0, 2, 3 })); // sum dst not-contiguous test_cases.emplace_back(new test_sum_rows(GGML_TYPE_F32, { 33, 256, 1, 1 })); - test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 33, 256, 1, 1 })); - test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 32769, 1, 1, 1 })); test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {64, 64, 320, 1})); test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1})); test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {64, 64, 320, 1})); diff --git a/tools/server/README.md b/tools/server/README.md index d132830171..0b56ca1e27 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -19,7 +19,7 @@ Set of LLM REST APIs and a web UI to interact with llama.cpp. * Speculative decoding * Easy-to-use web UI -For the ful list of features, please refer to [server's changelog](https://github.com/ggml-org/llama.cpp/issues/9291) +For the full list of features, please refer to [server's changelog](https://github.com/ggml-org/llama.cpp/issues/9291) ## Usage diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index e3b06f4901..f4ff57b4c9 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/docs/flows/settings-flow.md b/tools/server/webui/docs/flows/settings-flow.md index 578e01e6e1..474aef01b0 100644 --- a/tools/server/webui/docs/flows/settings-flow.md +++ b/tools/server/webui/docs/flows/settings-flow.md @@ -139,6 +139,6 @@ sequenceDiagram Note over settingsStore: UI-only (not synced): rect rgb(255, 240, 240) - Note over settingsStore: systemMessage, custom (JSON)
showStatistics, enableContinueGeneration
autoMicOnEmpty, disableAutoScroll
apiKey, pdfAsImage, disableReasoningFormat + Note over settingsStore: systemMessage, custom (JSON)
showStatistics, enableContinueGeneration
autoMicOnEmpty, disableAutoScroll
apiKey, pdfAsImage, disableReasoningParsing, showRawOutputSwitch end ``` diff --git a/tools/server/webui/src/app.css b/tools/server/webui/src/app.css index 9705040a4d..3ab21f0cc7 100644 --- a/tools/server/webui/src/app.css +++ b/tools/server/webui/src/app.css @@ -14,11 +14,11 @@ --popover-foreground: oklch(0.145 0 0); --primary: oklch(0.205 0 0); --primary-foreground: oklch(0.985 0 0); - --secondary: oklch(0.97 0 0); + --secondary: oklch(0.95 0 0); --secondary-foreground: oklch(0.205 0 0); --muted: oklch(0.97 0 0); --muted-foreground: oklch(0.556 0 0); - --accent: oklch(0.97 0 0); + --accent: oklch(0.95 0 0); --accent-foreground: oklch(0.205 0 0); --destructive: oklch(0.577 0.245 27.325); --border: oklch(0.875 0 0); @@ -37,7 +37,7 @@ --sidebar-accent-foreground: oklch(0.205 0 0); --sidebar-border: oklch(0.922 0 0); --sidebar-ring: oklch(0.708 0 0); - --code-background: oklch(0.975 0 0); + --code-background: oklch(0.985 0 0); --code-foreground: oklch(0.145 0 0); --layer-popover: 1000000; } @@ -51,7 +51,7 @@ --popover-foreground: oklch(0.985 0 0); --primary: oklch(0.922 0 0); --primary-foreground: oklch(0.205 0 0); - --secondary: oklch(0.269 0 0); + --secondary: oklch(0.29 0 0); --secondary-foreground: oklch(0.985 0 0); --muted: oklch(0.269 0 0); --muted-foreground: oklch(0.708 0 0); @@ -116,12 +116,62 @@ --color-sidebar-ring: var(--sidebar-ring); } +:root { + --chat-form-area-height: 8rem; + --chat-form-area-offset: 2rem; + --max-message-height: max(24rem, min(80dvh, calc(100dvh - var(--chat-form-area-height) - 12rem))); +} + +@media (min-width: 640px) { + :root { + --chat-form-area-height: 24rem; + --chat-form-area-offset: 12rem; + } +} + @layer base { * { @apply border-border outline-ring/50; } + body { @apply bg-background text-foreground; + scrollbar-width: thin; + scrollbar-gutter: stable; + } + + /* Global scrollbar styling - visible only on hover */ + * { + scrollbar-width: thin; + scrollbar-color: transparent transparent; + transition: scrollbar-color 0.2s ease; + } + + *:hover { + scrollbar-color: hsl(var(--muted-foreground) / 0.3) transparent; + } + + *::-webkit-scrollbar { + width: 6px; + height: 6px; + } + + *::-webkit-scrollbar-track { + background: transparent; + } + + *::-webkit-scrollbar-thumb { + background: transparent; + border-radius: 3px; + transition: background 0.2s ease; + } + + *:hover::-webkit-scrollbar-thumb { + background: hsl(var(--muted-foreground) / 0.3); + } + + *::-webkit-scrollbar-thumb:hover { + background: hsl(var(--muted-foreground) / 0.5); } } diff --git a/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte b/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte new file mode 100644 index 0000000000..4494ea880b --- /dev/null +++ b/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte @@ -0,0 +1,48 @@ + + + + + + + + +

{tooltip}

+
+
diff --git a/tools/server/webui/src/lib/components/app/actions/ActionIconCopyToClipboard.svelte b/tools/server/webui/src/lib/components/app/actions/ActionIconCopyToClipboard.svelte new file mode 100644 index 0000000000..bf6cd4fb28 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/actions/ActionIconCopyToClipboard.svelte @@ -0,0 +1,18 @@ + + + canCopy && copyToClipboard(text)} +/> diff --git a/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte b/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte new file mode 100644 index 0000000000..1ae3d21774 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte @@ -0,0 +1,26 @@ + + + diff --git a/tools/server/webui/src/lib/components/app/actions/ActionIconsCodeBlock.svelte b/tools/server/webui/src/lib/components/app/actions/ActionIconsCodeBlock.svelte new file mode 100644 index 0000000000..54ff0af1a0 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/actions/ActionIconsCodeBlock.svelte @@ -0,0 +1,46 @@ + + +
+
+ +
+ + {#if showPreview} + + {/if} +
diff --git a/tools/server/webui/src/lib/components/app/actions/index.ts b/tools/server/webui/src/lib/components/app/actions/index.ts new file mode 100644 index 0000000000..43485c7b7e --- /dev/null +++ b/tools/server/webui/src/lib/components/app/actions/index.ts @@ -0,0 +1,19 @@ +/** + * + * ACTIONS + * + * Small interactive components for user actions. + * + */ + +/** Styled icon button for action triggers with tooltip. */ +export { default as ActionIcon } from './ActionIcon.svelte'; + +/** Code block actions component (copy, preview). */ +export { default as ActionIconsCodeBlock } from './ActionIconsCodeBlock.svelte'; + +/** Copy-to-clipboard icon button with click handler. */ +export { default as ActionIconCopyToClipboard } from './ActionIconCopyToClipboard.svelte'; + +/** Remove/delete icon button with X icon. */ +export { default as ActionIconRemove } from './ActionIconRemove.svelte'; diff --git a/tools/server/webui/src/lib/components/app/badges/BadgeChatStatistic.svelte b/tools/server/webui/src/lib/components/app/badges/BadgeChatStatistic.svelte new file mode 100644 index 0000000000..a2b28d2057 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/badges/BadgeChatStatistic.svelte @@ -0,0 +1,44 @@ + + +{#if tooltipLabel} + + + + {#snippet icon()} + + {/snippet} + + {value} + + + +

{tooltipLabel}

+
+
+{:else} + + {#snippet icon()} + + {/snippet} + + {value} + +{/if} diff --git a/tools/server/webui/src/lib/components/app/badges/BadgeInfo.svelte b/tools/server/webui/src/lib/components/app/badges/BadgeInfo.svelte new file mode 100644 index 0000000000..c70af6f423 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/badges/BadgeInfo.svelte @@ -0,0 +1,27 @@ + + + diff --git a/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte b/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte new file mode 100644 index 0000000000..a0d5e863c2 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte @@ -0,0 +1,39 @@ + + +{#each displayableModalities as modality, index (index)} + {@const IconComponent = MODALITY_ICONS[modality]} + {@const label = MODALITY_LABELS[modality]} + + + {#if IconComponent} + + {/if} + + {label} + +{/each} diff --git a/tools/server/webui/src/lib/components/app/badges/index.ts b/tools/server/webui/src/lib/components/app/badges/index.ts new file mode 100644 index 0000000000..860afe3084 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/badges/index.ts @@ -0,0 +1,16 @@ +/** + * + * BADGES & INDICATORS + * + * Small visual indicators for status and metadata. + * + */ + +/** Badge displaying chat statistics (tokens, timing). */ +export { default as BadgeChatStatistic } from './BadgeChatStatistic.svelte'; + +/** Generic info badge with optional tooltip and click handler. */ +export { default as BadgeInfo } from './BadgeInfo.svelte'; + +/** Badge indicating model modality (vision, audio, tools). */ +export { default as BadgeModality } from './BadgeModality.svelte'; diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte index 27ab975cbd..95645295fb 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte @@ -27,11 +27,13 @@ interface Props { class?: string; disabled?: boolean; + initialMessage?: string; isLoading?: boolean; onFileRemove?: (fileId: string) => void; onFileUpload?: (files: File[]) => void; onSend?: (message: string, files?: ChatUploadedFile[]) => Promise; onStop?: () => void; + onSystemPromptAdd?: (draft: { message: string; files: ChatUploadedFile[] }) => void; showHelperText?: boolean; uploadedFiles?: ChatUploadedFile[]; } @@ -39,11 +41,13 @@ let { class: className, disabled = false, + initialMessage = '', isLoading = false, onFileRemove, onFileUpload, onSend, onStop, + onSystemPromptAdd, showHelperText = true, uploadedFiles = $bindable([]) }: Props = $props(); @@ -53,15 +57,28 @@ let currentConfig = $derived(config()); let fileInputRef: ChatFormFileInputInvisible | undefined = $state(undefined); let isRecording = $state(false); - let message = $state(''); + let message = $state(initialMessage); let pasteLongTextToFileLength = $derived.by(() => { const n = Number(currentConfig.pasteLongTextToFileLen); return Number.isNaN(n) ? Number(SETTING_CONFIG_DEFAULT.pasteLongTextToFileLen) : n; }); let previousIsLoading = $state(isLoading); + let previousInitialMessage = $state(initialMessage); let recordingSupported = $state(false); let textareaRef: ChatFormTextarea | undefined = $state(undefined); + // Sync message when initialMessage prop changes (e.g., after draft restoration) + $effect(() => { + if (initialMessage !== previousInitialMessage) { + message = initialMessage; + previousInitialMessage = initialMessage; + } + }); + + function handleSystemPromptClick() { + onSystemPromptAdd?.({ message, files: uploadedFiles }); + } + // Check if model is selected (in ROUTER mode) let conversationModel = $derived( chatStore.getConversationModel(activeMessages() as DatabaseMessage[]) @@ -308,6 +325,7 @@ onFileUpload={handleFileUpload} onMicClick={handleMicClick} onStop={handleStop} + onSystemPromptClick={handleSystemPromptClick} /> diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte index dd37268096..3545b4aebf 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte @@ -1,5 +1,6 @@ -
+
@@ -81,6 +88,16 @@
+ + {#if showRawOutputSwitch} +
+ Show raw output + onRawOutputToggle?.(checked)} + /> +
+ {/if}
{ @@ -238,7 +241,7 @@ {:else if message.role === 'assistant'} - {#if config().disableReasoningFormat} + {#if showRawOutput}
{messageContent || ''}
{:else} @@ -352,6 +355,9 @@ {onConfirmDelete} {onNavigateToSibling} {onShowDeleteDialogChange} + showRawOutputSwitch={currentConfig.showRawOutputSwitch} + rawOutputEnabled={showRawOutput} + onRawOutputToggle={(enabled) => (showRawOutput = enabled)} /> {/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageSystem.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageSystem.svelte index c203822f60..887df5b771 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageSystem.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageSystem.svelte @@ -116,7 +116,7 @@ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte index 27439551a1..a5450e6af8 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte @@ -21,6 +21,7 @@ chatStore, errorDialog, isLoading, + isChatStreaming, isEditing, getAddFilesHandler } from '$lib/stores/chat.svelte'; @@ -71,6 +72,8 @@ let emptyFileNames = $state([]); + let initialMessage = $state(''); + let isEmpty = $derived( showCenteredEmpty && !activeConversation() && activeMessages().length === 0 && !isLoading() ); @@ -79,7 +82,7 @@ let isServerLoading = $derived(serverLoading()); let hasPropsError = $derived(!!serverError()); - let isCurrentConversationLoading = $derived(isLoading()); + let isCurrentConversationLoading = $derived(isLoading() || isChatStreaming()); let isRouter = $derived(isRouterMode()); @@ -221,6 +224,14 @@ } } + async function handleSystemPromptAdd(draft: { message: string; files: ChatUploadedFile[] }) { + if (draft.message || draft.files.length > 0) { + chatStore.savePendingDraft(draft.message, draft.files); + } + + await chatStore.addSystemPrompt(); + } + function handleScroll() { if (disableAutoScroll || !chatScrollContainer) return; @@ -343,6 +354,12 @@ if (!disableAutoScroll) { setTimeout(() => scrollChatToBottom('instant'), INITIAL_SCROLL_DELAY); } + + const pendingDraft = chatStore.consumePendingDraft(); + if (pendingDraft) { + initialMessage = pendingDraft.message; + uploadedFiles = pendingDraft.files; + } }); $effect(() => { @@ -428,11 +445,13 @@
chatStore.stopGeneration()} + onSystemPromptAdd={handleSystemPromptAdd} showHelperText={false} bind:uploadedFiles /> @@ -486,11 +505,13 @@
chatStore.stopGeneration()} + onSystemPromptAdd={handleSystemPromptAdd} showHelperText={true} bind:uploadedFiles /> diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte index 5a668aa300..967f19bbce 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte @@ -254,8 +254,13 @@ type: 'checkbox' }, { - key: 'disableReasoningFormat', - label: 'Show raw LLM output', + key: 'disableReasoningParsing', + label: 'Disable reasoning content parsing', + type: 'checkbox' + }, + { + key: 'showRawOutputSwitch', + label: 'Enable raw output toggle', type: 'checkbox' }, { diff --git a/tools/server/webui/src/lib/components/app/content/CollapsibleContentBlock.svelte b/tools/server/webui/src/lib/components/app/content/CollapsibleContentBlock.svelte new file mode 100644 index 0000000000..082738da57 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/content/CollapsibleContentBlock.svelte @@ -0,0 +1,97 @@ + + + { + open = value; + onToggle?.(); + }} + class={className} +> + + +
+ {#if Icon} + + {/if} + + {title} + + {#if subtitle} + {subtitle} + {/if} +
+ +
+ + + Toggle content +
+
+ + +
+ {@render children()} +
+
+
+
diff --git a/tools/server/webui/src/lib/components/app/content/MarkdownContent.svelte b/tools/server/webui/src/lib/components/app/content/MarkdownContent.svelte new file mode 100644 index 0000000000..ef6c7e064f --- /dev/null +++ b/tools/server/webui/src/lib/components/app/content/MarkdownContent.svelte @@ -0,0 +1,1201 @@ + + +
+ {#each renderedBlocks as block (block.id)} +
+ + {@html block.html} +
+ {/each} + + {#if unstableBlockHtml} +
+ + {@html unstableBlockHtml} +
+ {/if} + + {#if incompleteCodeBlock} +
+
+ {incompleteCodeBlock.language || 'text'} + { + previewCode = code; + previewLanguage = lang; + previewDialogOpen = true; + }} + /> +
+
streamingAutoScroll.handleScroll()} + > +
{@html highlightCode(
+							incompleteCodeBlock.code,
+							incompleteCodeBlock.language || 'text'
+						)}
+
+
+ {/if} +
+ + + + diff --git a/tools/server/webui/src/lib/components/app/content/SyntaxHighlightedCode.svelte b/tools/server/webui/src/lib/components/app/content/SyntaxHighlightedCode.svelte new file mode 100644 index 0000000000..625fdc7b1b --- /dev/null +++ b/tools/server/webui/src/lib/components/app/content/SyntaxHighlightedCode.svelte @@ -0,0 +1,95 @@ + + +
+ +
{@html highlightedHtml}
+
+ + diff --git a/tools/server/webui/src/lib/components/app/content/index.ts b/tools/server/webui/src/lib/components/app/content/index.ts new file mode 100644 index 0000000000..bca1c9f4c2 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/content/index.ts @@ -0,0 +1,79 @@ +/** + * + * CONTENT RENDERING + * + * Components for rendering rich content: markdown, code, and previews. + * + */ + +/** + * **MarkdownContent** - Rich markdown renderer + * + * Renders markdown content with syntax highlighting, LaTeX math, + * tables, links, and code blocks. Optimized for streaming with + * incremental block-based rendering. + * + * **Features:** + * - GFM (GitHub Flavored Markdown): tables, task lists, strikethrough + * - LaTeX math via KaTeX (`$inline$` and `$$block$$`) + * - Syntax highlighting (highlight.js) with language detection + * - Code copy buttons with click feedback + * - External links open in new tab with security attrs + * - Image attachment resolution from message extras + * - Dark/light theme support (auto-switching) + * - Streaming-optimized incremental rendering + * - Code preview dialog for large blocks + * + * @example + * ```svelte + * + * ``` + */ +export { default as MarkdownContent } from './MarkdownContent.svelte'; + +/** + * **SyntaxHighlightedCode** - Code syntax highlighting + * + * Renders code with syntax highlighting using highlight.js. + * Supports theme switching and scrollable containers. + * + * **Features:** + * - Auto language detection with fallback + * - Dark/light theme auto-switching + * - Scrollable container with configurable max dimensions + * - Monospace font styling + * - Preserves whitespace and formatting + * + * @example + * ```svelte + * + * ``` + */ +export { default as SyntaxHighlightedCode } from './SyntaxHighlightedCode.svelte'; + +/** + * **CollapsibleContentBlock** - Expandable content card + * + * Reusable collapsible card with header, icon, and auto-scroll. + * Used for tool calls and reasoning blocks in chat messages. + * + * **Features:** + * - Collapsible content with smooth animation + * - Custom icon and title display + * - Optional subtitle/status text + * - Auto-scroll during streaming (pauses on user scroll) + * - Configurable max height with overflow scroll + * + * @example + * ```svelte + * + * {reasoningContent} + * + * ``` + */ +export { default as CollapsibleContentBlock } from './CollapsibleContentBlock.svelte'; diff --git a/tools/server/webui/src/lib/components/app/misc/ConversationSelection.svelte b/tools/server/webui/src/lib/components/app/misc/ConversationSelection.svelte index e2095e0876..21412f47e5 100644 --- a/tools/server/webui/src/lib/components/app/misc/ConversationSelection.svelte +++ b/tools/server/webui/src/lib/components/app/misc/ConversationSelection.svelte @@ -17,9 +17,13 @@ let { conversations, messageCountMap = new Map(), mode, onCancel, onConfirm }: Props = $props(); let searchQuery = $state(''); - let selectedIds = $state.raw>(new SvelteSet(conversations.map((c) => c.id))); + let selectedIds = $state.raw>(getInitialSelectedIds()); let lastClickedId = $state(null); + function getInitialSelectedIds(): SvelteSet { + return new SvelteSet(conversations.map((c) => c.id)); + } + let filteredConversations = $derived( conversations.filter((conv) => { const name = conv.name || 'Untitled conversation'; @@ -92,7 +96,7 @@ } function handleCancel() { - selectedIds = new SvelteSet(conversations.map((c) => c.id)); + selectedIds = getInitialSelectedIds(); searchQuery = ''; lastClickedId = null; @@ -100,7 +104,7 @@ } export function reset() { - selectedIds = new SvelteSet(conversations.map((c) => c.id)); + selectedIds = getInitialSelectedIds(); searchQuery = ''; lastClickedId = null; } diff --git a/tools/server/webui/src/lib/components/app/misc/HorizontalScrollCarousel.svelte b/tools/server/webui/src/lib/components/app/misc/HorizontalScrollCarousel.svelte new file mode 100644 index 0000000000..e302f83e11 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/HorizontalScrollCarousel.svelte @@ -0,0 +1,93 @@ + + +
+ + +
+ {@render children?.()} +
+ + +
diff --git a/tools/server/webui/src/lib/components/app/misc/KeyboardShortcutInfo.svelte b/tools/server/webui/src/lib/components/app/misc/KeyboardShortcutInfo.svelte index 5b7522fe1b..da55abda02 100644 --- a/tools/server/webui/src/lib/components/app/misc/KeyboardShortcutInfo.svelte +++ b/tools/server/webui/src/lib/components/app/misc/KeyboardShortcutInfo.svelte @@ -11,7 +11,9 @@ let baseClasses = 'px-1 pointer-events-none inline-flex select-none items-center gap-0.5 font-sans text-md font-medium opacity-0 transition-opacity -my-1'; - let variantClasses = variant === 'destructive' ? 'text-destructive' : 'text-muted-foreground'; + let variantClasses = $derived( + variant === 'destructive' ? 'text-destructive' : 'text-muted-foreground' + ); diff --git a/tools/server/webui/src/lib/components/app/misc/TruncatedText.svelte b/tools/server/webui/src/lib/components/app/misc/TruncatedText.svelte new file mode 100644 index 0000000000..9a8731fc78 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/TruncatedText.svelte @@ -0,0 +1,48 @@ + + +{#if isTruncated} + + + + {text} + + + + +

{text}

+
+
+{:else} + + {text} + +{/if} diff --git a/tools/server/webui/src/lib/components/app/misc/index.ts b/tools/server/webui/src/lib/components/app/misc/index.ts new file mode 100644 index 0000000000..02bd70b24f --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/index.ts @@ -0,0 +1,45 @@ +/** + * + * MISC + * + * Miscellaneous utility components. + * + */ + +/** + * **ConversationSelection** - Multi-select conversation picker + * + * List of conversations with checkboxes for multi-selection. + * Used in import/export dialogs for selecting conversations. + * + * **Features:** + * - Search/filter conversations by name + * - Select all / deselect all controls + * - Shift-click for range selection + * - Message count display per conversation + * - Mode-specific UI (export vs import) + */ +export { default as ConversationSelection } from './ConversationSelection.svelte'; + +/** + * Horizontal scrollable carousel with navigation arrows. + * Used for displaying items in a horizontally scrollable container + * with left/right navigation buttons that appear on hover. + */ +export { default as HorizontalScrollCarousel } from './HorizontalScrollCarousel.svelte'; + +/** + * **TruncatedText** - Text with ellipsis and tooltip + * + * Displays text with automatic truncation and full content in tooltip. + * Useful for long names or paths in constrained spaces. + */ +export { default as TruncatedText } from './TruncatedText.svelte'; + +/** + * **KeyboardShortcutInfo** - Keyboard shortcut hint display + * + * Displays keyboard shortcut hints (e.g., "⌘ + Enter"). + * Supports special keys like shift, cmd, and custom text. + */ +export { default as KeyboardShortcutInfo } from './KeyboardShortcutInfo.svelte'; diff --git a/tools/server/webui/src/lib/components/app/navigation/DropdownMenuActions.svelte b/tools/server/webui/src/lib/components/app/navigation/DropdownMenuActions.svelte new file mode 100644 index 0000000000..83d856d10e --- /dev/null +++ b/tools/server/webui/src/lib/components/app/navigation/DropdownMenuActions.svelte @@ -0,0 +1,86 @@ + + + + e.stopPropagation()} + > + {#if triggerTooltip} + + + {@render iconComponent(triggerIcon, 'h-3 w-3')} + {triggerTooltip} + + +

{triggerTooltip}

+
+
+ {:else} + {@render iconComponent(triggerIcon, 'h-3 w-3')} + {/if} +
+ + + {#each actions as action, index (action.label)} + {#if action.separator && index > 0} + + {/if} + + +
+ {@render iconComponent( + action.icon, + `h-4 w-4 ${action.variant === 'destructive' ? 'text-destructive' : ''}` + )} + {action.label} +
+ + {#if action.shortcut} + + {/if} +
+ {/each} +
+
+ +{#snippet iconComponent(IconComponent: Component, className: string)} + +{/snippet} diff --git a/tools/server/webui/src/lib/components/app/navigation/DropdownMenuSearchable.svelte b/tools/server/webui/src/lib/components/app/navigation/DropdownMenuSearchable.svelte new file mode 100644 index 0000000000..3bd68d3bd6 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/navigation/DropdownMenuSearchable.svelte @@ -0,0 +1,50 @@ + + +
+ +
+ +
+ {@render children()} + + {#if isEmpty} +
{emptyMessage}
+ {/if} +
+ +{#if footer} + + + {@render footer()} +{/if} diff --git a/tools/server/webui/src/lib/components/app/navigation/index.ts b/tools/server/webui/src/lib/components/app/navigation/index.ts new file mode 100644 index 0000000000..051491b866 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/navigation/index.ts @@ -0,0 +1,65 @@ +/** + * + * NAVIGATION & MENUS + * + * Components for dropdown menus and action selection. + * + */ + +/** + * **DropdownMenuSearchable** - Searchable content for dropdown menus + * + * Renders a search input with filtered content area, empty state, and optional footer. + * Designed to be injected into any dropdown container (DropdownMenu.Content, + * DropdownMenu.SubContent, etc.) without providing its own Root. + * + * **Features:** + * - Search/filter input + * - Keyboard navigation support + * - Custom content and footer via snippets + * - Empty state message + * + * @example + * ```svelte + * + * ... + * + * + * {#each items as item}{/each} + * + * + * + * ``` + */ +export { default as DropdownMenuSearchable } from './DropdownMenuSearchable.svelte'; + +/** + * **DropdownMenuActions** - Multi-action dropdown menu + * + * Dropdown menu for multiple action options with icons and shortcuts. + * Supports destructive variants and keyboard shortcut hints. + * + * **Features:** + * - Configurable trigger icon with tooltip + * - Action items with icons and labels + * - Destructive variant styling + * - Keyboard shortcut display + * - Separator support between groups + * + * @example + * ```svelte + * + * ``` + */ +export { default as DropdownMenuActions } from './DropdownMenuActions.svelte'; diff --git a/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte b/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte index fa4c2842cc..520e5bf56f 100644 --- a/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte +++ b/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte @@ -8,6 +8,7 @@ import { serverStore, serverLoading } from '$lib/stores/server.svelte'; import { config, settingsStore } from '$lib/stores/settings.svelte'; import { fade, fly, scale } from 'svelte/transition'; + import { KeyboardKey } from '$lib/enums/keyboard'; interface Props { class?: string; @@ -117,7 +118,7 @@ } function handleApiKeyKeydown(event: KeyboardEvent) { - if (event.key === 'Enter') { + if (event.key === KeyboardKey.ENTER) { handleSaveApiKey(); } } diff --git a/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte b/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte index d9f6d4a32a..86a962de12 100644 --- a/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte +++ b/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte @@ -48,7 +48,7 @@ {model || 'Unknown Model'} - {#if serverData.default_generation_settings.n_ctx} + {#if serverData?.default_generation_settings?.n_ctx} ctx: {serverData.default_generation_settings.n_ctx.toLocaleString()} diff --git a/tools/server/webui/src/lib/components/app/server/index.ts b/tools/server/webui/src/lib/components/app/server/index.ts new file mode 100644 index 0000000000..39ac5b482d --- /dev/null +++ b/tools/server/webui/src/lib/components/app/server/index.ts @@ -0,0 +1,80 @@ +/** + * + * SERVER + * + * Components for displaying server connection state and handling + * connection errors. Integrates with serverStore for state management. + * + */ + +/** + * **ServerStatus** - Server connection status indicator + * + * Compact status display showing connection state, model name, + * and context size. Used in headers and loading screens. + * + * **Architecture:** + * - Reads state from serverStore (props, loading, error) + * - Displays model name from modelsStore + * + * **Features:** + * - Status dot: green (connected), yellow (connecting), red (error), gray (unknown) + * - Status text label + * - Model name badge with icon + * - Context size badge + * - Optional error action button + * + * @example + * ```svelte + * + * ``` + */ +export { default as ServerStatus } from './ServerStatus.svelte'; + +/** + * **ServerErrorSplash** - Full-screen connection error display + * + * Blocking error screen shown when server connection fails. + * Provides retry options and API key input for authentication errors. + * + * **Architecture:** + * - Detects access denied errors for API key flow + * - Validates API key against server before saving + * - Integrates with settingsStore for API key persistence + * + * **Features:** + * - Error message display with icon + * - Retry connection button with loading state + * - API key input for authentication errors + * - API key validation with success/error feedback + * - Troubleshooting section with server start commands + * - Animated transitions for UI elements + * + * @example + * ```svelte + * + * ``` + */ +export { default as ServerErrorSplash } from './ServerErrorSplash.svelte'; + +/** + * **ServerLoadingSplash** - Full-screen loading display + * + * Shown during initial server connection. Displays loading animation + * with ServerStatus component for real-time connection state. + * + * **Features:** + * - Animated server icon + * - Customizable loading message + * - Embedded ServerStatus for live updates + * + * @example + * ```svelte + * + * ``` + */ +export { default as ServerLoadingSplash } from './ServerLoadingSplash.svelte'; diff --git a/tools/server/webui/src/lib/components/ui/badge/badge.svelte b/tools/server/webui/src/lib/components/ui/badge/badge.svelte index 4d15145493..c3e6ac0720 100644 --- a/tools/server/webui/src/lib/components/ui/badge/badge.svelte +++ b/tools/server/webui/src/lib/components/ui/badge/badge.svelte @@ -42,7 +42,7 @@ bind:this={ref} data-slot="badge" {href} - class={cn(badgeVariants({ variant }), className)} + class={cn(badgeVariants({ variant }), className, 'backdrop-blur-sm')} {...restProps} > {@render children?.()} diff --git a/tools/server/webui/src/lib/components/ui/button/button.svelte b/tools/server/webui/src/lib/components/ui/button/button.svelte index d12c8de147..d29358c8e0 100644 --- a/tools/server/webui/src/lib/components/ui/button/button.svelte +++ b/tools/server/webui/src/lib/components/ui/button/button.svelte @@ -12,8 +12,9 @@ 'bg-destructive shadow-xs hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60 text-white', outline: 'bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50 border', - secondary: 'bg-secondary text-secondary-foreground shadow-xs hover:bg-secondary/80', - ghost: 'hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50', + secondary: + 'dark:bg-secondary dark:text-secondary-foreground bg-background shadow-sm text-foreground hover:bg-muted-foreground/20', + ghost: 'hover:text-accent-foreground hover:bg-muted-foreground/10', link: 'text-primary underline-offset-4 hover:underline' }, size: { diff --git a/tools/server/webui/src/lib/components/ui/card/card.svelte b/tools/server/webui/src/lib/components/ui/card/card.svelte index c40d14309f..b9dcd2de6f 100644 --- a/tools/server/webui/src/lib/components/ui/card/card.svelte +++ b/tools/server/webui/src/lib/components/ui/card/card.svelte @@ -1,6 +1,7 @@ - +{#snippet tooltipContent()} {@render children?.()} @@ -44,4 +50,12 @@ {/snippet} - +{/snippet} + +{#if noPortal} + {@render tooltipContent()} +{:else} + + {@render tooltipContent()} + +{/if} diff --git a/tools/server/webui/src/lib/constants/binary-detection.ts b/tools/server/webui/src/lib/constants/binary-detection.ts index a4440fde5d..eac919ad96 100644 --- a/tools/server/webui/src/lib/constants/binary-detection.ts +++ b/tools/server/webui/src/lib/constants/binary-detection.ts @@ -1,9 +1,6 @@ export interface BinaryDetectionOptions { - /** Number of characters to check from the beginning of the file */ prefixLength: number; - /** Maximum ratio of suspicious characters allowed (0.0 to 1.0) */ suspiciousCharThresholdRatio: number; - /** Maximum absolute number of null bytes allowed */ maxAbsoluteNullBytes: number; } diff --git a/tools/server/webui/src/lib/constants/chat-form.ts b/tools/server/webui/src/lib/constants/chat-form.ts new file mode 100644 index 0000000000..c5e3dc3d1b --- /dev/null +++ b/tools/server/webui/src/lib/constants/chat-form.ts @@ -0,0 +1,3 @@ +export const INITIAL_FILE_SIZE = 0; +export const PROMPT_CONTENT_SEPARATOR = '\n\n'; +export const CLIPBOARD_CONTENT_QUOTE_PREFIX = '"'; diff --git a/tools/server/webui/src/lib/constants/code-blocks.ts b/tools/server/webui/src/lib/constants/code-blocks.ts new file mode 100644 index 0000000000..0f7265104d --- /dev/null +++ b/tools/server/webui/src/lib/constants/code-blocks.ts @@ -0,0 +1,8 @@ +export const CODE_BLOCK_SCROLL_CONTAINER_CLASS = 'code-block-scroll-container'; +export const CODE_BLOCK_WRAPPER_CLASS = 'code-block-wrapper'; +export const CODE_BLOCK_HEADER_CLASS = 'code-block-header'; +export const CODE_BLOCK_ACTIONS_CLASS = 'code-block-actions'; +export const CODE_LANGUAGE_CLASS = 'code-language'; +export const COPY_CODE_BTN_CLASS = 'copy-code-btn'; +export const PREVIEW_CODE_BTN_CLASS = 'preview-code-btn'; +export const RELATIVE_CLASS = 'relative'; diff --git a/tools/server/webui/src/lib/constants/code.ts b/tools/server/webui/src/lib/constants/code.ts new file mode 100644 index 0000000000..12bcd0db77 --- /dev/null +++ b/tools/server/webui/src/lib/constants/code.ts @@ -0,0 +1,7 @@ +export const NEWLINE = '\n'; +export const DEFAULT_LANGUAGE = 'text'; +export const LANG_PATTERN = /^(\w*)\n?/; +export const AMPERSAND_REGEX = /&/g; +export const LT_REGEX = //g; +export const FENCE_PATTERN = /^```|\n```/g; diff --git a/tools/server/webui/src/lib/constants/css-classes.ts b/tools/server/webui/src/lib/constants/css-classes.ts new file mode 100644 index 0000000000..46076e55f6 --- /dev/null +++ b/tools/server/webui/src/lib/constants/css-classes.ts @@ -0,0 +1,10 @@ +export const BOX_BORDER = + 'border border-border/30 focus-within:border-border dark:border-border/20 dark:focus-within:border-border'; + +export const INPUT_CLASSES = ` + bg-muted/60 dark:bg-muted/75 + ${BOX_BORDER} + shadow-sm + outline-none + text-foreground +`; diff --git a/tools/server/webui/src/lib/constants/formatters.ts b/tools/server/webui/src/lib/constants/formatters.ts new file mode 100644 index 0000000000..d6d1b883ff --- /dev/null +++ b/tools/server/webui/src/lib/constants/formatters.ts @@ -0,0 +1,8 @@ +export const MS_PER_SECOND = 1000; +export const SECONDS_PER_MINUTE = 60; +export const SECONDS_PER_HOUR = 3600; +export const SHORT_DURATION_THRESHOLD = 1; +export const MEDIUM_DURATION_THRESHOLD = 10; + +/** Default display value when no performance time is available */ +export const DEFAULT_PERFORMANCE_TIME = '0s'; diff --git a/tools/server/webui/src/lib/constants/markdown.ts b/tools/server/webui/src/lib/constants/markdown.ts new file mode 100644 index 0000000000..783d31a22c --- /dev/null +++ b/tools/server/webui/src/lib/constants/markdown.ts @@ -0,0 +1,4 @@ +export const IMAGE_NOT_ERROR_BOUND_SELECTOR = 'img:not([data-error-bound])'; +export const DATA_ERROR_BOUND_ATTR = 'errorBound'; +export const DATA_ERROR_HANDLED_ATTR = 'errorHandled'; +export const BOOL_TRUE_STRING = 'true'; diff --git a/tools/server/webui/src/lib/constants/processing-info.ts b/tools/server/webui/src/lib/constants/processing-info.ts index 726439211b..2c3f7dc534 100644 --- a/tools/server/webui/src/lib/constants/processing-info.ts +++ b/tools/server/webui/src/lib/constants/processing-info.ts @@ -1 +1,8 @@ export const PROCESSING_INFO_TIMEOUT = 2000; + +/** + * Statistics units labels + */ +export const STATS_UNITS = { + TOKENS_PER_SECOND: 't/s' +} as const; diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts index cac48a557c..1b959f3b69 100644 --- a/tools/server/webui/src/lib/constants/settings-config.ts +++ b/tools/server/webui/src/lib/constants/settings-config.ts @@ -7,7 +7,8 @@ export const SETTING_CONFIG_DEFAULT: Record = theme: 'system', showThoughtInProgress: false, showToolCalls: false, - disableReasoningFormat: false, + disableReasoningParsing: false, + showRawOutputSwitch: false, keepStatsVisible: false, showMessageStats: true, askForTitleConfirmation: false, @@ -92,8 +93,10 @@ export const SETTING_CONFIG_INFO: Record = { showThoughtInProgress: 'Expand thought process by default when generating messages.', showToolCalls: 'Display tool call labels and payloads from Harmony-compatible delta.tool_calls data below assistant messages.', - disableReasoningFormat: - 'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.', + disableReasoningParsing: + 'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field', + showRawOutputSwitch: + 'Show toggle button to display messages as plain text instead of Markdown-formatted content', keepStatsVisible: 'Keep processing statistics visible after generation finishes.', showMessageStats: 'Display generation statistics (tokens/second, token count, duration) below each assistant message.', diff --git a/tools/server/webui/src/lib/constants/settings-fields.ts b/tools/server/webui/src/lib/constants/settings-fields.ts new file mode 100644 index 0000000000..79a6e92870 --- /dev/null +++ b/tools/server/webui/src/lib/constants/settings-fields.ts @@ -0,0 +1,33 @@ +/** + * List of all numeric fields in settings configuration. + * These fields will be converted from strings to numbers during save. + */ +export const NUMERIC_FIELDS = [ + 'temperature', + 'top_k', + 'top_p', + 'min_p', + 'max_tokens', + 'pasteLongTextToFileLen', + 'dynatemp_range', + 'dynatemp_exponent', + 'typ_p', + 'xtc_probability', + 'xtc_threshold', + 'repeat_last_n', + 'repeat_penalty', + 'presence_penalty', + 'frequency_penalty', + 'dry_multiplier', + 'dry_base', + 'dry_allowed_length', + 'dry_penalty_last_n', + 'agenticMaxTurns', + 'agenticMaxToolPreviewLines' +] as const; + +/** + * Fields that must be positive integers (>= 1). + * These will be clamped to minimum 1 and rounded during save. + */ +export const POSITIVE_INTEGER_FIELDS = ['agenticMaxTurns', 'agenticMaxToolPreviewLines'] as const; diff --git a/tools/server/webui/src/lib/constants/tooltip-config.ts b/tools/server/webui/src/lib/constants/tooltip-config.ts index 3c30c8c072..ad76ab3522 100644 --- a/tools/server/webui/src/lib/constants/tooltip-config.ts +++ b/tools/server/webui/src/lib/constants/tooltip-config.ts @@ -1 +1 @@ -export const TOOLTIP_DELAY_DURATION = 100; +export const TOOLTIP_DELAY_DURATION = 500; diff --git a/tools/server/webui/src/lib/constants/ui.ts b/tools/server/webui/src/lib/constants/ui.ts new file mode 100644 index 0000000000..a75b30f2f8 --- /dev/null +++ b/tools/server/webui/src/lib/constants/ui.ts @@ -0,0 +1 @@ +export const SYSTEM_MESSAGE_PLACEHOLDER = 'System message'; diff --git a/tools/server/webui/src/lib/contexts/chat-actions.context.ts b/tools/server/webui/src/lib/contexts/chat-actions.context.ts new file mode 100644 index 0000000000..eba0fec027 --- /dev/null +++ b/tools/server/webui/src/lib/contexts/chat-actions.context.ts @@ -0,0 +1,34 @@ +import { getContext, setContext } from 'svelte'; + +export interface ChatActionsContext { + copy: (message: DatabaseMessage) => void; + delete: (message: DatabaseMessage) => void; + navigateToSibling: (siblingId: string) => void; + editWithBranching: ( + message: DatabaseMessage, + newContent: string, + newExtras?: DatabaseMessageExtra[] + ) => void; + editWithReplacement: ( + message: DatabaseMessage, + newContent: string, + shouldBranch: boolean + ) => void; + editUserMessagePreserveResponses: ( + message: DatabaseMessage, + newContent: string, + newExtras?: DatabaseMessageExtra[] + ) => void; + regenerateWithBranching: (message: DatabaseMessage, modelOverride?: string) => void; + continueAssistantMessage: (message: DatabaseMessage) => void; +} + +const CHAT_ACTIONS_KEY = Symbol.for('chat-actions'); + +export function setChatActionsContext(ctx: ChatActionsContext): ChatActionsContext { + return setContext(CHAT_ACTIONS_KEY, ctx); +} + +export function getChatActionsContext(): ChatActionsContext { + return getContext(CHAT_ACTIONS_KEY); +} diff --git a/tools/server/webui/src/lib/contexts/index.ts b/tools/server/webui/src/lib/contexts/index.ts new file mode 100644 index 0000000000..73ff6f96fa --- /dev/null +++ b/tools/server/webui/src/lib/contexts/index.ts @@ -0,0 +1,13 @@ +export { + getMessageEditContext, + setMessageEditContext, + type MessageEditContext, + type MessageEditState, + type MessageEditActions +} from './message-edit.context'; + +export { + getChatActionsContext, + setChatActionsContext, + type ChatActionsContext +} from './chat-actions.context'; diff --git a/tools/server/webui/src/lib/contexts/message-edit.context.ts b/tools/server/webui/src/lib/contexts/message-edit.context.ts new file mode 100644 index 0000000000..7af116daa5 --- /dev/null +++ b/tools/server/webui/src/lib/contexts/message-edit.context.ts @@ -0,0 +1,39 @@ +import { getContext, setContext } from 'svelte'; + +export interface MessageEditState { + readonly isEditing: boolean; + readonly editedContent: string; + readonly editedExtras: DatabaseMessageExtra[]; + readonly editedUploadedFiles: ChatUploadedFile[]; + readonly originalContent: string; + readonly originalExtras: DatabaseMessageExtra[]; + readonly showSaveOnlyOption: boolean; +} + +export interface MessageEditActions { + setContent: (content: string) => void; + setExtras: (extras: DatabaseMessageExtra[]) => void; + setUploadedFiles: (files: ChatUploadedFile[]) => void; + save: () => void; + saveOnly: () => void; + cancel: () => void; + startEdit: () => void; +} + +export type MessageEditContext = MessageEditState & MessageEditActions; + +const MESSAGE_EDIT_KEY = Symbol.for('chat-message-edit'); + +/** + * Sets the message edit context. Call this in the parent component (ChatMessage.svelte). + */ +export function setMessageEditContext(ctx: MessageEditContext): MessageEditContext { + return setContext(MESSAGE_EDIT_KEY, ctx); +} + +/** + * Gets the message edit context. Call this in child components. + */ +export function getMessageEditContext(): MessageEditContext { + return getContext(MESSAGE_EDIT_KEY); +} diff --git a/tools/server/webui/src/lib/enums/chat.ts b/tools/server/webui/src/lib/enums/chat.ts index 2b9eb7bc2e..0b6f357d9a 100644 --- a/tools/server/webui/src/lib/enums/chat.ts +++ b/tools/server/webui/src/lib/enums/chat.ts @@ -1,4 +1,51 @@ export enum ChatMessageStatsView { GENERATION = 'generation', - READING = 'reading' + READING = 'reading', + TOOLS = 'tools', + SUMMARY = 'summary' +} + +/** + * Reasoning format options for API requests. + */ +export enum ReasoningFormat { + NONE = 'none', + AUTO = 'auto' +} + +/** + * Message roles for chat messages. + */ +export enum MessageRole { + USER = 'user', + ASSISTANT = 'assistant', + SYSTEM = 'system', + TOOL = 'tool' +} + +/** + * Message types for different content kinds. + */ +export enum MessageType { + ROOT = 'root', + TEXT = 'text', + THINK = 'think', + SYSTEM = 'system' +} + +/** + * Content part types for API chat message content. + */ +export enum ContentPartType { + TEXT = 'text', + IMAGE_URL = 'image_url', + INPUT_AUDIO = 'input_audio' +} + +/** + * Error dialog types for displaying server/timeout errors. + */ +export enum ErrorDialogType { + TIMEOUT = 'timeout', + SERVER = 'server' } diff --git a/tools/server/webui/src/lib/enums/keyboard.ts b/tools/server/webui/src/lib/enums/keyboard.ts new file mode 100644 index 0000000000..b8f6d5f7a2 --- /dev/null +++ b/tools/server/webui/src/lib/enums/keyboard.ts @@ -0,0 +1,15 @@ +/** + * Keyboard key names for event handling + */ +export enum KeyboardKey { + ENTER = 'Enter', + ESCAPE = 'Escape', + ARROW_UP = 'ArrowUp', + ARROW_DOWN = 'ArrowDown', + TAB = 'Tab', + D_LOWER = 'd', + D_UPPER = 'D', + E_UPPER = 'E', + K_LOWER = 'k', + O_UPPER = 'O' +} diff --git a/tools/server/webui/src/lib/enums/settings.ts b/tools/server/webui/src/lib/enums/settings.ts new file mode 100644 index 0000000000..f17f219762 --- /dev/null +++ b/tools/server/webui/src/lib/enums/settings.ts @@ -0,0 +1,26 @@ +/** + * Parameter source - indicates whether a parameter uses default or custom value + */ +export enum ParameterSource { + DEFAULT = 'default', + CUSTOM = 'custom' +} + +/** + * Syncable parameter type - data types for parameters that can be synced with server + */ +export enum SyncableParameterType { + NUMBER = 'number', + STRING = 'string', + BOOLEAN = 'boolean' +} + +/** + * Settings field type - defines the input type for settings fields + */ +export enum SettingsFieldType { + INPUT = 'input', + TEXTAREA = 'textarea', + CHECKBOX = 'checkbox', + SELECT = 'select' +} diff --git a/tools/server/webui/src/lib/hooks/use-auto-scroll.svelte.ts b/tools/server/webui/src/lib/hooks/use-auto-scroll.svelte.ts new file mode 100644 index 0000000000..bbaa5d1362 --- /dev/null +++ b/tools/server/webui/src/lib/hooks/use-auto-scroll.svelte.ts @@ -0,0 +1,165 @@ +import { AUTO_SCROLL_AT_BOTTOM_THRESHOLD, AUTO_SCROLL_INTERVAL } from '$lib/constants/auto-scroll'; + +export interface AutoScrollOptions { + /** Whether auto-scroll is disabled globally (e.g., from settings) */ + disabled?: boolean; +} + +/** + * Creates an auto-scroll controller for a scrollable container. + * + * Features: + * - Auto-scrolls to bottom during streaming/loading + * - Stops auto-scroll when user manually scrolls up + * - Resumes auto-scroll when user scrolls back to bottom + */ +export class AutoScrollController { + private _autoScrollEnabled = $state(true); + private _userScrolledUp = $state(false); + private _lastScrollTop = $state(0); + private _scrollInterval: ReturnType | undefined; + private _scrollTimeout: ReturnType | undefined; + private _container: HTMLElement | undefined; + private _disabled: boolean; + + constructor(options: AutoScrollOptions = {}) { + this._disabled = options.disabled ?? false; + } + + get autoScrollEnabled(): boolean { + return this._autoScrollEnabled; + } + + get userScrolledUp(): boolean { + return this._userScrolledUp; + } + + /** + * Binds the controller to a scrollable container element. + */ + setContainer(container: HTMLElement | undefined): void { + this._container = container; + } + + /** + * Updates the disabled state. + */ + setDisabled(disabled: boolean): void { + this._disabled = disabled; + if (disabled) { + this._autoScrollEnabled = false; + this.stopInterval(); + } + } + + /** + * Handles scroll events to detect user scroll direction and toggle auto-scroll. + */ + handleScroll(): void { + if (this._disabled || !this._container) return; + + const { scrollTop, scrollHeight, clientHeight } = this._container; + const distanceFromBottom = scrollHeight - scrollTop - clientHeight; + const isAtBottom = distanceFromBottom < AUTO_SCROLL_AT_BOTTOM_THRESHOLD; + + if (scrollTop < this._lastScrollTop && !isAtBottom) { + this._userScrolledUp = true; + this._autoScrollEnabled = false; + } else if (isAtBottom && this._userScrolledUp) { + this._userScrolledUp = false; + this._autoScrollEnabled = true; + } + + if (this._scrollTimeout) { + clearTimeout(this._scrollTimeout); + } + + this._scrollTimeout = setTimeout(() => { + if (isAtBottom) { + this._userScrolledUp = false; + this._autoScrollEnabled = true; + } + }, AUTO_SCROLL_INTERVAL); + + this._lastScrollTop = scrollTop; + } + + /** + * Scrolls the container to the bottom. + */ + scrollToBottom(behavior: ScrollBehavior = 'smooth'): void { + if (this._disabled || !this._container) return; + + this._container.scrollTo({ + top: this._container.scrollHeight, + behavior + }); + } + + /** + * Enables auto-scroll (e.g., when user sends a message). + */ + enable(): void { + if (this._disabled) return; + this._userScrolledUp = false; + this._autoScrollEnabled = true; + } + + /** + * Starts the auto-scroll interval for continuous scrolling during streaming. + */ + startInterval(): void { + if (this._disabled || this._scrollInterval) return; + + this._scrollInterval = setInterval(() => { + this.scrollToBottom(); + }, AUTO_SCROLL_INTERVAL); + } + + /** + * Stops the auto-scroll interval. + */ + stopInterval(): void { + if (this._scrollInterval) { + clearInterval(this._scrollInterval); + this._scrollInterval = undefined; + } + } + + /** + * Updates the auto-scroll interval based on streaming state. + * Call this in a $effect to automatically manage the interval. + */ + updateInterval(isStreaming: boolean): void { + if (this._disabled) { + this.stopInterval(); + return; + } + + if (isStreaming && this._autoScrollEnabled) { + if (!this._scrollInterval) { + this.startInterval(); + } + } else { + this.stopInterval(); + } + } + + /** + * Cleans up resources. Call this in onDestroy or when the component unmounts. + */ + destroy(): void { + this.stopInterval(); + if (this._scrollTimeout) { + clearTimeout(this._scrollTimeout); + this._scrollTimeout = undefined; + } + } +} + +/** + * Creates a new AutoScrollController instance. + */ +export function createAutoScrollController(options: AutoScrollOptions = {}): AutoScrollController { + return new AutoScrollController(options); +} diff --git a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts index c06cf28864..068440cdc0 100644 --- a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts +++ b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts @@ -1,7 +1,9 @@ import { activeProcessingState } from '$lib/stores/chat.svelte'; import { config } from '$lib/stores/settings.svelte'; +import { STATS_UNITS } from '$lib/constants/processing-info'; +import type { ApiProcessingState } from '$lib/types'; -export interface LiveProcessingStats { +interface LiveProcessingStats { tokensProcessed: number; totalTokens: number; timeMs: number; @@ -9,7 +11,7 @@ export interface LiveProcessingStats { etaSecs?: number; } -export interface LiveGenerationStats { +interface LiveGenerationStats { tokensGenerated: number; timeMs: number; tokensPerSecond: number; @@ -18,6 +20,7 @@ export interface LiveGenerationStats { export interface UseProcessingStateReturn { readonly processingState: ApiProcessingState | null; getProcessingDetails(): string[]; + getTechnicalDetails(): string[]; getProcessingMessage(): string; getPromptProgressText(): string | null; getLiveProcessingStats(): LiveProcessingStats | null; @@ -138,8 +141,31 @@ export function useProcessingState(): UseProcessingStateReturn { const details: string[] = []; + // Show prompt processing progress with ETA during preparation phase + if (stateToUse.promptProgress) { + const { processed, total, time_ms, cache } = stateToUse.promptProgress; + const actualProcessed = processed - cache; + const actualTotal = total - cache; + + if (actualProcessed < actualTotal && actualProcessed > 0) { + const percent = Math.round((actualProcessed / actualTotal) * 100); + const eta = getETASecs(actualProcessed, actualTotal, time_ms); + + if (eta !== undefined) { + const etaSecs = Math.ceil(eta); + details.push(`Processing ${percent}% (ETA: ${etaSecs}s)`); + } else { + details.push(`Processing ${percent}%`); + } + } + } + // Always show context info when we have valid data - if (stateToUse.contextUsed >= 0 && stateToUse.contextTotal > 0) { + if ( + typeof stateToUse.contextTotal === 'number' && + stateToUse.contextUsed >= 0 && + stateToUse.contextTotal > 0 + ) { const contextPercent = Math.round((stateToUse.contextUsed / stateToUse.contextTotal) * 100); details.push( @@ -163,7 +189,57 @@ export function useProcessingState(): UseProcessingStateReturn { } if (stateToUse.tokensPerSecond && stateToUse.tokensPerSecond > 0) { - details.push(`${stateToUse.tokensPerSecond.toFixed(1)} tokens/sec`); + details.push(`${stateToUse.tokensPerSecond.toFixed(1)} ${STATS_UNITS.TOKENS_PER_SECOND}`); + } + + if (stateToUse.speculative) { + details.push('Speculative decoding enabled'); + } + + return details; + } + + /** + * Returns technical details without the progress message (for bottom bar) + */ + function getTechnicalDetails(): string[] { + const stateToUse = processingState || lastKnownState; + if (!stateToUse) { + return []; + } + + const details: string[] = []; + + // Always show context info when we have valid data + if ( + typeof stateToUse.contextTotal === 'number' && + stateToUse.contextUsed >= 0 && + stateToUse.contextTotal > 0 + ) { + const contextPercent = Math.round((stateToUse.contextUsed / stateToUse.contextTotal) * 100); + + details.push( + `Context: ${stateToUse.contextUsed}/${stateToUse.contextTotal} (${contextPercent}%)` + ); + } + + if (stateToUse.outputTokensUsed > 0) { + // Handle infinite max_tokens (-1) case + if (stateToUse.outputTokensMax <= 0) { + details.push(`Output: ${stateToUse.outputTokensUsed}/∞`); + } else { + const outputPercent = Math.round( + (stateToUse.outputTokensUsed / stateToUse.outputTokensMax) * 100 + ); + + details.push( + `Output: ${stateToUse.outputTokensUsed}/${stateToUse.outputTokensMax} (${outputPercent}%)` + ); + } + } + + if (stateToUse.tokensPerSecond && stateToUse.tokensPerSecond > 0) { + details.push(`${stateToUse.tokensPerSecond.toFixed(1)} ${STATS_UNITS.TOKENS_PER_SECOND}`); } if (stateToUse.speculative) { @@ -251,6 +327,7 @@ export function useProcessingState(): UseProcessingStateReturn { return processingState; }, getProcessingDetails, + getTechnicalDetails, getProcessingMessage, getPromptProgressText, getLiveProcessingStats, diff --git a/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts b/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts index 6f0e03e211..168de97403 100644 --- a/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts +++ b/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts @@ -13,6 +13,16 @@ import type { Plugin } from 'unified'; import type { Root, Element, ElementContent } from 'hast'; import { visit } from 'unist-util-visit'; +import { + CODE_BLOCK_SCROLL_CONTAINER_CLASS, + CODE_BLOCK_WRAPPER_CLASS, + CODE_BLOCK_HEADER_CLASS, + CODE_BLOCK_ACTIONS_CLASS, + CODE_LANGUAGE_CLASS, + COPY_CODE_BTN_CLASS, + PREVIEW_CODE_BTN_CLASS, + RELATIVE_CLASS +} from '$lib/constants/code-blocks'; declare global { interface Window { @@ -42,7 +52,7 @@ function createCopyButton(codeId: string): Element { type: 'element', tagName: 'button', properties: { - className: ['copy-code-btn'], + className: [COPY_CODE_BTN_CLASS], 'data-code-id': codeId, title: 'Copy code', type: 'button' @@ -56,7 +66,7 @@ function createPreviewButton(codeId: string): Element { type: 'element', tagName: 'button', properties: { - className: ['preview-code-btn'], + className: [PREVIEW_CODE_BTN_CLASS], 'data-code-id': codeId, title: 'Preview code', type: 'button' @@ -75,30 +85,39 @@ function createHeader(language: string, codeId: string): Element { return { type: 'element', tagName: 'div', - properties: { className: ['code-block-header'] }, + properties: { className: [CODE_BLOCK_HEADER_CLASS] }, children: [ { type: 'element', tagName: 'span', - properties: { className: ['code-language'] }, + properties: { className: [CODE_LANGUAGE_CLASS] }, children: [{ type: 'text', value: language }] }, { type: 'element', tagName: 'div', - properties: { className: ['code-block-actions'] }, + properties: { className: [CODE_BLOCK_ACTIONS_CLASS] }, children: actions } ] }; } +function createScrollContainer(preElement: Element): Element { + return { + type: 'element', + tagName: 'div', + properties: { className: [CODE_BLOCK_SCROLL_CONTAINER_CLASS] }, + children: [preElement] + }; +} + function createWrapper(header: Element, preElement: Element): Element { return { type: 'element', tagName: 'div', - properties: { className: ['code-block-wrapper'] }, - children: [header, preElement] + properties: { className: [CODE_BLOCK_WRAPPER_CLASS, RELATIVE_CLASS] }, + children: [header, createScrollContainer(preElement)] }; } diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index 02fc6381c0..55af0ce816 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -90,7 +90,7 @@ export class ChatService { custom, timings_per_token, // Config options - disableReasoningFormat + disableReasoningParsing } = options; const normalizedMessages: ApiChatMessageData[] = messages @@ -127,7 +127,7 @@ export class ChatService { requestBody.model = options.model; } - requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto'; + requestBody.reasoning_format = disableReasoningParsing ? 'none' : 'auto'; if (temperature !== undefined) requestBody.temperature = temperature; if (max_tokens !== undefined) { diff --git a/tools/server/webui/src/lib/services/database.service.ts b/tools/server/webui/src/lib/services/database.service.ts new file mode 100644 index 0000000000..0d5a9c1b99 --- /dev/null +++ b/tools/server/webui/src/lib/services/database.service.ts @@ -0,0 +1,368 @@ +import Dexie, { type EntityTable } from 'dexie'; +import { findDescendantMessages } from '$lib/utils'; + +class LlamacppDatabase extends Dexie { + conversations!: EntityTable; + messages!: EntityTable; + + constructor() { + super('LlamacppWebui'); + + this.version(1).stores({ + conversations: 'id, lastModified, currNode, name', + messages: 'id, convId, type, role, timestamp, parent, children' + }); + } +} + +const db = new LlamacppDatabase(); +import { v4 as uuid } from 'uuid'; +import { MessageRole } from '$lib/enums/chat'; + +export class DatabaseService { + /** + * + * + * Conversations + * + * + */ + + /** + * Creates a new conversation. + * + * @param name - Name of the conversation + * @returns The created conversation + */ + static async createConversation(name: string): Promise { + const conversation: DatabaseConversation = { + id: uuid(), + name, + lastModified: Date.now(), + currNode: '' + }; + + await db.conversations.add(conversation); + return conversation; + } + + /** + * + * + * Messages + * + * + */ + + /** + * Creates a new message branch by adding a message and updating parent/child relationships. + * Also updates the conversation's currNode to point to the new message. + * + * @param message - Message to add (without id) + * @param parentId - Parent message ID to attach to + * @returns The created message + */ + static async createMessageBranch( + message: Omit, + parentId: string | null + ): Promise { + return await db.transaction('rw', [db.conversations, db.messages], async () => { + // Handle null parent (root message case) + if (parentId !== null) { + const parentMessage = await db.messages.get(parentId); + if (!parentMessage) { + throw new Error(`Parent message ${parentId} not found`); + } + } + + const newMessage: DatabaseMessage = { + ...message, + id: uuid(), + parent: parentId, + toolCalls: message.toolCalls ?? '', + children: [] + }; + + await db.messages.add(newMessage); + + // Update parent's children array if parent exists + if (parentId !== null) { + const parentMessage = await db.messages.get(parentId); + if (parentMessage) { + await db.messages.update(parentId, { + children: [...parentMessage.children, newMessage.id] + }); + } + } + + await this.updateConversation(message.convId, { + currNode: newMessage.id + }); + + return newMessage; + }); + } + + /** + * Creates a root message for a new conversation. + * Root messages are not displayed but serve as the tree root for branching. + * + * @param convId - Conversation ID + * @returns The created root message + */ + static async createRootMessage(convId: string): Promise { + const rootMessage: DatabaseMessage = { + id: uuid(), + convId, + type: 'root', + timestamp: Date.now(), + role: MessageRole.SYSTEM, + content: '', + parent: null, + toolCalls: '', + children: [] + }; + + await db.messages.add(rootMessage); + return rootMessage.id; + } + + /** + * Creates a system prompt message for a conversation. + * + * @param convId - Conversation ID + * @param systemPrompt - The system prompt content (must be non-empty) + * @param parentId - Parent message ID (typically the root message) + * @returns The created system message + * @throws Error if systemPrompt is empty + */ + static async createSystemMessage( + convId: string, + systemPrompt: string, + parentId: string + ): Promise { + const trimmedPrompt = systemPrompt.trim(); + if (!trimmedPrompt) { + throw new Error('Cannot create system message with empty content'); + } + + const systemMessage: DatabaseMessage = { + id: uuid(), + convId, + type: MessageRole.SYSTEM, + timestamp: Date.now(), + role: MessageRole.SYSTEM, + content: trimmedPrompt, + parent: parentId, + children: [] + }; + + await db.messages.add(systemMessage); + + const parentMessage = await db.messages.get(parentId); + if (parentMessage) { + await db.messages.update(parentId, { + children: [...parentMessage.children, systemMessage.id] + }); + } + + return systemMessage; + } + + /** + * Deletes a conversation and all its messages. + * + * @param id - Conversation ID + */ + static async deleteConversation(id: string): Promise { + await db.transaction('rw', [db.conversations, db.messages], async () => { + await db.conversations.delete(id); + await db.messages.where('convId').equals(id).delete(); + }); + } + + /** + * Deletes a message and removes it from its parent's children array. + * + * @param messageId - ID of the message to delete + */ + static async deleteMessage(messageId: string): Promise { + await db.transaction('rw', db.messages, async () => { + const message = await db.messages.get(messageId); + if (!message) return; + + // Remove this message from its parent's children array + if (message.parent) { + const parent = await db.messages.get(message.parent); + if (parent) { + parent.children = parent.children.filter((childId: string) => childId !== messageId); + await db.messages.put(parent); + } + } + + // Delete the message + await db.messages.delete(messageId); + }); + } + + /** + * Deletes a message and all its descendant messages (cascading deletion). + * This removes the entire branch starting from the specified message. + * + * @param conversationId - ID of the conversation containing the message + * @param messageId - ID of the root message to delete (along with all descendants) + * @returns Array of all deleted message IDs + */ + static async deleteMessageCascading( + conversationId: string, + messageId: string + ): Promise { + return await db.transaction('rw', db.messages, async () => { + // Get all messages in the conversation to find descendants + const allMessages = await db.messages.where('convId').equals(conversationId).toArray(); + + // Find all descendant messages + const descendants = findDescendantMessages(allMessages, messageId); + const allToDelete = [messageId, ...descendants]; + + // Get the message to delete for parent cleanup + const message = await db.messages.get(messageId); + if (message && message.parent) { + const parent = await db.messages.get(message.parent); + if (parent) { + parent.children = parent.children.filter((childId: string) => childId !== messageId); + await db.messages.put(parent); + } + } + + // Delete all messages in the branch + await db.messages.bulkDelete(allToDelete); + + return allToDelete; + }); + } + + /** + * Gets all conversations, sorted by last modified time (newest first). + * + * @returns Array of conversations + */ + static async getAllConversations(): Promise { + return await db.conversations.orderBy('lastModified').reverse().toArray(); + } + + /** + * Gets a conversation by ID. + * + * @param id - Conversation ID + * @returns The conversation if found, otherwise undefined + */ + static async getConversation(id: string): Promise { + return await db.conversations.get(id); + } + + /** + * Gets all messages in a conversation, sorted by timestamp (oldest first). + * + * @param convId - Conversation ID + * @returns Array of messages in the conversation + */ + static async getConversationMessages(convId: string): Promise { + return await db.messages.where('convId').equals(convId).sortBy('timestamp'); + } + + /** + * Updates a conversation. + * + * @param id - Conversation ID + * @param updates - Partial updates to apply + * @returns Promise that resolves when the conversation is updated + */ + static async updateConversation( + id: string, + updates: Partial> + ): Promise { + await db.conversations.update(id, { + ...updates, + lastModified: Date.now() + }); + } + + /** + * + * + * Navigation + * + * + */ + + /** + * Updates the conversation's current node (active branch). + * This determines which conversation path is currently being viewed. + * + * @param convId - Conversation ID + * @param nodeId - Message ID to set as current node + */ + static async updateCurrentNode(convId: string, nodeId: string): Promise { + await this.updateConversation(convId, { + currNode: nodeId + }); + } + + /** + * Updates a message. + * + * @param id - Message ID + * @param updates - Partial updates to apply + * @returns Promise that resolves when the message is updated + */ + static async updateMessage( + id: string, + updates: Partial> + ): Promise { + await db.messages.update(id, updates); + } + + /** + * + * + * Import + * + * + */ + + /** + * Imports multiple conversations and their messages. + * Skips conversations that already exist. + * + * @param data - Array of { conv, messages } objects + */ + static async importConversations( + data: { conv: DatabaseConversation; messages: DatabaseMessage[] }[] + ): Promise<{ imported: number; skipped: number }> { + let importedCount = 0; + let skippedCount = 0; + + return await db.transaction('rw', [db.conversations, db.messages], async () => { + for (const item of data) { + const { conv, messages } = item; + + const existing = await db.conversations.get(conv.id); + if (existing) { + console.warn(`Conversation "${conv.name}" already exists, skipping...`); + skippedCount++; + continue; + } + + await db.conversations.add(conv); + for (const msg of messages) { + await db.messages.put(msg); + } + + importedCount++; + } + + return { imported: importedCount, skipped: skippedCount }; + }); + } +} diff --git a/tools/server/webui/src/lib/services/models.service.ts b/tools/server/webui/src/lib/services/models.service.ts new file mode 100644 index 0000000000..7357c3f400 --- /dev/null +++ b/tools/server/webui/src/lib/services/models.service.ts @@ -0,0 +1,99 @@ +import { ServerModelStatus } from '$lib/enums'; +import { apiFetch, apiPost } from '$lib/utils/api-fetch'; + +export class ModelsService { + /** + * + * + * Listing + * + * + */ + + /** + * Fetch list of models from OpenAI-compatible endpoint. + * Works in both MODEL and ROUTER modes. + * + * @returns List of available models with basic metadata + */ + static async list(): Promise { + return apiFetch('/v1/models'); + } + + /** + * Fetch list of all models with detailed metadata (ROUTER mode). + * Returns models with load status, paths, and other metadata + * beyond what the OpenAI-compatible endpoint provides. + * + * @returns List of models with detailed status and configuration info + */ + static async listRouter(): Promise { + return apiFetch('/v1/models'); + } + + /** + * + * + * Load/Unload + * + * + */ + + /** + * Load a model (ROUTER mode only). + * Sends POST request to `/models/load`. Note: the endpoint returns success + * before loading completes — use polling to await actual load status. + * + * @param modelId - Model identifier to load + * @param extraArgs - Optional additional arguments to pass to the model instance + * @returns Load response from the server + */ + static async load(modelId: string, extraArgs?: string[]): Promise { + const payload: { model: string; extra_args?: string[] } = { model: modelId }; + if (extraArgs && extraArgs.length > 0) { + payload.extra_args = extraArgs; + } + + return apiPost('/models/load', payload); + } + + /** + * Unload a model (ROUTER mode only). + * Sends POST request to `/models/unload`. Note: the endpoint returns success + * before unloading completes — use polling to await actual unload status. + * + * @param modelId - Model identifier to unload + * @returns Unload response from the server + */ + static async unload(modelId: string): Promise { + return apiPost('/models/unload', { model: modelId }); + } + + /** + * + * + * Status + * + * + */ + + /** + * Check if a model is loaded based on its metadata. + * + * @param model - Model data entry from the API response + * @returns True if the model status is LOADED + */ + static isModelLoaded(model: ApiModelDataEntry): boolean { + return model.status.value === ServerModelStatus.LOADED; + } + + /** + * Check if a model is currently loading. + * + * @param model - Model data entry from the API response + * @returns True if the model status is LOADING + */ + static isModelLoading(model: ApiModelDataEntry): boolean { + return model.status.value === ServerModelStatus.LOADING; + } +} diff --git a/tools/server/webui/src/lib/services/parameter-sync.service.spec.ts b/tools/server/webui/src/lib/services/parameter-sync.service.spec.ts new file mode 100644 index 0000000000..46cce5e7cb --- /dev/null +++ b/tools/server/webui/src/lib/services/parameter-sync.service.spec.ts @@ -0,0 +1,148 @@ +import { describe, it, expect } from 'vitest'; +import { ParameterSyncService } from './parameter-sync.service'; + +describe('ParameterSyncService', () => { + describe('roundFloatingPoint', () => { + it('should fix JavaScript floating-point precision issues', () => { + // Test the specific values from the screenshot + const mockServerParams = { + top_p: 0.949999988079071, + min_p: 0.009999999776482582, + temperature: 0.800000011920929, + top_k: 40, + samplers: ['top_k', 'typ_p', 'top_p', 'min_p', 'temperature'] + }; + + const result = ParameterSyncService.extractServerDefaults({ + ...mockServerParams, + // Add other required fields to match the API type + n_predict: 512, + seed: -1, + dynatemp_range: 0.0, + dynatemp_exponent: 1.0, + xtc_probability: 0.0, + xtc_threshold: 0.1, + typ_p: 1.0, + repeat_last_n: 64, + repeat_penalty: 1.0, + presence_penalty: 0.0, + frequency_penalty: 0.0, + dry_multiplier: 0.0, + dry_base: 1.75, + dry_allowed_length: 2, + dry_penalty_last_n: -1, + mirostat: 0, + mirostat_tau: 5.0, + mirostat_eta: 0.1, + stop: [], + max_tokens: -1, + n_keep: 0, + n_discard: 0, + ignore_eos: false, + stream: true, + logit_bias: [], + n_probs: 0, + min_keep: 0, + grammar: '', + grammar_lazy: false, + grammar_triggers: [], + preserved_tokens: [], + chat_format: '', + reasoning_format: '', + reasoning_in_content: false, + thinking_forced_open: false, + 'speculative.n_max': 0, + 'speculative.n_min': 0, + 'speculative.p_min': 0.0, + timings_per_token: false, + post_sampling_probs: false, + lora: [], + top_n_sigma: 0.0, + dry_sequence_breakers: [] + } as ApiLlamaCppServerProps['default_generation_settings']['params']); + + // Check that the problematic floating-point values are rounded correctly + expect(result.top_p).toBe(0.95); + expect(result.min_p).toBe(0.01); + expect(result.temperature).toBe(0.8); + expect(result.top_k).toBe(40); // Integer should remain unchanged + expect(result.samplers).toBe('top_k;typ_p;top_p;min_p;temperature'); + }); + + it('should preserve non-numeric values', () => { + const mockServerParams = { + samplers: ['top_k', 'temperature'], + max_tokens: -1, + temperature: 0.7 + }; + + const result = ParameterSyncService.extractServerDefaults({ + ...mockServerParams, + // Minimal required fields + n_predict: 512, + seed: -1, + dynatemp_range: 0.0, + dynatemp_exponent: 1.0, + top_k: 40, + top_p: 0.95, + min_p: 0.05, + xtc_probability: 0.0, + xtc_threshold: 0.1, + typ_p: 1.0, + repeat_last_n: 64, + repeat_penalty: 1.0, + presence_penalty: 0.0, + frequency_penalty: 0.0, + dry_multiplier: 0.0, + dry_base: 1.75, + dry_allowed_length: 2, + dry_penalty_last_n: -1, + mirostat: 0, + mirostat_tau: 5.0, + mirostat_eta: 0.1, + stop: [], + n_keep: 0, + n_discard: 0, + ignore_eos: false, + stream: true, + logit_bias: [], + n_probs: 0, + min_keep: 0, + grammar: '', + grammar_lazy: false, + grammar_triggers: [], + preserved_tokens: [], + chat_format: '', + reasoning_format: '', + reasoning_in_content: false, + thinking_forced_open: false, + 'speculative.n_max': 0, + 'speculative.n_min': 0, + 'speculative.p_min': 0.0, + timings_per_token: false, + post_sampling_probs: false, + lora: [], + top_n_sigma: 0.0, + dry_sequence_breakers: [] + } as ApiLlamaCppServerProps['default_generation_settings']['params']); + + expect(result.samplers).toBe('top_k;temperature'); + expect(result.max_tokens).toBe(-1); + expect(result.temperature).toBe(0.7); + }); + + it('should merge webui settings from props when provided', () => { + const result = ParameterSyncService.extractServerDefaults(null, { + pasteLongTextToFileLen: 0, + pdfAsImage: true, + renderUserContentAsMarkdown: false, + theme: 'dark' + }); + + expect(result.pasteLongTextToFileLen).toBe(0); + expect(result.pdfAsImage).toBe(true); + expect(result.renderUserContentAsMarkdown).toBe(false); + expect(result.theme).toBeUndefined(); + }); + }); +}); diff --git a/tools/server/webui/src/lib/services/parameter-sync.service.ts b/tools/server/webui/src/lib/services/parameter-sync.service.ts new file mode 100644 index 0000000000..6cb53d12d1 --- /dev/null +++ b/tools/server/webui/src/lib/services/parameter-sync.service.ts @@ -0,0 +1,400 @@ +import { normalizeFloatingPoint } from '$lib/utils'; +import { SyncableParameterType, ParameterSource } from '$lib/enums/settings'; + +type ParameterValue = string | number | boolean; +type ParameterRecord = Record; + +interface ParameterInfo { + value: string | number | boolean; + source: ParameterSource; + serverDefault?: string | number | boolean; + userOverride?: string | number | boolean; +} + +interface SyncableParameter { + key: string; + serverKey: string; + type: SyncableParameterType; + canSync: boolean; +} + +/** + * Mapping of webui setting keys to server parameter keys. + * Only parameters listed here can be synced from the server `/props` endpoint. + * Each entry defines the webui key, corresponding server key, value type, + * and whether sync is enabled. + */ +export const SYNCABLE_PARAMETERS: SyncableParameter[] = [ + { + key: 'temperature', + serverKey: 'temperature', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { key: 'top_k', serverKey: 'top_k', type: SyncableParameterType.NUMBER, canSync: true }, + { key: 'top_p', serverKey: 'top_p', type: SyncableParameterType.NUMBER, canSync: true }, + { key: 'min_p', serverKey: 'min_p', type: SyncableParameterType.NUMBER, canSync: true }, + { + key: 'dynatemp_range', + serverKey: 'dynatemp_range', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'dynatemp_exponent', + serverKey: 'dynatemp_exponent', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'xtc_probability', + serverKey: 'xtc_probability', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'xtc_threshold', + serverKey: 'xtc_threshold', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { key: 'typ_p', serverKey: 'typ_p', type: SyncableParameterType.NUMBER, canSync: true }, + { + key: 'repeat_last_n', + serverKey: 'repeat_last_n', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'repeat_penalty', + serverKey: 'repeat_penalty', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'presence_penalty', + serverKey: 'presence_penalty', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'frequency_penalty', + serverKey: 'frequency_penalty', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'dry_multiplier', + serverKey: 'dry_multiplier', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { key: 'dry_base', serverKey: 'dry_base', type: SyncableParameterType.NUMBER, canSync: true }, + { + key: 'dry_allowed_length', + serverKey: 'dry_allowed_length', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'dry_penalty_last_n', + serverKey: 'dry_penalty_last_n', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { key: 'max_tokens', serverKey: 'max_tokens', type: SyncableParameterType.NUMBER, canSync: true }, + { key: 'samplers', serverKey: 'samplers', type: SyncableParameterType.STRING, canSync: true }, + { + key: 'pasteLongTextToFileLen', + serverKey: 'pasteLongTextToFileLen', + type: SyncableParameterType.NUMBER, + canSync: true + }, + { + key: 'pdfAsImage', + serverKey: 'pdfAsImage', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'showThoughtInProgress', + serverKey: 'showThoughtInProgress', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'keepStatsVisible', + serverKey: 'keepStatsVisible', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'showMessageStats', + serverKey: 'showMessageStats', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'askForTitleConfirmation', + serverKey: 'askForTitleConfirmation', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'disableAutoScroll', + serverKey: 'disableAutoScroll', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'renderUserContentAsMarkdown', + serverKey: 'renderUserContentAsMarkdown', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'autoMicOnEmpty', + serverKey: 'autoMicOnEmpty', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'pyInterpreterEnabled', + serverKey: 'pyInterpreterEnabled', + type: SyncableParameterType.BOOLEAN, + canSync: true + }, + { + key: 'enableContinueGeneration', + serverKey: 'enableContinueGeneration', + type: SyncableParameterType.BOOLEAN, + canSync: true + } +]; + +export class ParameterSyncService { + /** + * + * + * Extraction + * + * + */ + + /** + * Round floating-point numbers to avoid JavaScript precision issues. + * E.g., 0.1 + 0.2 = 0.30000000000000004 → 0.3 + * + * @param value - Parameter value to normalize + * @returns Precision-normalized value + */ + private static roundFloatingPoint(value: ParameterValue): ParameterValue { + return normalizeFloatingPoint(value) as ParameterValue; + } + + /** + * Extract server default parameters that can be synced from `/props` response. + * Handles both generation settings parameters and webui-specific settings. + * Converts samplers array to semicolon-delimited string for UI display. + * + * @param serverParams - Raw generation settings from server `/props` endpoint + * @param webuiSettings - Optional webui-specific settings from server + * @returns Record of extracted parameter key-value pairs with normalized precision + */ + static extractServerDefaults( + serverParams: ApiLlamaCppServerProps['default_generation_settings']['params'] | null, + webuiSettings?: Record + ): ParameterRecord { + const extracted: ParameterRecord = {}; + + if (serverParams) { + for (const param of SYNCABLE_PARAMETERS) { + if (param.canSync && param.serverKey in serverParams) { + const value = (serverParams as unknown as Record)[ + param.serverKey + ]; + if (value !== undefined) { + // Apply precision rounding to avoid JavaScript floating-point issues + extracted[param.key] = this.roundFloatingPoint(value); + } + } + } + + // Handle samplers array conversion to string + if (serverParams.samplers && Array.isArray(serverParams.samplers)) { + extracted.samplers = serverParams.samplers.join(';'); + } + } + + if (webuiSettings) { + for (const param of SYNCABLE_PARAMETERS) { + if (param.canSync && param.serverKey in webuiSettings) { + const value = webuiSettings[param.serverKey]; + if (value !== undefined) { + extracted[param.key] = this.roundFloatingPoint(value); + } + } + } + } + + return extracted; + } + + /** + * + * + * Merging + * + * + */ + + /** + * Merge server defaults with current user settings. + * User overrides always take priority — only parameters not in `userOverrides` + * set will be updated from server defaults. + * + * @param currentSettings - Current parameter values in the settings store + * @param serverDefaults - Default values extracted from server props + * @param userOverrides - Set of parameter keys explicitly overridden by the user + * @returns Merged parameter record with user overrides preserved + */ + static mergeWithServerDefaults( + currentSettings: ParameterRecord, + serverDefaults: ParameterRecord, + userOverrides: Set = new Set() + ): ParameterRecord { + const merged = { ...currentSettings }; + + for (const [key, serverValue] of Object.entries(serverDefaults)) { + // Only update if user hasn't explicitly overridden this parameter + if (!userOverrides.has(key)) { + merged[key] = this.roundFloatingPoint(serverValue); + } + } + + return merged; + } + + /** + * + * + * Info + * + * + */ + + /** + * Get parameter information including source and values. + * Used by ChatSettingsParameterSourceIndicator to display the correct badge + * (Custom vs Default) for each parameter in the settings UI. + * + * @param key - The parameter key to get info for + * @param currentValue - The current value of the parameter + * @param propsDefaults - Server default values from `/props` + * @param userOverrides - Set of parameter keys explicitly overridden by the user + * @returns Parameter info with source, server default, and user override values + */ + static getParameterInfo( + key: string, + currentValue: ParameterValue, + propsDefaults: ParameterRecord, + userOverrides: Set + ): ParameterInfo { + const hasPropsDefault = propsDefaults[key] !== undefined; + const isUserOverride = userOverrides.has(key); + + // Simple logic: either using default (from props) or custom (user override) + const source = isUserOverride ? ParameterSource.CUSTOM : ParameterSource.DEFAULT; + + return { + value: currentValue, + source, + serverDefault: hasPropsDefault ? propsDefaults[key] : undefined, // Keep same field name for compatibility + userOverride: isUserOverride ? currentValue : undefined + }; + } + + /** + * Check if a parameter can be synced from server. + * + * @param key - The parameter key to check + * @returns True if the parameter is in the syncable parameters list + */ + static canSyncParameter(key: string): boolean { + return SYNCABLE_PARAMETERS.some((param) => param.key === key && param.canSync); + } + + /** + * Get all syncable parameter keys. + * + * @returns Array of parameter keys that can be synced from server + */ + static getSyncableParameterKeys(): string[] { + return SYNCABLE_PARAMETERS.filter((param) => param.canSync).map((param) => param.key); + } + + /** + * Validate a server parameter value against its expected type. + * + * @param key - The parameter key to validate + * @param value - The value to validate + * @returns True if value matches the expected type for this parameter + */ + static validateServerParameter(key: string, value: ParameterValue): boolean { + const param = SYNCABLE_PARAMETERS.find((p) => p.key === key); + if (!param) return false; + + switch (param.type) { + case SyncableParameterType.NUMBER: + return typeof value === 'number' && !isNaN(value); + case SyncableParameterType.STRING: + return typeof value === 'string'; + case SyncableParameterType.BOOLEAN: + return typeof value === 'boolean'; + default: + return false; + } + } + + /** + * + * + * Diff + * + * + */ + + /** + * Create a diff between current settings and server defaults. + * Shows which parameters differ from server values, useful for debugging + * and for the "Reset to defaults" functionality. + * + * @param currentSettings - Current parameter values in the settings store + * @param serverDefaults - Default values extracted from server props + * @returns Record of parameter diffs with current value, server value, and whether they differ + */ + static createParameterDiff( + currentSettings: ParameterRecord, + serverDefaults: ParameterRecord + ): Record { + const diff: Record< + string, + { current: ParameterValue; server: ParameterValue; differs: boolean } + > = {}; + + for (const key of this.getSyncableParameterKeys()) { + const currentValue = currentSettings[key]; + const serverValue = serverDefaults[key]; + + if (serverValue !== undefined) { + diff[key] = { + current: currentValue, + server: serverValue, + differs: currentValue !== serverValue + }; + } + } + + return diff; + } +} diff --git a/tools/server/webui/src/lib/services/parameter-sync.ts b/tools/server/webui/src/lib/services/parameter-sync.ts index d124cf5c8d..333260701f 100644 --- a/tools/server/webui/src/lib/services/parameter-sync.ts +++ b/tools/server/webui/src/lib/services/parameter-sync.ts @@ -70,12 +70,6 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [ canSync: true }, { key: 'showToolCalls', serverKey: 'showToolCalls', type: 'boolean', canSync: true }, - { - key: 'disableReasoningFormat', - serverKey: 'disableReasoningFormat', - type: 'boolean', - canSync: true - }, { key: 'keepStatsVisible', serverKey: 'keepStatsVisible', type: 'boolean', canSync: true }, { key: 'showMessageStats', serverKey: 'showMessageStats', type: 'boolean', canSync: true }, { diff --git a/tools/server/webui/src/lib/services/props.service.ts b/tools/server/webui/src/lib/services/props.service.ts new file mode 100644 index 0000000000..7373b7e016 --- /dev/null +++ b/tools/server/webui/src/lib/services/props.service.ts @@ -0,0 +1,47 @@ +import { apiFetchWithParams } from '$lib/utils/api-fetch'; + +export class PropsService { + /** + * + * + * Fetching + * + * + */ + + /** + * Fetches global server properties from the `/props` endpoint. + * In MODEL mode, returns modalities for the single loaded model. + * In ROUTER mode, returns server-wide settings without model-specific modalities. + * + * @param autoload - If false, prevents automatic model loading (default: false) + * @returns Server properties including default generation settings and capabilities + * @throws {Error} If the request fails or returns invalid data + */ + static async fetch(autoload = false): Promise { + const params: Record = {}; + if (!autoload) { + params.autoload = 'false'; + } + + return apiFetchWithParams('./props', params, { authOnly: true }); + } + + /** + * Fetches server properties for a specific model (ROUTER mode only). + * Required in ROUTER mode because global `/props` does not include per-model modalities. + * + * @param modelId - The model ID to fetch properties for + * @param autoload - If false, prevents automatic model loading (default: false) + * @returns Server properties specific to the requested model + * @throws {Error} If the request fails, model not found, or model not loaded + */ + static async fetchForModel(modelId: string, autoload = false): Promise { + const params: Record = { model: modelId }; + if (!autoload) { + params.autoload = 'false'; + } + + return apiFetchWithParams('./props', params, { authOnly: true }); + } +} diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 879b2f3245..f00f418b4c 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -15,6 +15,7 @@ import { } from '$lib/utils'; import { SvelteMap } from 'svelte/reactivity'; import { DEFAULT_CONTEXT } from '$lib/constants/default-context'; +import { SYSTEM_MESSAGE_PLACEHOLDER } from '$lib/constants/ui'; /** * chatStore - Active AI interaction and streaming state management @@ -76,6 +77,10 @@ class ChatStore { private isStreamingActive = $state(false); private isEditModeActive = $state(false); private addFilesHandler: ((files: File[]) => void) | null = $state(null); + pendingEditMessageId = $state(null); + // Draft preservation for navigation (e.g., when adding system prompt from welcome page) + private _pendingDraftMessage = $state(''); + private _pendingDraftFiles = $state([]); // ───────────────────────────────────────────────────────────────────────────── // Loading State @@ -113,6 +118,16 @@ class ChatStore { this.isLoading = this.isChatLoading(convId); const streamingState = this.getChatStreaming(convId); this.currentResponse = streamingState?.response || ''; + this.isStreamingActive = streamingState !== undefined; + this.setActiveProcessingConversation(convId); + + // Sync streaming content to activeMessages so UI displays current content + if (streamingState?.response && streamingState?.messageId) { + const idx = conversationsStore.findMessageIndex(streamingState.messageId); + if (idx !== -1) { + conversationsStore.updateMessageAtIndex(idx, { content: streamingState.response }); + } + } } /** @@ -455,6 +470,166 @@ class ChatStore { } } + /** + * Adds a system message at the top of a conversation and triggers edit mode. + * The system message is inserted between root and the first message of the active branch. + * Creates a new conversation if one doesn't exist. + */ + async addSystemPrompt(): Promise { + let activeConv = conversationsStore.activeConversation; + + // Create conversation if needed + if (!activeConv) { + await conversationsStore.createConversation(); + activeConv = conversationsStore.activeConversation; + } + if (!activeConv) return; + + try { + // Get all messages to find the root + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); + let rootId: string; + + // Create root message if it doesn't exist + if (!rootMessage) { + rootId = await DatabaseService.createRootMessage(activeConv.id); + } else { + rootId = rootMessage.id; + } + + // Check if there's already a system message as root's child + const existingSystemMessage = allMessages.find( + (m) => m.role === 'system' && m.parent === rootId + ); + + if (existingSystemMessage) { + // If system message exists, just trigger edit mode on it + this.pendingEditMessageId = existingSystemMessage.id; + + // Make sure it's in active messages at the beginning + if (!conversationsStore.activeMessages.some((m) => m.id === existingSystemMessage.id)) { + conversationsStore.activeMessages.unshift(existingSystemMessage); + } + return; + } + + // Find the first message of the active branch (child of root that's in activeMessages) + const activeMessages = conversationsStore.activeMessages; + const firstActiveMessage = activeMessages.find((m) => m.parent === rootId); + + // Create new system message with placeholder content (will be edited by user) + const systemMessage = await DatabaseService.createSystemMessage( + activeConv.id, + SYSTEM_MESSAGE_PLACEHOLDER, + rootId + ); + + // If there's a first message in the active branch, re-parent it to the system message + if (firstActiveMessage) { + // Update the first message's parent to be the system message + await DatabaseService.updateMessage(firstActiveMessage.id, { + parent: systemMessage.id + }); + + // Update the system message's children to include the first message + await DatabaseService.updateMessage(systemMessage.id, { + children: [firstActiveMessage.id] + }); + + // Remove first message from root's children + const updatedRootChildren = rootMessage + ? rootMessage.children.filter((id: string) => id !== firstActiveMessage.id) + : []; + // Note: system message was already added to root's children by createSystemMessage + await DatabaseService.updateMessage(rootId, { + children: [ + ...updatedRootChildren.filter((id: string) => id !== systemMessage.id), + systemMessage.id + ] + }); + + // Update local state + const firstMsgIndex = conversationsStore.findMessageIndex(firstActiveMessage.id); + if (firstMsgIndex !== -1) { + conversationsStore.updateMessageAtIndex(firstMsgIndex, { parent: systemMessage.id }); + } + } + + // Add system message to active messages at the beginning + conversationsStore.activeMessages.unshift(systemMessage); + + // Set pending edit message ID to trigger edit mode + this.pendingEditMessageId = systemMessage.id; + + conversationsStore.updateConversationTimestamp(); + } catch (error) { + console.error('Failed to add system prompt:', error); + } + } + + /** + * Removes a system message placeholder without deleting its children. + * Re-parents children back to the root message. + * If this is a new empty conversation (only root + system placeholder), deletes the entire conversation. + * @returns true if the entire conversation was deleted, false otherwise + */ + async removeSystemPromptPlaceholder(messageId: string): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return false; + + try { + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const systemMessage = allMessages.find((m) => m.id === messageId); + if (!systemMessage || systemMessage.role !== 'system') return false; + + const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); + if (!rootMessage) return false; + + // Check if this is a new empty conversation (only root + system placeholder) + const isEmptyConversation = allMessages.length === 2 && systemMessage.children.length === 0; + + if (isEmptyConversation) { + // Delete the entire conversation + await conversationsStore.deleteConversation(activeConv.id); + return true; + } + + // Re-parent system message's children to root + for (const childId of systemMessage.children) { + await DatabaseService.updateMessage(childId, { parent: rootMessage.id }); + + // Update local state + const childIndex = conversationsStore.findMessageIndex(childId); + if (childIndex !== -1) { + conversationsStore.updateMessageAtIndex(childIndex, { parent: rootMessage.id }); + } + } + + // Update root's children: remove system message, add system's children + const newRootChildren = [ + ...rootMessage.children.filter((id: string) => id !== messageId), + ...systemMessage.children + ]; + await DatabaseService.updateMessage(rootMessage.id, { children: newRootChildren }); + + // Delete the system message (without cascade) + await DatabaseService.deleteMessage(messageId); + + // Remove from active messages + const systemIndex = conversationsStore.findMessageIndex(messageId); + if (systemIndex !== -1) { + conversationsStore.activeMessages.splice(systemIndex, 1); + } + + conversationsStore.updateConversationTimestamp(); + return false; + } catch (error) { + console.error('Failed to remove system prompt placeholder:', error); + return false; + } + } + private async createAssistantMessage(parentId?: string): Promise { const activeConv = conversationsStore.activeConversation; if (!activeConv) return null; @@ -916,6 +1091,28 @@ class ChatStore { if (!activeConv) return { totalCount: 0, userMessages: 0, assistantMessages: 0, messageTypes: [] }; const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const messageToDelete = allMessages.find((m) => m.id === messageId); + + // For system messages, don't count descendants as they will be preserved (reparented to root) + if (messageToDelete?.role === 'system') { + const messagesToDelete = allMessages.filter((m) => m.id === messageId); + let userMessages = 0, + assistantMessages = 0; + const messageTypes: string[] = []; + + for (const msg of messagesToDelete) { + if (msg.role === 'user') { + userMessages++; + if (!messageTypes.includes('user message')) messageTypes.push('user message'); + } else if (msg.role === 'assistant') { + assistantMessages++; + if (!messageTypes.includes('assistant response')) messageTypes.push('assistant response'); + } + } + + return { totalCount: 1, userMessages, assistantMessages, messageTypes }; + } + const descendants = findDescendantMessages(allMessages, messageId); const allToDelete = [messageId, ...descendants]; const messagesToDelete = allMessages.filter((m) => allToDelete.includes(m.id)); @@ -1381,6 +1578,31 @@ class ChatStore { return this.addFilesHandler; } + savePendingDraft(message: string, files: ChatUploadedFile[]): void { + this._pendingDraftMessage = message; + this._pendingDraftFiles = [...files]; + } + + consumePendingDraft(): { message: string; files: ChatUploadedFile[] } | null { + if (!this._pendingDraftMessage && this._pendingDraftFiles.length === 0) { + return null; + } + + const draft = { + message: this._pendingDraftMessage, + files: [...this._pendingDraftFiles] + }; + + this._pendingDraftMessage = ''; + this._pendingDraftFiles = []; + + return draft; + } + + hasPendingDraft(): boolean { + return Boolean(this._pendingDraftMessage) || this._pendingDraftFiles.length > 0; + } + public getAllLoadingChats(): string[] { return Array.from(this.chatLoadingStates.keys()); } @@ -1427,7 +1649,7 @@ class ChatStore { // Config options needed by ChatService if (currentConfig.systemMessage) apiOptions.systemMessage = currentConfig.systemMessage; - if (currentConfig.disableReasoningFormat) apiOptions.disableReasoningFormat = true; + if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true; if (hasValue(currentConfig.temperature)) apiOptions.temperature = Number(currentConfig.temperature); @@ -1485,3 +1707,7 @@ export const isEditing = () => chatStore.isEditing(); export const isLoading = () => chatStore.isLoading; export const setEditModeActive = (handler: (files: File[]) => void) => chatStore.setEditModeActive(handler); +export const pendingEditMessageId = () => chatStore.pendingEditMessageId; +export const clearPendingEditMessageId = () => (chatStore.pendingEditMessageId = null); +export const removeSystemPromptPlaceholder = (messageId: string) => + chatStore.removeSystemPromptPlaceholder(messageId); diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index 714509f024..307e3b71d9 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -1,8 +1,19 @@ -import type { ServerModelStatus, ServerRole } from '$lib/enums'; -import type { ChatMessagePromptProgress } from './chat'; +import type { ContentPartType, ServerModelStatus, ServerRole } from '$lib/enums'; +import type { ChatMessagePromptProgress, ChatRole } from './chat'; + +export interface ApiChatCompletionToolFunction { + name: string; + description?: string; + parameters: Record; +} + +export interface ApiChatCompletionTool { + type: 'function'; + function: ApiChatCompletionToolFunction; +} export interface ApiChatMessageContentPart { - type: 'text' | 'image_url' | 'input_audio'; + type: ContentPartType; text?: string; image_url?: { url: string; @@ -34,6 +45,8 @@ export interface ApiErrorResponse { export interface ApiChatMessageData { role: ChatRole; content: string | ApiChatMessageContentPart[]; + tool_calls?: ApiChatCompletionToolCall[]; + tool_call_id?: string; timestamp?: number; } @@ -188,6 +201,7 @@ export interface ApiChatCompletionRequest { stream?: boolean; model?: string; return_progress?: boolean; + tools?: ApiChatCompletionTool[]; // Reasoning parameters reasoning_format?: string; // Generation parameters @@ -247,6 +261,7 @@ export interface ApiChatCompletionStreamChunk { model?: string; tool_calls?: ApiChatCompletionToolCallDelta[]; }; + finish_reason?: string | null; }>; timings?: { prompt_n?: number; @@ -267,8 +282,9 @@ export interface ApiChatCompletionResponse { content: string; reasoning_content?: string; model?: string; - tool_calls?: ApiChatCompletionToolCallDelta[]; + tool_calls?: ApiChatCompletionToolCall[]; }; + finish_reason?: string | null; }>; } @@ -335,7 +351,7 @@ export interface ApiProcessingState { tokensDecoded: number; tokensRemaining: number; contextUsed: number; - contextTotal: number; + contextTotal: number | null; outputTokensUsed: number; // Total output tokens (thinking + regular content) outputTokensMax: number; // Max output tokens allowed temperature: number; diff --git a/tools/server/webui/src/lib/types/models.d.ts b/tools/server/webui/src/lib/types/models.d.ts index ef44a2cb6d..505867a1f0 100644 --- a/tools/server/webui/src/lib/types/models.d.ts +++ b/tools/server/webui/src/lib/types/models.d.ts @@ -1,8 +1,5 @@ import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api'; -/** - * Model modalities - vision and audio capabilities - */ export interface ModelModalities { vision: boolean; audio: boolean; @@ -14,8 +11,15 @@ export interface ModelOption { model: string; description?: string; capabilities: string[]; - /** Model modalities from /props endpoint */ modalities?: ModelModalities; details?: ApiModelDetails['details']; meta?: ApiModelDataEntry['meta']; } + +/** + * Modality capabilities for file validation + */ +export interface ModalityCapabilities { + hasVision: boolean; + hasAudio: boolean; +} diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 38b3047dd0..d894245ec3 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -18,8 +18,8 @@ export interface SettingsChatServiceOptions { model?: string; // System message to inject systemMessage?: string; - // Disable reasoning format (use 'none' instead of 'auto') - disableReasoningFormat?: boolean; + // Disable reasoning parsing (use 'none' instead of 'auto') + disableReasoningParsing?: boolean; // Generation parameters temperature?: number; max_tokens?: number; diff --git a/tools/server/webui/src/lib/utils/abort.ts b/tools/server/webui/src/lib/utils/abort.ts new file mode 100644 index 0000000000..fc4f31ec69 --- /dev/null +++ b/tools/server/webui/src/lib/utils/abort.ts @@ -0,0 +1,151 @@ +/** + * Abort Signal Utilities + * + * Provides utilities for consistent AbortSignal propagation across the application. + * These utilities help ensure that async operations can be properly cancelled + * when needed (e.g., user stops generation, navigates away, etc.). + */ + +/** + * Throws an AbortError if the signal is aborted. + * Use this at the start of async operations to fail fast. + * + * @param signal - Optional AbortSignal to check + * @throws DOMException with name 'AbortError' if signal is aborted + * + * @example + * ```ts + * async function fetchData(signal?: AbortSignal) { + * throwIfAborted(signal); + * // ... proceed with operation + * } + * ``` + */ +export function throwIfAborted(signal?: AbortSignal): void { + if (signal?.aborted) { + throw new DOMException('Operation was aborted', 'AbortError'); + } +} + +/** + * Checks if an error is an AbortError. + * Use this to distinguish between user-initiated cancellation and actual errors. + * + * @param error - Error to check + * @returns true if the error is an AbortError + * + * @example + * ```ts + * try { + * await fetchData(signal); + * } catch (error) { + * if (isAbortError(error)) { + * // User cancelled - no error dialog needed + * return; + * } + * // Handle actual error + * } + * ``` + */ +export function isAbortError(error: unknown): boolean { + if (error instanceof DOMException && error.name === 'AbortError') { + return true; + } + if (error instanceof Error && error.name === 'AbortError') { + return true; + } + return false; +} + +/** + * Creates a new AbortController that is linked to one or more parent signals. + * When any parent signal aborts, the returned controller also aborts. + * + * Useful for creating child operations that should be cancelled when + * either the parent operation or their own timeout/condition triggers. + * + * @param signals - Parent signals to link to (undefined signals are ignored) + * @returns A new AbortController linked to all provided signals + * + * @example + * ```ts + * // Link to user's abort signal and add a timeout + * const linked = createLinkedController(userSignal, timeoutSignal); + * await fetch(url, { signal: linked.signal }); + * ``` + */ +export function createLinkedController(...signals: (AbortSignal | undefined)[]): AbortController { + const controller = new AbortController(); + + for (const signal of signals) { + if (!signal) continue; + + // If already aborted, abort immediately + if (signal.aborted) { + controller.abort(signal.reason); + return controller; + } + + // Link to parent signal + signal.addEventListener('abort', () => controller.abort(signal.reason), { once: true }); + } + + return controller; +} + +/** + * Creates an AbortSignal that times out after the specified duration. + * + * @param ms - Timeout duration in milliseconds + * @returns AbortSignal that will abort after the timeout + * + * @example + * ```ts + * const signal = createTimeoutSignal(5000); // 5 second timeout + * await fetch(url, { signal }); + * ``` + */ +export function createTimeoutSignal(ms: number): AbortSignal { + return AbortSignal.timeout(ms); +} + +/** + * Wraps a promise to reject if the signal is aborted. + * Useful for making non-abortable promises respect an AbortSignal. + * + * @param promise - Promise to wrap + * @param signal - AbortSignal to respect + * @returns Promise that rejects with AbortError if signal aborts + * + * @example + * ```ts + * // Make a non-abortable operation respect abort signal + * const result = await withAbortSignal( + * someNonAbortableOperation(), + * signal + * ); + * ``` + */ +export async function withAbortSignal(promise: Promise, signal?: AbortSignal): Promise { + if (!signal) return promise; + + throwIfAborted(signal); + + return new Promise((resolve, reject) => { + const abortHandler = () => { + reject(new DOMException('Operation was aborted', 'AbortError')); + }; + + signal.addEventListener('abort', abortHandler, { once: true }); + + promise + .then((value) => { + signal.removeEventListener('abort', abortHandler); + resolve(value); + }) + .catch((error) => { + signal.removeEventListener('abort', abortHandler); + reject(error); + }); + }); +} diff --git a/tools/server/webui/src/lib/utils/api-fetch.ts b/tools/server/webui/src/lib/utils/api-fetch.ts new file mode 100644 index 0000000000..28757a966f --- /dev/null +++ b/tools/server/webui/src/lib/utils/api-fetch.ts @@ -0,0 +1,154 @@ +import { base } from '$app/paths'; +import { getJsonHeaders, getAuthHeaders } from './api-headers'; + +/** + * API Fetch Utilities + * + * Provides common fetch patterns used across services: + * - Automatic JSON headers + * - Error handling with proper error messages + * - Base path resolution + */ + +export interface ApiFetchOptions extends Omit { + /** + * Use auth-only headers (no Content-Type). + * Default: false (uses JSON headers with Content-Type: application/json) + */ + authOnly?: boolean; + /** + * Additional headers to merge with default headers. + */ + headers?: Record; +} + +/** + * Fetch JSON data from an API endpoint with standard headers and error handling. + * + * @param path - API path (will be prefixed with base path) + * @param options - Fetch options with additional authOnly flag + * @returns Parsed JSON response + * @throws Error with formatted message on failure + * + * @example + * ```typescript + * // GET request + * const models = await apiFetch('/v1/models'); + * + * // POST request + * const result = await apiFetch('/models/load', { + * method: 'POST', + * body: JSON.stringify({ model: 'gpt-4' }) + * }); + * ``` + */ +export async function apiFetch(path: string, options: ApiFetchOptions = {}): Promise { + const { authOnly = false, headers: customHeaders, ...fetchOptions } = options; + + const baseHeaders = authOnly ? getAuthHeaders() : getJsonHeaders(); + const headers = { ...baseHeaders, ...customHeaders }; + + const url = path.startsWith('http://') || path.startsWith('https://') ? path : `${base}${path}`; + + const response = await fetch(url, { + ...fetchOptions, + headers + }); + + if (!response.ok) { + const errorMessage = await parseErrorMessage(response); + throw new Error(errorMessage); + } + + return response.json() as Promise; +} + +/** + * Fetch with URL constructed from base URL and query parameters. + * + * @param basePath - Base API path + * @param params - Query parameters to append + * @param options - Fetch options + * @returns Parsed JSON response + * + * @example + * ```typescript + * const props = await apiFetchWithParams('./props', { + * model: 'gpt-4', + * autoload: 'false' + * }); + * ``` + */ +export async function apiFetchWithParams( + basePath: string, + params: Record, + options: ApiFetchOptions = {} +): Promise { + const url = new URL(basePath, window.location.href); + + for (const [key, value] of Object.entries(params)) { + if (value !== undefined && value !== null) { + url.searchParams.set(key, value); + } + } + + const { authOnly = false, headers: customHeaders, ...fetchOptions } = options; + + const baseHeaders = authOnly ? getAuthHeaders() : getJsonHeaders(); + const headers = { ...baseHeaders, ...customHeaders }; + + const response = await fetch(url.toString(), { + ...fetchOptions, + headers + }); + + if (!response.ok) { + const errorMessage = await parseErrorMessage(response); + throw new Error(errorMessage); + } + + return response.json() as Promise; +} + +/** + * POST JSON data to an API endpoint. + * + * @param path - API path + * @param body - Request body (will be JSON stringified) + * @param options - Additional fetch options + * @returns Parsed JSON response + */ +export async function apiPost( + path: string, + body: B, + options: ApiFetchOptions = {} +): Promise { + return apiFetch(path, { + method: 'POST', + body: JSON.stringify(body), + ...options + }); +} + +/** + * Parse error message from a failed response. + * Tries to extract error message from JSON body, falls back to status text. + */ +async function parseErrorMessage(response: Response): Promise { + try { + const errorData = await response.json(); + if (errorData?.error?.message) { + return errorData.error.message; + } + if (errorData?.error && typeof errorData.error === 'string') { + return errorData.error; + } + if (errorData?.message) { + return errorData.message; + } + } catch { + // JSON parsing failed, use status text + } + + return `Request failed: ${response.status} ${response.statusText}`; +} diff --git a/tools/server/webui/src/lib/utils/branching.ts b/tools/server/webui/src/lib/utils/branching.ts index 3be56047a5..ee3a505eed 100644 --- a/tools/server/webui/src/lib/utils/branching.ts +++ b/tools/server/webui/src/lib/utils/branching.ts @@ -15,6 +15,8 @@ * └── message 5 (assistant) */ +import { MessageRole } from '$lib/enums/chat'; + /** * Filters messages to get the conversation path from root to a specific leaf node. * If the leafNodeId doesn't exist, returns the path with the latest timestamp. @@ -65,8 +67,13 @@ export function filterByLeafNodeId( currentNode = nodeMap.get(currentNode.parent); } - // Sort by timestamp to get chronological order (root to leaf) - result.sort((a, b) => a.timestamp - b.timestamp); + // Sort: system messages first, then by timestamp + result.sort((a, b) => { + if (a.role === MessageRole.SYSTEM && b.role !== MessageRole.SYSTEM) return -1; + if (a.role !== MessageRole.SYSTEM && b.role === MessageRole.SYSTEM) return 1; + + return a.timestamp - b.timestamp; + }); return result; } diff --git a/tools/server/webui/src/lib/utils/browser-only.ts b/tools/server/webui/src/lib/utils/browser-only.ts index 0af800638b..27d2be4aaa 100644 --- a/tools/server/webui/src/lib/utils/browser-only.ts +++ b/tools/server/webui/src/lib/utils/browser-only.ts @@ -23,7 +23,7 @@ export { } from './pdf-processing'; // File conversion utilities (depends on pdf-processing) -export { parseFilesToMessageExtras, type FileProcessingResult } from './convert-files-to-extra'; +export { parseFilesToMessageExtras } from './convert-files-to-extra'; // File upload processing utilities (depends on pdf-processing, svg-to-png, webp-to-png) export { processFilesToChatUploaded } from './process-uploaded-files'; diff --git a/tools/server/webui/src/lib/utils/cache-ttl.ts b/tools/server/webui/src/lib/utils/cache-ttl.ts new file mode 100644 index 0000000000..9d1f005822 --- /dev/null +++ b/tools/server/webui/src/lib/utils/cache-ttl.ts @@ -0,0 +1,293 @@ +const DEFAULT_CACHE_TTL_MS = 5 * 60 * 1000; +const DEFAULT_CACHE_MAX_ENTRIES = 100; + +/** + * TTL Cache - Time-To-Live cache implementation for memory optimization + * + * Provides automatic expiration of cached entries to prevent memory bloat + * in long-running sessions. + * + * @example + * ```ts + * const cache = new TTLCache({ ttlMs: 5 * 60 * 1000 }); // 5 minutes + * cache.set('key', data); + * const value = cache.get('key'); // null if expired + * ``` + */ + +export interface TTLCacheOptions { + /** Time-to-live in milliseconds. Default: 5 minutes */ + ttlMs?: number; + /** Maximum number of entries. Oldest entries are evicted when exceeded. Default: 100 */ + maxEntries?: number; + /** Callback when an entry expires or is evicted */ + onEvict?: (key: string, value: unknown) => void; +} + +interface CacheEntry { + value: T; + expiresAt: number; + lastAccessed: number; +} + +export class TTLCache { + private cache = new Map>(); + private readonly ttlMs: number; + private readonly maxEntries: number; + private readonly onEvict?: (key: string, value: unknown) => void; + + constructor(options: TTLCacheOptions = {}) { + this.ttlMs = options.ttlMs ?? DEFAULT_CACHE_TTL_MS; + this.maxEntries = options.maxEntries ?? DEFAULT_CACHE_MAX_ENTRIES; + this.onEvict = options.onEvict; + } + + /** + * Get a value from cache. Returns null if expired or not found. + */ + get(key: K): V | null { + const entry = this.cache.get(key); + if (!entry) return null; + + if (Date.now() > entry.expiresAt) { + this.delete(key); + return null; + } + + // Update last accessed time for LRU-like behavior + entry.lastAccessed = Date.now(); + return entry.value; + } + + /** + * Set a value in cache with TTL. + */ + set(key: K, value: V, customTtlMs?: number): void { + // Evict oldest entries if at capacity + if (this.cache.size >= this.maxEntries && !this.cache.has(key)) { + this.evictOldest(); + } + + const ttl = customTtlMs ?? this.ttlMs; + const now = Date.now(); + + this.cache.set(key, { + value, + expiresAt: now + ttl, + lastAccessed: now + }); + } + + /** + * Check if key exists and is not expired. + */ + has(key: K): boolean { + const entry = this.cache.get(key); + if (!entry) return false; + + if (Date.now() > entry.expiresAt) { + this.delete(key); + return false; + } + + return true; + } + + /** + * Delete a specific key from cache. + */ + delete(key: K): boolean { + const entry = this.cache.get(key); + if (entry && this.onEvict) { + this.onEvict(key, entry.value); + } + return this.cache.delete(key); + } + + /** + * Clear all entries from cache. + */ + clear(): void { + if (this.onEvict) { + for (const [key, entry] of this.cache) { + this.onEvict(key, entry.value); + } + } + this.cache.clear(); + } + + /** + * Get the number of entries (including potentially expired ones). + */ + get size(): number { + return this.cache.size; + } + + /** + * Remove all expired entries from cache. + * Call periodically for proactive cleanup. + */ + prune(): number { + const now = Date.now(); + let pruned = 0; + + for (const [key, entry] of this.cache) { + if (now > entry.expiresAt) { + this.delete(key); + pruned++; + } + } + + return pruned; + } + + /** + * Get all valid (non-expired) keys. + */ + keys(): K[] { + const now = Date.now(); + const validKeys: K[] = []; + + for (const [key, entry] of this.cache) { + if (now <= entry.expiresAt) { + validKeys.push(key); + } + } + + return validKeys; + } + + /** + * Evict the oldest (least recently accessed) entry. + */ + private evictOldest(): void { + let oldestKey: K | null = null; + let oldestTime = Infinity; + + for (const [key, entry] of this.cache) { + if (entry.lastAccessed < oldestTime) { + oldestTime = entry.lastAccessed; + oldestKey = key; + } + } + + if (oldestKey !== null) { + this.delete(oldestKey); + } + } + + /** + * Refresh TTL for an existing entry without changing the value. + */ + touch(key: K): boolean { + const entry = this.cache.get(key); + if (!entry) return false; + + const now = Date.now(); + if (now > entry.expiresAt) { + this.delete(key); + return false; + } + + entry.expiresAt = now + this.ttlMs; + entry.lastAccessed = now; + return true; + } +} + +/** + * Reactive TTL Map for Svelte stores + * Wraps SvelteMap with TTL functionality + */ +export class ReactiveTTLMap { + private entries = $state>>(new Map()); + private readonly ttlMs: number; + private readonly maxEntries: number; + + constructor(options: TTLCacheOptions = {}) { + this.ttlMs = options.ttlMs ?? DEFAULT_CACHE_TTL_MS; + this.maxEntries = options.maxEntries ?? DEFAULT_CACHE_MAX_ENTRIES; + } + + get(key: K): V | null { + const entry = this.entries.get(key); + if (!entry) return null; + + if (Date.now() > entry.expiresAt) { + this.entries.delete(key); + return null; + } + + entry.lastAccessed = Date.now(); + return entry.value; + } + + set(key: K, value: V, customTtlMs?: number): void { + if (this.entries.size >= this.maxEntries && !this.entries.has(key)) { + this.evictOldest(); + } + + const ttl = customTtlMs ?? this.ttlMs; + const now = Date.now(); + + this.entries.set(key, { + value, + expiresAt: now + ttl, + lastAccessed: now + }); + } + + has(key: K): boolean { + const entry = this.entries.get(key); + if (!entry) return false; + + if (Date.now() > entry.expiresAt) { + this.entries.delete(key); + return false; + } + + return true; + } + + delete(key: K): boolean { + return this.entries.delete(key); + } + + clear(): void { + this.entries.clear(); + } + + get size(): number { + return this.entries.size; + } + + prune(): number { + const now = Date.now(); + let pruned = 0; + + for (const [key, entry] of this.entries) { + if (now > entry.expiresAt) { + this.entries.delete(key); + pruned++; + } + } + + return pruned; + } + + private evictOldest(): void { + let oldestKey: K | null = null; + let oldestTime = Infinity; + + for (const [key, entry] of this.entries) { + if (entry.lastAccessed < oldestTime) { + oldestTime = entry.lastAccessed; + oldestKey = key; + } + } + + if (oldestKey !== null) { + this.entries.delete(oldestKey); + } + } +} diff --git a/tools/server/webui/src/lib/utils/code.ts b/tools/server/webui/src/lib/utils/code.ts new file mode 100644 index 0000000000..67efc6b27e --- /dev/null +++ b/tools/server/webui/src/lib/utils/code.ts @@ -0,0 +1,85 @@ +import hljs from 'highlight.js'; +import { + NEWLINE, + DEFAULT_LANGUAGE, + LANG_PATTERN, + AMPERSAND_REGEX, + LT_REGEX, + GT_REGEX, + FENCE_PATTERN +} from '$lib/constants/code'; + +export interface IncompleteCodeBlock { + language: string; + code: string; + openingIndex: number; +} + +/** + * Highlights code using highlight.js + * @param code - The code to highlight + * @param language - The programming language + * @returns HTML string with syntax highlighting + */ +export function highlightCode(code: string, language: string): string { + if (!code) return ''; + + try { + const lang = language.toLowerCase(); + const isSupported = hljs.getLanguage(lang); + + if (isSupported) { + return hljs.highlight(code, { language: lang }).value; + } else { + return hljs.highlightAuto(code).value; + } + } catch { + // Fallback to escaped plain text + return code + .replace(AMPERSAND_REGEX, '&') + .replace(LT_REGEX, '<') + .replace(GT_REGEX, '>'); + } +} + +/** + * Detects if markdown ends with an incomplete code block (opened but not closed). + * Returns the code block info if found, null otherwise. + * @param markdown - The raw markdown string to check + * @returns IncompleteCodeBlock info or null + */ +export function detectIncompleteCodeBlock(markdown: string): IncompleteCodeBlock | null { + // Count all code fences in the markdown + // A code block is incomplete if there's an odd number of ``` fences + const fencePattern = new RegExp(FENCE_PATTERN.source, FENCE_PATTERN.flags); + const fences: number[] = []; + let fenceMatch; + + while ((fenceMatch = fencePattern.exec(markdown)) !== null) { + // Store the position after the ``` + const pos = fenceMatch[0].startsWith(NEWLINE) ? fenceMatch.index + 1 : fenceMatch.index; + fences.push(pos); + } + + // If even number of fences (including 0), all code blocks are closed + if (fences.length % 2 === 0) { + return null; + } + + // Odd number means last code block is incomplete + // The last fence is the opening of the incomplete block + const openingIndex = fences[fences.length - 1]; + const afterOpening = markdown.slice(openingIndex + 3); + + // Extract language and code content + const langMatch = afterOpening.match(LANG_PATTERN); + const language = langMatch?.[1] || DEFAULT_LANGUAGE; + const codeStartIndex = openingIndex + 3 + (langMatch?.[0]?.length ?? 0); + const code = markdown.slice(codeStartIndex); + + return { + language, + code, + openingIndex + }; +} diff --git a/tools/server/webui/src/lib/utils/data-url.ts b/tools/server/webui/src/lib/utils/data-url.ts new file mode 100644 index 0000000000..6f55be793d --- /dev/null +++ b/tools/server/webui/src/lib/utils/data-url.ts @@ -0,0 +1,10 @@ +/** + * Creates a base64 data URL from MIME type and base64-encoded data. + * + * @param mimeType - The MIME type (e.g., 'image/png', 'audio/mp3') + * @param base64Data - The base64-encoded data + * @returns A data URL string in format 'data:{mimeType};base64,{data}' + */ +export function createBase64DataUrl(mimeType: string, base64Data: string): string { + return `data:${mimeType};base64,${base64Data}`; +} diff --git a/tools/server/webui/src/lib/utils/debounce.ts b/tools/server/webui/src/lib/utils/debounce.ts new file mode 100644 index 0000000000..90a5a01783 --- /dev/null +++ b/tools/server/webui/src/lib/utils/debounce.ts @@ -0,0 +1,22 @@ +/** + * @param fn - The function to debounce + * @param delay - The delay in milliseconds + * @returns A debounced version of the function + */ +export function debounce) => void>( + fn: T, + delay: number +): (...args: Parameters) => void { + let timeoutId: ReturnType | null = null; + + return (...args: Parameters) => { + if (timeoutId) { + clearTimeout(timeoutId); + } + + timeoutId = setTimeout(() => { + fn(...args); + timeoutId = null; + }, delay); + }; +} diff --git a/tools/server/webui/src/lib/utils/image-error-fallback.ts b/tools/server/webui/src/lib/utils/image-error-fallback.ts new file mode 100644 index 0000000000..6e3260f4ae --- /dev/null +++ b/tools/server/webui/src/lib/utils/image-error-fallback.ts @@ -0,0 +1,10 @@ +/** + * Simplified HTML fallback for external images that fail to load. + * Displays a centered message with a link to open the image in a new tab. + */ +export function getImageErrorFallbackHtml(src: string): string { + return `
+ Image cannot be displayed + (open link) +
`; +} diff --git a/tools/server/webui/src/lib/utils/index.ts b/tools/server/webui/src/lib/utils/index.ts index 588167b8ca..5eb2bbaea1 100644 --- a/tools/server/webui/src/lib/utils/index.ts +++ b/tools/server/webui/src/lib/utils/index.ts @@ -9,6 +9,7 @@ // API utilities export { getAuthHeaders, getJsonHeaders } from './api-headers'; +export { apiFetch, apiFetchWithParams, apiPost, type ApiFetchOptions } from './api-fetch'; export { validateApiKey } from './api-key-validation'; // Attachment utilities @@ -75,8 +76,7 @@ export { maskInlineLaTeX, preprocessLaTeX } from './latex-protection'; export { isFileTypeSupportedByModel, filterFilesByModalities, - generateModalityErrorMessage, - type ModalityCapabilities + generateModalityErrorMessage } from './modality-file-validation'; // Model name utilities @@ -93,3 +93,6 @@ export { getLanguageFromFilename } from './syntax-highlight-language'; // Text file utilities export { isTextFileByName, readFileAsText, isLikelyTextFile } from './text-files'; + +// Image error fallback utilities +export { getImageErrorFallbackHtml } from './image-error-fallback'; diff --git a/tools/server/webui/src/lib/utils/modality-file-validation.ts b/tools/server/webui/src/lib/utils/modality-file-validation.ts index 136c084146..02fb4e4a36 100644 --- a/tools/server/webui/src/lib/utils/modality-file-validation.ts +++ b/tools/server/webui/src/lib/utils/modality-file-validation.ts @@ -5,12 +5,7 @@ import { getFileTypeCategory } from '$lib/utils'; import { FileTypeCategory } from '$lib/enums'; - -/** Modality capabilities for file validation */ -export interface ModalityCapabilities { - hasVision: boolean; - hasAudio: boolean; -} +import type { ModalityCapabilities } from '$lib/types/models'; /** * Check if a file type is supported by the given modalities diff --git a/tools/server/webui/src/lib/utils/text-files.ts b/tools/server/webui/src/lib/utils/text-files.ts index e8006de64d..2f1a575d1d 100644 --- a/tools/server/webui/src/lib/utils/text-files.ts +++ b/tools/server/webui/src/lib/utils/text-files.ts @@ -3,10 +3,8 @@ * Handles text file detection, reading, and validation */ -import { - DEFAULT_BINARY_DETECTION_OPTIONS, - type BinaryDetectionOptions -} from '$lib/constants/binary-detection'; +import { DEFAULT_BINARY_DETECTION_OPTIONS } from '$lib/constants/binary-detection'; +import type { BinaryDetectionOptions } from '$lib/constants/binary-detection'; import { FileExtensionText } from '$lib/enums'; /** diff --git a/tools/server/webui/tests/stories/ChatMessage.stories.svelte b/tools/server/webui/tests/stories/ChatMessage.stories.svelte index 5f4de7d476..a3579cf04e 100644 --- a/tools/server/webui/tests/stories/ChatMessage.stories.svelte +++ b/tools/server/webui/tests/stories/ChatMessage.stories.svelte @@ -93,7 +93,7 @@ }} play={async () => { const { settingsStore } = await import('$lib/stores/settings.svelte'); - settingsStore.updateConfig('disableReasoningFormat', false); + settingsStore.updateConfig('showRawOutputSwitch', false); }} /> @@ -105,7 +105,7 @@ }} play={async () => { const { settingsStore } = await import('$lib/stores/settings.svelte'); - settingsStore.updateConfig('disableReasoningFormat', false); + settingsStore.updateConfig('showRawOutputSwitch', false); }} /> @@ -117,7 +117,7 @@ }} play={async () => { const { settingsStore } = await import('$lib/stores/settings.svelte'); - settingsStore.updateConfig('disableReasoningFormat', false); + settingsStore.updateConfig('showRawOutputSwitch', false); }} /> @@ -129,7 +129,7 @@ }} play={async () => { const { settingsStore } = await import('$lib/stores/settings.svelte'); - settingsStore.updateConfig('disableReasoningFormat', true); + settingsStore.updateConfig('showRawOutputSwitch', true); }} /> @@ -141,7 +141,7 @@ asChild play={async () => { const { settingsStore } = await import('$lib/stores/settings.svelte'); - settingsStore.updateConfig('disableReasoningFormat', false); + settingsStore.updateConfig('showRawOutputSwitch', false); // Phase 1: Stream reasoning content in chunks let reasoningText = 'I need to think about this carefully. Let me break down the problem:\n\n1. The user is asking for help with something complex\n2. I should provide a thorough and helpful response\n3. I need to consider multiple approaches\n4. The best solution would be to explain step by step\n\nThis approach will ensure clarity and understanding.'; @@ -193,7 +193,7 @@ }} play={async () => { const { settingsStore } = await import('$lib/stores/settings.svelte'); - settingsStore.updateConfig('disableReasoningFormat', false); + settingsStore.updateConfig('showRawOutputSwitch', false); // Import the chat store to simulate loading state const { chatStore } = await import('$lib/stores/chat.svelte'); diff --git a/vendor/cpp-httplib/CMakeLists.txt b/vendor/cpp-httplib/CMakeLists.txt index a8a59e02f4..a5887476af 100644 --- a/vendor/cpp-httplib/CMakeLists.txt +++ b/vendor/cpp-httplib/CMakeLists.txt @@ -39,7 +39,7 @@ if (LLAMA_BUILD_BORINGSSL) set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)") set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository") - set(BORINGSSL_VERSION "0.20260204.0" CACHE STRING "BoringSSL version") + set(BORINGSSL_VERSION "0.20260211.0" CACHE STRING "BoringSSL version") message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}") diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp index ba5f9c8ff9..e309a7ad5d 100644 --- a/vendor/cpp-httplib/httplib.cpp +++ b/vendor/cpp-httplib/httplib.cpp @@ -6,8 +6,472 @@ namespace httplib { * Implementation that will be part of the .cc file if split into .h + .cc. */ +namespace stream { + +// stream::Result implementations +Result::Result() : chunk_size_(8192) {} + +Result::Result(ClientImpl::StreamHandle &&handle, size_t chunk_size) + : handle_(std::move(handle)), chunk_size_(chunk_size) {} + +Result::Result(Result &&other) noexcept + : handle_(std::move(other.handle_)), buffer_(std::move(other.buffer_)), + current_size_(other.current_size_), chunk_size_(other.chunk_size_), + finished_(other.finished_) { + other.current_size_ = 0; + other.finished_ = true; +} + +Result &Result::operator=(Result &&other) noexcept { + if (this != &other) { + handle_ = std::move(other.handle_); + buffer_ = std::move(other.buffer_); + current_size_ = other.current_size_; + chunk_size_ = other.chunk_size_; + finished_ = other.finished_; + other.current_size_ = 0; + other.finished_ = true; + } + return *this; +} + +bool Result::is_valid() const { return handle_.is_valid(); } +Result::operator bool() const { return is_valid(); } + +int Result::status() const { + return handle_.response ? handle_.response->status : -1; +} + +const Headers &Result::headers() const { + static const Headers empty_headers; + return handle_.response ? handle_.response->headers : empty_headers; +} + +std::string Result::get_header_value(const std::string &key, + const char *def) const { + return handle_.response ? handle_.response->get_header_value(key, def) : def; +} + +bool Result::has_header(const std::string &key) const { + return handle_.response ? handle_.response->has_header(key) : false; +} + +Error Result::error() const { return handle_.error; } +Error Result::read_error() const { return handle_.get_read_error(); } +bool Result::has_read_error() const { return handle_.has_read_error(); } + +bool Result::next() { + if (!handle_.is_valid() || finished_) { return false; } + + if (buffer_.size() < chunk_size_) { buffer_.resize(chunk_size_); } + + ssize_t n = handle_.read(&buffer_[0], chunk_size_); + if (n > 0) { + current_size_ = static_cast(n); + return true; + } + + current_size_ = 0; + finished_ = true; + return false; +} + +const char *Result::data() const { return buffer_.data(); } +size_t Result::size() const { return current_size_; } + +std::string Result::read_all() { + std::string result; + while (next()) { + result.append(data(), size()); + } + return result; +} + +} // namespace stream + +namespace sse { + +// SSEMessage implementations +SSEMessage::SSEMessage() : event("message") {} + +void SSEMessage::clear() { + event = "message"; + data.clear(); + id.clear(); +} + +// SSEClient implementations +SSEClient::SSEClient(Client &client, const std::string &path) + : client_(client), path_(path) {} + +SSEClient::SSEClient(Client &client, const std::string &path, + const Headers &headers) + : client_(client), path_(path), headers_(headers) {} + +SSEClient::~SSEClient() { stop(); } + +SSEClient &SSEClient::on_message(MessageHandler handler) { + on_message_ = std::move(handler); + return *this; +} + +SSEClient &SSEClient::on_event(const std::string &type, + MessageHandler handler) { + event_handlers_[type] = std::move(handler); + return *this; +} + +SSEClient &SSEClient::on_open(OpenHandler handler) { + on_open_ = std::move(handler); + return *this; +} + +SSEClient &SSEClient::on_error(ErrorHandler handler) { + on_error_ = std::move(handler); + return *this; +} + +SSEClient &SSEClient::set_reconnect_interval(int ms) { + reconnect_interval_ms_ = ms; + return *this; +} + +SSEClient &SSEClient::set_max_reconnect_attempts(int n) { + max_reconnect_attempts_ = n; + return *this; +} + +bool SSEClient::is_connected() const { return connected_.load(); } + +const std::string &SSEClient::last_event_id() const { + return last_event_id_; +} + +void SSEClient::start() { + running_.store(true); + run_event_loop(); +} + +void SSEClient::start_async() { + running_.store(true); + async_thread_ = std::thread([this]() { run_event_loop(); }); +} + +void SSEClient::stop() { + running_.store(false); + client_.stop(); // Cancel any pending operations + if (async_thread_.joinable()) { async_thread_.join(); } +} + +bool SSEClient::parse_sse_line(const std::string &line, SSEMessage &msg, + int &retry_ms) { + // Blank line signals end of event + if (line.empty() || line == "\r") { return true; } + + // Lines starting with ':' are comments (ignored) + if (!line.empty() && line[0] == ':') { return false; } + + // Find the colon separator + auto colon_pos = line.find(':'); + if (colon_pos == std::string::npos) { + // Line with no colon is treated as field name with empty value + return false; + } + + auto field = line.substr(0, colon_pos); + std::string value; + + // Value starts after colon, skip optional single space + if (colon_pos + 1 < line.size()) { + auto value_start = colon_pos + 1; + if (line[value_start] == ' ') { value_start++; } + value = line.substr(value_start); + // Remove trailing \r if present + if (!value.empty() && value.back() == '\r') { value.pop_back(); } + } + + // Handle known fields + if (field == "event") { + msg.event = value; + } else if (field == "data") { + // Multiple data lines are concatenated with newlines + if (!msg.data.empty()) { msg.data += "\n"; } + msg.data += value; + } else if (field == "id") { + // Empty id is valid (clears the last event ID) + msg.id = value; + } else if (field == "retry") { + // Parse retry interval in milliseconds + { + int v = 0; + auto res = + detail::from_chars(value.data(), value.data() + value.size(), v); + if (res.ec == std::errc{}) { retry_ms = v; } + } + } + // Unknown fields are ignored per SSE spec + + return false; +} + +void SSEClient::run_event_loop() { + auto reconnect_count = 0; + + while (running_.load()) { + // Build headers, including Last-Event-ID if we have one + auto request_headers = headers_; + if (!last_event_id_.empty()) { + request_headers.emplace("Last-Event-ID", last_event_id_); + } + + // Open streaming connection + auto result = stream::Get(client_, path_, request_headers); + + // Connection error handling + if (!result) { + connected_.store(false); + if (on_error_) { on_error_(result.error()); } + + if (!should_reconnect(reconnect_count)) { break; } + wait_for_reconnect(); + reconnect_count++; + continue; + } + + if (result.status() != 200) { + connected_.store(false); + // For certain errors, don't reconnect + if (result.status() == 204 || // No Content - server wants us to stop + result.status() == 404 || // Not Found + result.status() == 401 || // Unauthorized + result.status() == 403) { // Forbidden + if (on_error_) { on_error_(Error::Connection); } + break; + } + + if (on_error_) { on_error_(Error::Connection); } + + if (!should_reconnect(reconnect_count)) { break; } + wait_for_reconnect(); + reconnect_count++; + continue; + } + + // Connection successful + connected_.store(true); + reconnect_count = 0; + if (on_open_) { on_open_(); } + + // Event receiving loop + std::string buffer; + SSEMessage current_msg; + + while (running_.load() && result.next()) { + buffer.append(result.data(), result.size()); + + // Process complete lines in the buffer + size_t line_start = 0; + size_t newline_pos; + + while ((newline_pos = buffer.find('\n', line_start)) != + std::string::npos) { + auto line = buffer.substr(line_start, newline_pos - line_start); + line_start = newline_pos + 1; + + // Parse the line and check if event is complete + auto event_complete = + parse_sse_line(line, current_msg, reconnect_interval_ms_); + + if (event_complete && !current_msg.data.empty()) { + // Update last_event_id for reconnection + if (!current_msg.id.empty()) { last_event_id_ = current_msg.id; } + + // Dispatch event to appropriate handler + dispatch_event(current_msg); + + current_msg.clear(); + } + } + + // Keep unprocessed data in buffer + buffer.erase(0, line_start); + } + + // Connection ended + connected_.store(false); + + if (!running_.load()) { break; } + + // Check for read errors + if (result.has_read_error()) { + if (on_error_) { on_error_(result.read_error()); } + } + + if (!should_reconnect(reconnect_count)) { break; } + wait_for_reconnect(); + reconnect_count++; + } + + connected_.store(false); +} + +void SSEClient::dispatch_event(const SSEMessage &msg) { + // Check for specific event type handler first + auto it = event_handlers_.find(msg.event); + if (it != event_handlers_.end()) { + it->second(msg); + return; + } + + // Fall back to generic message handler + if (on_message_) { on_message_(msg); } +} + +bool SSEClient::should_reconnect(int count) const { + if (!running_.load()) { return false; } + if (max_reconnect_attempts_ == 0) { return true; } // unlimited + return count < max_reconnect_attempts_; +} + +void SSEClient::wait_for_reconnect() { + // Use small increments to check running_ flag frequently + auto waited = 0; + while (running_.load() && waited < reconnect_interval_ms_) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + waited += 100; + } +} + +} // namespace sse + +#ifdef CPPHTTPLIB_SSL_ENABLED +/* + * TLS abstraction layer - internal function declarations + * These are implementation details and not part of the public API. + */ +namespace tls { + +// Client context +ctx_t create_client_context(); +void free_context(ctx_t ctx); +bool set_min_version(ctx_t ctx, Version version); +bool load_ca_pem(ctx_t ctx, const char *pem, size_t len); +bool load_ca_file(ctx_t ctx, const char *file_path); +bool load_ca_dir(ctx_t ctx, const char *dir_path); +bool load_system_certs(ctx_t ctx); +bool set_client_cert_pem(ctx_t ctx, const char *cert, const char *key, + const char *password); +bool set_client_cert_file(ctx_t ctx, const char *cert_path, + const char *key_path, const char *password); + +// Server context +ctx_t create_server_context(); +bool set_server_cert_pem(ctx_t ctx, const char *cert, const char *key, + const char *password); +bool set_server_cert_file(ctx_t ctx, const char *cert_path, + const char *key_path, const char *password); +bool set_client_ca_file(ctx_t ctx, const char *ca_file, const char *ca_dir); +void set_verify_client(ctx_t ctx, bool require); + +// Session management +session_t create_session(ctx_t ctx, socket_t sock); +void free_session(session_t session); +bool set_sni(session_t session, const char *hostname); +bool set_hostname(session_t session, const char *hostname); + +// Handshake (non-blocking capable) +TlsError connect(session_t session); +TlsError accept(session_t session); + +// Handshake with timeout (blocking until timeout) +bool connect_nonblocking(session_t session, socket_t sock, time_t timeout_sec, + time_t timeout_usec, TlsError *err); +bool accept_nonblocking(session_t session, socket_t sock, time_t timeout_sec, + time_t timeout_usec, TlsError *err); + +// I/O (non-blocking capable) +ssize_t read(session_t session, void *buf, size_t len, TlsError &err); +ssize_t write(session_t session, const void *buf, size_t len, TlsError &err); +int pending(const_session_t session); +void shutdown(session_t session, bool graceful); + +// Connection state +bool is_peer_closed(session_t session, socket_t sock); + +// Certificate verification +cert_t get_peer_cert(const_session_t session); +void free_cert(cert_t cert); +bool verify_hostname(cert_t cert, const char *hostname); +uint64_t hostname_mismatch_code(); +long get_verify_result(const_session_t session); + +// Certificate introspection +std::string get_cert_subject_cn(cert_t cert); +std::string get_cert_issuer_name(cert_t cert); +bool get_cert_sans(cert_t cert, std::vector &sans); +bool get_cert_validity(cert_t cert, time_t ¬_before, time_t ¬_after); +std::string get_cert_serial(cert_t cert); +bool get_cert_der(cert_t cert, std::vector &der); +const char *get_sni(const_session_t session); + +// CA store management +ca_store_t create_ca_store(const char *pem, size_t len); +void free_ca_store(ca_store_t store); +bool set_ca_store(ctx_t ctx, ca_store_t store); +size_t get_ca_certs(ctx_t ctx, std::vector &certs); +std::vector get_ca_names(ctx_t ctx); + +// Dynamic certificate update (for servers) +bool update_server_cert(ctx_t ctx, const char *cert_pem, const char *key_pem, + const char *password); +bool update_server_client_ca(ctx_t ctx, const char *ca_pem); + +// Certificate verification callback +bool set_verify_callback(ctx_t ctx, VerifyCallback callback); +long get_verify_error(const_session_t session); +std::string verify_error_string(long error_code); + +// TlsError information +uint64_t peek_error(); +uint64_t get_error(); +std::string error_string(uint64_t code); + +} // namespace tls +#endif // CPPHTTPLIB_SSL_ENABLED + +/* + * Group 1: detail namespace - Non-SSL utilities + */ + namespace detail { +bool set_socket_opt_impl(socket_t sock, int level, int optname, + const void *optval, socklen_t optlen) { + return setsockopt(sock, level, optname, +#ifdef _WIN32 + reinterpret_cast(optval), +#else + optval, +#endif + optlen) == 0; +} + +bool set_socket_opt(socket_t sock, int level, int optname, int optval) { + return set_socket_opt_impl(sock, level, optname, &optval, sizeof(optval)); +} + +bool set_socket_opt_time(socket_t sock, int level, int optname, + time_t sec, time_t usec) { +#ifdef _WIN32 + auto timeout = static_cast(sec * 1000 + usec / 1000); +#else + timeval timeout; + timeout.tv_sec = static_cast(sec); + timeout.tv_usec = static_cast(usec); +#endif + return set_socket_opt_impl(sock, level, optname, &timeout, sizeof(timeout)); +} + bool is_hex(char c, int &v) { if (isdigit(c)) { v = c - '0'; @@ -940,39 +1404,6 @@ private: static const size_t read_buff_size_ = 1024l * 4; }; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -class SSLSocketStream final : public Stream { -public: - SSLSocketStream( - socket_t sock, SSL *ssl, time_t read_timeout_sec, - time_t read_timeout_usec, time_t write_timeout_sec, - time_t write_timeout_usec, time_t max_timeout_msec = 0, - std::chrono::time_point start_time = - (std::chrono::steady_clock::time_point::min)()); - ~SSLSocketStream() override; - - bool is_readable() const override; - bool wait_readable() const override; - bool wait_writable() const override; - ssize_t read(char *ptr, size_t size) override; - ssize_t write(const char *ptr, size_t size) override; - void get_remote_ip_and_port(std::string &ip, int &port) const override; - void get_local_ip_and_port(std::string &ip, int &port) const override; - socket_t socket() const override; - time_t duration() const override; - -private: - socket_t sock_; - SSL *ssl_; - time_t read_timeout_sec_; - time_t read_timeout_usec_; - time_t write_timeout_sec_; - time_t write_timeout_usec_; - time_t max_timeout_msec_; - const std::chrono::time_point start_time_; -}; -#endif - bool keep_alive(const std::atomic &svr_sock, socket_t sock, time_t keep_alive_timeout_sec) { using namespace std::chrono; @@ -2270,14 +2701,23 @@ bool read_headers(Stream &strm, Headers &headers) { return true; } -bool read_content_with_length(Stream &strm, size_t len, - DownloadProgress progress, - ContentReceiverWithProgress out) { +enum class ReadContentResult { + Success, // Successfully read the content + PayloadTooLarge, // The content exceeds the specified payload limit + Error // An error occurred while reading the content +}; + +ReadContentResult read_content_with_length( + Stream &strm, size_t len, DownloadProgress progress, + ContentReceiverWithProgress out, + size_t payload_max_length = (std::numeric_limits::max)()) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; detail::BodyReader br; br.stream = &strm; + br.has_content_length = true; br.content_length = len; + br.payload_max_length = payload_max_length; br.chunked = false; br.bytes_read = 0; br.last_error = Error::Success; @@ -2287,36 +2727,27 @@ bool read_content_with_length(Stream &strm, size_t len, auto read_len = static_cast(len - r); auto to_read = (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ); auto n = detail::read_body_content(&strm, br, buf, to_read); - if (n <= 0) { return false; } + if (n <= 0) { + // Check if it was a payload size error + if (br.last_error == Error::ExceedMaxPayloadSize) { + return ReadContentResult::PayloadTooLarge; + } + return ReadContentResult::Error; + } - if (!out(buf, static_cast(n), r, len)) { return false; } + if (!out(buf, static_cast(n), r, len)) { + return ReadContentResult::Error; + } r += static_cast(n); if (progress) { - if (!progress(r, len)) { return false; } + if (!progress(r, len)) { return ReadContentResult::Error; } } } - return true; + return ReadContentResult::Success; } -void skip_content_with_length(Stream &strm, size_t len) { - char buf[CPPHTTPLIB_RECV_BUFSIZ]; - size_t r = 0; - while (r < len) { - auto read_len = static_cast(len - r); - auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ)); - if (n <= 0) { return; } - r += static_cast(n); - } -} - -enum class ReadContentResult { - Success, // Successfully read the content - PayloadTooLarge, // The content exceeds the specified payload limit - Error // An error occurred while reading the content -}; - ReadContentResult read_content_without_length(Stream &strm, size_t payload_max_length, ContentReceiverWithProgress out) { @@ -2462,12 +2893,13 @@ bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, if (is_invalid_value) { ret = false; - } else if (len > payload_max_length) { - exceed_payload_max_length = true; - skip_content_with_length(strm, len); - ret = false; } else if (len > 0) { - ret = read_content_with_length(strm, len, std::move(progress), out); + auto result = read_content_with_length( + strm, len, std::move(progress), out, payload_max_length); + ret = (result == ReadContentResult::Success); + if (result == ReadContentResult::PayloadTooLarge) { + exceed_payload_max_length = true; + } } } @@ -3645,226 +4077,6 @@ bool has_crlf(const std::string &s) { return false; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -std::string message_digest(const std::string &s, const EVP_MD *algo) { - auto context = std::unique_ptr( - EVP_MD_CTX_new(), EVP_MD_CTX_free); - - unsigned int hash_length = 0; - unsigned char hash[EVP_MAX_MD_SIZE]; - - EVP_DigestInit_ex(context.get(), algo, nullptr); - EVP_DigestUpdate(context.get(), s.c_str(), s.size()); - EVP_DigestFinal_ex(context.get(), hash, &hash_length); - - std::stringstream ss; - for (auto i = 0u; i < hash_length; ++i) { - ss << std::hex << std::setw(2) << std::setfill('0') - << static_cast(hash[i]); - } - - return ss.str(); -} - -std::string MD5(const std::string &s) { - return message_digest(s, EVP_md5()); -} - -std::string SHA_256(const std::string &s) { - return message_digest(s, EVP_sha256()); -} - -std::string SHA_512(const std::string &s) { - return message_digest(s, EVP_sha512()); -} - -std::pair make_digest_authentication_header( - const Request &req, const std::map &auth, - size_t cnonce_count, const std::string &cnonce, const std::string &username, - const std::string &password, bool is_proxy = false) { - std::string nc; - { - std::stringstream ss; - ss << std::setfill('0') << std::setw(8) << std::hex << cnonce_count; - nc = ss.str(); - } - - std::string qop; - if (auth.find("qop") != auth.end()) { - qop = auth.at("qop"); - if (qop.find("auth-int") != std::string::npos) { - qop = "auth-int"; - } else if (qop.find("auth") != std::string::npos) { - qop = "auth"; - } else { - qop.clear(); - } - } - - std::string algo = "MD5"; - if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); } - - std::string response; - { - auto H = algo == "SHA-256" ? detail::SHA_256 - : algo == "SHA-512" ? detail::SHA_512 - : detail::MD5; - - auto A1 = username + ":" + auth.at("realm") + ":" + password; - - auto A2 = req.method + ":" + req.path; - if (qop == "auth-int") { A2 += ":" + H(req.body); } - - if (qop.empty()) { - response = H(H(A1) + ":" + auth.at("nonce") + ":" + H(A2)); - } else { - response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce + - ":" + qop + ":" + H(A2)); - } - } - - auto opaque = (auth.find("opaque") != auth.end()) ? auth.at("opaque") : ""; - - auto field = "Digest username=\"" + username + "\", realm=\"" + - auth.at("realm") + "\", nonce=\"" + auth.at("nonce") + - "\", uri=\"" + req.path + "\", algorithm=" + algo + - (qop.empty() ? ", response=\"" - : ", qop=" + qop + ", nc=" + nc + ", cnonce=\"" + - cnonce + "\", response=\"") + - response + "\"" + - (opaque.empty() ? "" : ", opaque=\"" + opaque + "\""); - - auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; - return std::make_pair(key, field); -} - -bool is_ssl_peer_could_be_closed(SSL *ssl, socket_t sock) { - detail::set_nonblocking(sock, true); - auto se = detail::scope_exit([&]() { detail::set_nonblocking(sock, false); }); - - char buf[1]; - return !SSL_peek(ssl, buf, 1) && - SSL_get_error(ssl, 0) == SSL_ERROR_ZERO_RETURN; -} - -#ifdef _WIN32 -// NOTE: This code came up with the following stackoverflow post: -// https://stackoverflow.com/questions/9507184/can-openssl-on-windows-use-the-system-certificate-store -bool load_system_certs_on_windows(X509_STORE *store) { - auto hStore = CertOpenSystemStoreW((HCRYPTPROV_LEGACY)NULL, L"ROOT"); - if (!hStore) { return false; } - - auto result = false; - PCCERT_CONTEXT pContext = NULL; - while ((pContext = CertEnumCertificatesInStore(hStore, pContext)) != - nullptr) { - auto encoded_cert = - static_cast(pContext->pbCertEncoded); - - auto x509 = d2i_X509(NULL, &encoded_cert, pContext->cbCertEncoded); - if (x509) { - X509_STORE_add_cert(store, x509); - X509_free(x509); - result = true; - } - } - - CertFreeCertificateContext(pContext); - CertCloseStore(hStore, 0); - - return result; -} -#elif defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) && TARGET_OS_MAC -template -using CFObjectPtr = - std::unique_ptr::type, void (*)(CFTypeRef)>; - -void cf_object_ptr_deleter(CFTypeRef obj) { - if (obj) { CFRelease(obj); } -} - -bool retrieve_certs_from_keychain(CFObjectPtr &certs) { - CFStringRef keys[] = {kSecClass, kSecMatchLimit, kSecReturnRef}; - CFTypeRef values[] = {kSecClassCertificate, kSecMatchLimitAll, - kCFBooleanTrue}; - - CFObjectPtr query( - CFDictionaryCreate(nullptr, reinterpret_cast(keys), values, - sizeof(keys) / sizeof(keys[0]), - &kCFTypeDictionaryKeyCallBacks, - &kCFTypeDictionaryValueCallBacks), - cf_object_ptr_deleter); - - if (!query) { return false; } - - CFTypeRef security_items = nullptr; - if (SecItemCopyMatching(query.get(), &security_items) != errSecSuccess || - CFArrayGetTypeID() != CFGetTypeID(security_items)) { - return false; - } - - certs.reset(reinterpret_cast(security_items)); - return true; -} - -bool retrieve_root_certs_from_keychain(CFObjectPtr &certs) { - CFArrayRef root_security_items = nullptr; - if (SecTrustCopyAnchorCertificates(&root_security_items) != errSecSuccess) { - return false; - } - - certs.reset(root_security_items); - return true; -} - -bool add_certs_to_x509_store(CFArrayRef certs, X509_STORE *store) { - auto result = false; - for (auto i = 0; i < CFArrayGetCount(certs); ++i) { - const auto cert = reinterpret_cast( - CFArrayGetValueAtIndex(certs, i)); - - if (SecCertificateGetTypeID() != CFGetTypeID(cert)) { continue; } - - CFDataRef cert_data = nullptr; - if (SecItemExport(cert, kSecFormatX509Cert, 0, nullptr, &cert_data) != - errSecSuccess) { - continue; - } - - CFObjectPtr cert_data_ptr(cert_data, cf_object_ptr_deleter); - - auto encoded_cert = static_cast( - CFDataGetBytePtr(cert_data_ptr.get())); - - auto x509 = - d2i_X509(NULL, &encoded_cert, CFDataGetLength(cert_data_ptr.get())); - - if (x509) { - X509_STORE_add_cert(store, x509); - X509_free(x509); - result = true; - } - } - - return result; -} - -bool load_system_certs_on_macos(X509_STORE *store) { - auto result = false; - CFObjectPtr certs(nullptr, cf_object_ptr_deleter); - if (retrieve_certs_from_keychain(certs) && certs) { - result = add_certs_to_x509_store(certs.get(), store); - } - - if (retrieve_root_certs_from_keychain(certs) && certs) { - result = add_certs_to_x509_store(certs.get(), store) || result; - } - - return result; -} -#endif // _WIN32 -#endif // CPPHTTPLIB_OPENSSL_SUPPORT - #ifdef _WIN32 class WSInit { public: @@ -3984,8 +4196,393 @@ bool is_field_content(const std::string &s) { bool is_field_value(const std::string &s) { return is_field_content(s); } } // namespace fields +} // namespace detail + +/* + * Group 2: detail namespace - SSL common utilities + */ + +#ifdef CPPHTTPLIB_SSL_ENABLED +namespace detail { + +class SSLSocketStream final : public Stream { +public: + SSLSocketStream( + socket_t sock, tls::session_t session, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, time_t max_timeout_msec = 0, + std::chrono::time_point start_time = + (std::chrono::steady_clock::time_point::min)()); + ~SSLSocketStream() override; + + bool is_readable() const override; + bool wait_readable() const override; + bool wait_writable() const override; + ssize_t read(char *ptr, size_t size) override; + ssize_t write(const char *ptr, size_t size) override; + void get_remote_ip_and_port(std::string &ip, int &port) const override; + void get_local_ip_and_port(std::string &ip, int &port) const override; + socket_t socket() const override; + time_t duration() const override; + +private: + socket_t sock_; + tls::session_t session_; + time_t read_timeout_sec_; + time_t read_timeout_usec_; + time_t write_timeout_sec_; + time_t write_timeout_usec_; + time_t max_timeout_msec_; + const std::chrono::time_point start_time_; +}; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +std::string message_digest(const std::string &s, const EVP_MD *algo) { + auto context = std::unique_ptr( + EVP_MD_CTX_new(), EVP_MD_CTX_free); + + unsigned int hash_length = 0; + unsigned char hash[EVP_MAX_MD_SIZE]; + + EVP_DigestInit_ex(context.get(), algo, nullptr); + EVP_DigestUpdate(context.get(), s.c_str(), s.size()); + EVP_DigestFinal_ex(context.get(), hash, &hash_length); + + std::stringstream ss; + for (auto i = 0u; i < hash_length; ++i) { + ss << std::hex << std::setw(2) << std::setfill('0') + << static_cast(hash[i]); + } + + return ss.str(); +} + +std::string MD5(const std::string &s) { + return message_digest(s, EVP_md5()); +} + +std::string SHA_256(const std::string &s) { + return message_digest(s, EVP_sha256()); +} + +std::string SHA_512(const std::string &s) { + return message_digest(s, EVP_sha512()); +} +#elif defined(CPPHTTPLIB_MBEDTLS_SUPPORT) +namespace { +template +std::string hash_to_hex(const unsigned char (&hash)[N]) { + std::stringstream ss; + for (size_t i = 0; i < N; ++i) { + ss << std::hex << std::setw(2) << std::setfill('0') + << static_cast(hash[i]); + } + return ss.str(); +} +} // namespace + +std::string MD5(const std::string &s) { + unsigned char hash[16]; +#ifdef CPPHTTPLIB_MBEDTLS_V3 + mbedtls_md5(reinterpret_cast(s.c_str()), s.size(), + hash); +#else + mbedtls_md5_ret(reinterpret_cast(s.c_str()), s.size(), + hash); +#endif + return hash_to_hex(hash); +} + +std::string SHA_256(const std::string &s) { + unsigned char hash[32]; +#ifdef CPPHTTPLIB_MBEDTLS_V3 + mbedtls_sha256(reinterpret_cast(s.c_str()), s.size(), + hash, 0); +#else + mbedtls_sha256_ret(reinterpret_cast(s.c_str()), + s.size(), hash, 0); +#endif + return hash_to_hex(hash); +} + +std::string SHA_512(const std::string &s) { + unsigned char hash[64]; +#ifdef CPPHTTPLIB_MBEDTLS_V3 + mbedtls_sha512(reinterpret_cast(s.c_str()), s.size(), + hash, 0); +#else + mbedtls_sha512_ret(reinterpret_cast(s.c_str()), + s.size(), hash, 0); +#endif + return hash_to_hex(hash); +} +#endif + +bool is_ip_address(const std::string &host) { + struct in_addr addr4; + struct in6_addr addr6; + return inet_pton(AF_INET, host.c_str(), &addr4) == 1 || + inet_pton(AF_INET6, host.c_str(), &addr6) == 1; +} + +template +bool process_server_socket_ssl( + const std::atomic &svr_sock, tls::session_t session, + socket_t sock, size_t keep_alive_max_count, time_t keep_alive_timeout_sec, + time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, T callback) { + return process_server_socket_core( + svr_sock, sock, keep_alive_max_count, keep_alive_timeout_sec, + [&](bool close_connection, bool &connection_closed) { + SSLSocketStream strm(sock, session, read_timeout_sec, read_timeout_usec, + write_timeout_sec, write_timeout_usec); + return callback(strm, close_connection, connection_closed); + }); +} + +template +bool process_client_socket_ssl( + tls::session_t session, socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, time_t max_timeout_msec, + std::chrono::time_point start_time, T callback) { + SSLSocketStream strm(sock, session, read_timeout_sec, read_timeout_usec, + write_timeout_sec, write_timeout_usec, max_timeout_msec, + start_time); + return callback(strm); +} + +std::pair make_digest_authentication_header( + const Request &req, const std::map &auth, + size_t cnonce_count, const std::string &cnonce, const std::string &username, + const std::string &password, bool is_proxy = false) { + std::string nc; + { + std::stringstream ss; + ss << std::setfill('0') << std::setw(8) << std::hex << cnonce_count; + nc = ss.str(); + } + + std::string qop; + if (auth.find("qop") != auth.end()) { + qop = auth.at("qop"); + if (qop.find("auth-int") != std::string::npos) { + qop = "auth-int"; + } else if (qop.find("auth") != std::string::npos) { + qop = "auth"; + } else { + qop.clear(); + } + } + + std::string algo = "MD5"; + if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); } + + std::string response; + { + auto H = algo == "SHA-256" ? detail::SHA_256 + : algo == "SHA-512" ? detail::SHA_512 + : detail::MD5; + + auto A1 = username + ":" + auth.at("realm") + ":" + password; + + auto A2 = req.method + ":" + req.path; + if (qop == "auth-int") { A2 += ":" + H(req.body); } + + if (qop.empty()) { + response = H(H(A1) + ":" + auth.at("nonce") + ":" + H(A2)); + } else { + response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce + + ":" + qop + ":" + H(A2)); + } + } + + auto opaque = (auth.find("opaque") != auth.end()) ? auth.at("opaque") : ""; + + auto field = "Digest username=\"" + username + "\", realm=\"" + + auth.at("realm") + "\", nonce=\"" + auth.at("nonce") + + "\", uri=\"" + req.path + "\", algorithm=" + algo + + (qop.empty() ? ", response=\"" + : ", qop=" + qop + ", nc=" + nc + ", cnonce=\"" + + cnonce + "\", response=\"") + + response + "\"" + + (opaque.empty() ? "" : ", opaque=\"" + opaque + "\""); + + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, field); +} + +bool match_hostname(const std::string &pattern, + const std::string &hostname) { + // Exact match (case-insensitive) + if (detail::case_ignore::equal(hostname, pattern)) { return true; } + + // Split both pattern and hostname into components by '.' + std::vector pattern_components; + if (!pattern.empty()) { + split(pattern.data(), pattern.data() + pattern.size(), '.', + [&](const char *b, const char *e) { + pattern_components.emplace_back(b, e); + }); + } + + std::vector host_components; + if (!hostname.empty()) { + split(hostname.data(), hostname.data() + hostname.size(), '.', + [&](const char *b, const char *e) { + host_components.emplace_back(b, e); + }); + } + + // Component count must match + if (host_components.size() != pattern_components.size()) { return false; } + + // Compare each component with wildcard support + // Supports: "*" (full wildcard), "prefix*" (partial wildcard) + // https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484 + auto itr = pattern_components.begin(); + for (const auto &h : host_components) { + auto &p = *itr; + if (!detail::case_ignore::equal(p, h) && p != "*") { + bool partial_match = false; + if (!p.empty() && p[p.size() - 1] == '*') { + const auto prefix_length = p.size() - 1; + if (prefix_length == 0) { + partial_match = true; + } else if (h.size() >= prefix_length) { + partial_match = + std::equal(p.begin(), + p.begin() + static_cast( + prefix_length), + h.begin(), [](const char ca, const char cb) { + return detail::case_ignore::to_lower(ca) == + detail::case_ignore::to_lower(cb); + }); + } + } + if (!partial_match) { return false; } + } + ++itr; + } + + return true; +} + +#ifdef _WIN32 +// Verify certificate using Windows CertGetCertificateChain API. +// This provides real-time certificate validation with Windows Update +// integration, independent of the TLS backend (OpenSSL or MbedTLS). +bool verify_cert_with_windows_schannel( + const std::vector &der_cert, const std::string &hostname, + bool verify_hostname, unsigned long &out_error) { + if (der_cert.empty()) { return false; } + + out_error = 0; + + // Create Windows certificate context from DER data + auto cert_context = CertCreateCertificateContext( + X509_ASN_ENCODING | PKCS_7_ASN_ENCODING, der_cert.data(), + static_cast(der_cert.size())); + + if (!cert_context) { + out_error = GetLastError(); + return false; + } + + auto cert_guard = + scope_exit([&] { CertFreeCertificateContext(cert_context); }); + + // Setup chain parameters + CERT_CHAIN_PARA chain_para = {}; + chain_para.cbSize = sizeof(chain_para); + + // Build certificate chain with revocation checking + PCCERT_CHAIN_CONTEXT chain_context = nullptr; + auto chain_result = CertGetCertificateChain( + nullptr, cert_context, nullptr, cert_context->hCertStore, &chain_para, + CERT_CHAIN_CACHE_END_CERT | CERT_CHAIN_REVOCATION_CHECK_END_CERT | + CERT_CHAIN_REVOCATION_ACCUMULATIVE_TIMEOUT, + nullptr, &chain_context); + + if (!chain_result || !chain_context) { + out_error = GetLastError(); + return false; + } + + auto chain_guard = + scope_exit([&] { CertFreeCertificateChain(chain_context); }); + + // Check if chain has errors + if (chain_context->TrustStatus.dwErrorStatus != CERT_TRUST_NO_ERROR) { + out_error = chain_context->TrustStatus.dwErrorStatus; + return false; + } + + // Verify SSL policy + SSL_EXTRA_CERT_CHAIN_POLICY_PARA extra_policy_para = {}; + extra_policy_para.cbSize = sizeof(extra_policy_para); +#ifdef AUTHTYPE_SERVER + extra_policy_para.dwAuthType = AUTHTYPE_SERVER; +#endif + + std::wstring whost; + if (verify_hostname) { + whost = u8string_to_wstring(hostname.c_str()); + extra_policy_para.pwszServerName = const_cast(whost.c_str()); + } + + CERT_CHAIN_POLICY_PARA policy_para = {}; + policy_para.cbSize = sizeof(policy_para); +#ifdef CERT_CHAIN_POLICY_IGNORE_ALL_REV_UNKNOWN_FLAGS + policy_para.dwFlags = CERT_CHAIN_POLICY_IGNORE_ALL_REV_UNKNOWN_FLAGS; +#else + policy_para.dwFlags = 0; +#endif + policy_para.pvExtraPolicyPara = &extra_policy_para; + + CERT_CHAIN_POLICY_STATUS policy_status = {}; + policy_status.cbSize = sizeof(policy_status); + + if (!CertVerifyCertificateChainPolicy(CERT_CHAIN_POLICY_SSL, chain_context, + &policy_para, &policy_status)) { + out_error = GetLastError(); + return false; + } + + if (policy_status.dwError != 0) { + out_error = policy_status.dwError; + return false; + } + + return true; +} +#endif // _WIN32 } // namespace detail +#endif // CPPHTTPLIB_SSL_ENABLED + +/* + * Group 3: httplib namespace - Non-SSL public API implementations + */ + +void default_socket_options(socket_t sock) { + detail::set_socket_opt(sock, SOL_SOCKET, +#ifdef SO_REUSEPORT + SO_REUSEPORT, +#else + SO_REUSEADDR, +#endif + 1); +} + +std::string get_bearer_token_auth(const Request &req) { + if (req.has_header("Authorization")) { + constexpr auto bearer_header_prefix_len = detail::str_len("Bearer "); + return req.get_header_value("Authorization") + .substr(bearer_header_prefix_len); + } + return ""; +} const char *status_message(int status) { switch (status) { @@ -4426,6 +5023,11 @@ make_bearer_token_authentication_header(const std::string &token, } // Request implementation +size_t Request::get_header_value_u64(const std::string &key, size_t def, + size_t id) const { + return detail::get_header_value_u64(headers, key, def, id); +} + bool Request::has_header(const std::string &key) const { return detail::has_header(headers, key); } @@ -4547,6 +5149,11 @@ size_t MultipartFormData::get_file_count(const std::string &key) const { } // Response implementation +size_t Response::get_header_value_u64(const std::string &key, size_t def, + size_t id) const { + return detail::get_header_value_u64(headers, key, def, id); +} + bool Response::has_header(const std::string &key) const { return headers.find(key) != headers.end(); } @@ -4662,6 +5269,12 @@ void Response::set_file_content(const std::string &path) { } // Result implementation +size_t Result::get_request_header_value_u64(const std::string &key, + size_t def, + size_t id) const { + return detail::get_header_value_u64(request_headers_, key, def, id); +} + bool Result::has_request_header(const std::string &key) const { return request_headers_.find(key) != request_headers_.end(); } @@ -4697,13 +5310,16 @@ ssize_t detail::BodyReader::read(char *buf, size_t len) { if (!chunked) { // Content-Length based reading - if (bytes_read >= content_length) { + if (has_content_length && bytes_read >= content_length) { eof = true; return 0; } - auto remaining = content_length - bytes_read; - auto to_read = (std::min)(len, remaining); + auto to_read = len; + if (has_content_length) { + auto remaining = content_length - bytes_read; + to_read = (std::min)(len, remaining); + } auto n = stream->read(buf, to_read); if (n < 0) { @@ -4721,7 +5337,12 @@ ssize_t detail::BodyReader::read(char *buf, size_t len) { } bytes_read += static_cast(n); - if (bytes_read >= content_length) { eof = true; } + if (has_content_length && bytes_read >= content_length) { eof = true; } + if (payload_max_length > 0 && bytes_read > payload_max_length) { + last_error = Error::ExceedMaxPayloadSize; + eof = true; + return -1; + } return n; } @@ -4745,9 +5366,83 @@ ssize_t detail::BodyReader::read(char *buf, size_t len) { } bytes_read += static_cast(n); + if (payload_max_length > 0 && bytes_read > payload_max_length) { + last_error = Error::ExceedMaxPayloadSize; + eof = true; + return -1; + } return n; } +// ThreadPool implementation +ThreadPool::ThreadPool(size_t n, size_t mqr) + : shutdown_(false), max_queued_requests_(mqr) { + threads_.reserve(n); + while (n) { + threads_.emplace_back(worker(*this)); + n--; + } +} + +bool ThreadPool::enqueue(std::function fn) { + { + std::unique_lock lock(mutex_); + if (max_queued_requests_ > 0 && jobs_.size() >= max_queued_requests_) { + return false; + } + jobs_.push_back(std::move(fn)); + } + + cond_.notify_one(); + return true; +} + +void ThreadPool::shutdown() { + // Stop all worker threads... + { + std::unique_lock lock(mutex_); + shutdown_ = true; + } + + cond_.notify_all(); + + // Join... + for (auto &t : threads_) { + t.join(); + } +} + +ThreadPool::worker::worker(ThreadPool &pool) : pool_(pool) {} + +void ThreadPool::worker::operator()() { + for (;;) { + std::function fn; + { + std::unique_lock lock(pool_.mutex_); + + pool_.cond_.wait(lock, + [&] { return !pool_.jobs_.empty() || pool_.shutdown_; }); + + if (pool_.shutdown_ && pool_.jobs_.empty()) { break; } + + fn = pool_.jobs_.front(); + pool_.jobs_.pop_front(); + } + + assert(true == static_cast(fn)); + fn(); + } + +#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) && !defined(OPENSSL_IS_BORINGSSL) && \ + !defined(LIBRESSL_VERSION_NUMBER) + OPENSSL_thread_stop(); +#endif +} + +/* + * Group 1 (continued): detail namespace - Stream implementations + */ + namespace detail { void calc_actual_timeout(time_t max_timeout_msec, time_t duration_msec, @@ -5076,6 +5771,155 @@ bool check_and_write_headers(Stream &strm, Headers &headers, } // namespace detail +/* + * Group 2 (continued): detail namespace - SSLSocketStream implementation + */ + +#ifdef CPPHTTPLIB_SSL_ENABLED +namespace detail { + +// SSL socket stream implementation +SSLSocketStream::SSLSocketStream( + socket_t sock, tls::session_t session, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, time_t max_timeout_msec, + std::chrono::time_point start_time) + : sock_(sock), session_(session), read_timeout_sec_(read_timeout_sec), + read_timeout_usec_(read_timeout_usec), + write_timeout_sec_(write_timeout_sec), + write_timeout_usec_(write_timeout_usec), + max_timeout_msec_(max_timeout_msec), start_time_(start_time) { +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + // Clear AUTO_RETRY for proper non-blocking I/O timeout handling + // Note: create_session() also clears this, but SSLClient currently + // uses ssl_new() which does not. Until full TLS API migration is complete, + // we need to ensure AUTO_RETRY is cleared here regardless of how the + // SSL session was created. + SSL_clear_mode(static_cast(session), SSL_MODE_AUTO_RETRY); +#endif +} + +SSLSocketStream::~SSLSocketStream() = default; + +bool SSLSocketStream::is_readable() const { + return tls::pending(session_) > 0; +} + +bool SSLSocketStream::wait_readable() const { + if (max_timeout_msec_ <= 0) { + return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; + } + + time_t read_timeout_sec; + time_t read_timeout_usec; + calc_actual_timeout(max_timeout_msec_, duration(), read_timeout_sec_, + read_timeout_usec_, read_timeout_sec, read_timeout_usec); + + return select_read(sock_, read_timeout_sec, read_timeout_usec) > 0; +} + +bool SSLSocketStream::wait_writable() const { + return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 && + is_socket_alive(sock_) && !tls::is_peer_closed(session_, sock_); +} + +ssize_t SSLSocketStream::read(char *ptr, size_t size) { + if (tls::pending(session_) > 0) { + tls::TlsError err; + auto ret = tls::read(session_, ptr, size, err); + if (ret == 0 || err.code == tls::ErrorCode::PeerClosed) { + error_ = Error::ConnectionClosed; + } + return ret; + } else if (wait_readable()) { + tls::TlsError err; + auto ret = tls::read(session_, ptr, size, err); + if (ret < 0) { + auto n = 1000; +#ifdef _WIN32 + while (--n >= 0 && (err.code == tls::ErrorCode::WantRead || + (err.code == tls::ErrorCode::SyscallError && + WSAGetLastError() == WSAETIMEDOUT))) { +#else + while (--n >= 0 && err.code == tls::ErrorCode::WantRead) { +#endif + if (tls::pending(session_) > 0) { + return tls::read(session_, ptr, size, err); + } else if (wait_readable()) { + std::this_thread::sleep_for(std::chrono::microseconds{10}); + ret = tls::read(session_, ptr, size, err); + if (ret >= 0) { return ret; } + } else { + break; + } + } + assert(ret < 0); + } else if (ret == 0 || err.code == tls::ErrorCode::PeerClosed) { + error_ = Error::ConnectionClosed; + } + return ret; + } else { + error_ = Error::Timeout; + return -1; + } +} + +ssize_t SSLSocketStream::write(const char *ptr, size_t size) { + if (wait_writable()) { + auto handle_size = + std::min(size, (std::numeric_limits::max)()); + + tls::TlsError err; + auto ret = tls::write(session_, ptr, handle_size, err); + if (ret < 0) { + auto n = 1000; +#ifdef _WIN32 + while (--n >= 0 && (err.code == tls::ErrorCode::WantWrite || + (err.code == tls::ErrorCode::SyscallError && + WSAGetLastError() == WSAETIMEDOUT))) { +#else + while (--n >= 0 && err.code == tls::ErrorCode::WantWrite) { +#endif + if (wait_writable()) { + std::this_thread::sleep_for(std::chrono::microseconds{10}); + ret = tls::write(session_, ptr, handle_size, err); + if (ret >= 0) { return ret; } + } else { + break; + } + } + assert(ret < 0); + } + return ret; + } + return -1; +} + +void SSLSocketStream::get_remote_ip_and_port(std::string &ip, + int &port) const { + detail::get_remote_ip_and_port(sock_, ip, port); +} + +void SSLSocketStream::get_local_ip_and_port(std::string &ip, + int &port) const { + detail::get_local_ip_and_port(sock_, ip, port); +} + +socket_t SSLSocketStream::socket() const { return sock_; } + +time_t SSLSocketStream::duration() const { + return std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time_) + .count(); +} + +} // namespace detail +#endif // CPPHTTPLIB_SSL_ENABLED + +/* + * Group 4: Server implementation + */ + // HTTP server implementation Server::Server() : new_task_queue( @@ -5677,36 +6521,40 @@ bool Server::read_content_core( // are true (no Transfer-Encoding and no Content-Length), then the message // body length is zero (no message body is present). // - // For non-SSL builds, peek into the socket to detect clients that send a - // body without a Content-Length header (raw HTTP over TCP). If there is - // pending data that exceeds the configured payload limit, treat this as an - // oversized request and fail early (causing connection close). For SSL - // builds we cannot reliably peek the decrypted application bytes, so keep - // the original behaviour. -#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT) + // For non-SSL builds, detect clients that send a body without a + // Content-Length header (raw HTTP over TCP). Check both the stream's + // internal read buffer (data already read from the socket during header + // parsing) and the socket itself for pending data. If data is found and + // exceeds the configured payload limit, reject with 413. + // For SSL builds we cannot reliably peek the decrypted application bytes, + // so keep the original behaviour. +#if !defined(CPPHTTPLIB_SSL_ENABLED) if (!req.has_header("Content-Length") && !detail::is_chunked_transfer_encoding(req.headers)) { - // Only peek if payload_max_length is set to a finite value + // Only check if payload_max_length is set to a finite value if (payload_max_length_ > 0 && payload_max_length_ < (std::numeric_limits::max)()) { - socket_t s = strm.socket(); - if (s != INVALID_SOCKET) { - // Peek to check if there is any pending data - char peekbuf[1]; - ssize_t n = ::recv(s, peekbuf, 1, MSG_PEEK); - if (n > 0) { - // There is data, so read it with payload limit enforcement - auto result = detail::read_content_without_length( - strm, payload_max_length_, out); - if (result == detail::ReadContentResult::PayloadTooLarge) { - res.status = StatusCode::PayloadTooLarge_413; - return false; - } else if (result != detail::ReadContentResult::Success) { - return false; - } - return true; + // Check if there is data already buffered in the stream (read during + // header parsing) or pending on the socket. Use a non-blocking socket + // check to avoid deadlock when the client sends no body. + bool has_data = strm.is_readable(); + if (!has_data) { + socket_t s = strm.socket(); + if (s != INVALID_SOCKET) { + has_data = detail::select_read(s, 0, 0) > 0; } } + if (has_data) { + auto result = + detail::read_content_without_length(strm, payload_max_length_, out); + if (result == detail::ReadContentResult::PayloadTooLarge) { + res.status = StatusCode::PayloadTooLarge_413; + return false; + } else if (result != detail::ReadContentResult::Success) { + return false; + } + return true; + } } return true; } @@ -5815,8 +6663,10 @@ bool Server::check_if_not_modified(const Request &req, Response &res, // simplified implementation requires exact matches. auto ret = detail::split_find(val.data(), val.data() + val.size(), ',', [&](const char *b, const char *e) { - return std::equal(b, e, "*") || - std::equal(b, e, etag.begin()); + auto seg_len = static_cast(e - b); + return (seg_len == 1 && *b == '*') || + (seg_len == etag.size() && + std::equal(b, e, etag.begin())); }); if (ret) { @@ -6518,6 +7368,9 @@ void Server::output_error_log(const Error &err, } } +/* + * Group 5: ClientImpl and Client (Universal) implementation + */ // HTTP client implementation ClientImpl::ClientImpl(const std::string &host) : ClientImpl(host, 80, std::string(), std::string()) {} @@ -6561,10 +7414,6 @@ void ClientImpl::copy_settings(const ClientImpl &rhs) { basic_auth_username_ = rhs.basic_auth_username_; basic_auth_password_ = rhs.basic_auth_password_; bearer_token_auth_token_ = rhs.bearer_token_auth_token_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - digest_auth_username_ = rhs.digest_auth_username_; - digest_auth_password_ = rhs.digest_auth_password_; -#endif keep_alive_ = rhs.keep_alive_; follow_location_ = rhs.follow_location_; path_encode_ = rhs.path_encode_; @@ -6574,28 +7423,27 @@ void ClientImpl::copy_settings(const ClientImpl &rhs) { socket_options_ = rhs.socket_options_; compress_ = rhs.compress_; decompress_ = rhs.decompress_; + payload_max_length_ = rhs.payload_max_length_; + has_payload_max_length_ = rhs.has_payload_max_length_; interface_ = rhs.interface_; proxy_host_ = rhs.proxy_host_; proxy_port_ = rhs.proxy_port_; proxy_basic_auth_username_ = rhs.proxy_basic_auth_username_; proxy_basic_auth_password_ = rhs.proxy_basic_auth_password_; proxy_bearer_token_auth_token_ = rhs.proxy_bearer_token_auth_token_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - proxy_digest_auth_username_ = rhs.proxy_digest_auth_username_; - proxy_digest_auth_password_ = rhs.proxy_digest_auth_password_; -#endif -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - ca_cert_file_path_ = rhs.ca_cert_file_path_; - ca_cert_dir_path_ = rhs.ca_cert_dir_path_; - ca_cert_store_ = rhs.ca_cert_store_; -#endif -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - server_certificate_verification_ = rhs.server_certificate_verification_; - server_hostname_verification_ = rhs.server_hostname_verification_; - server_certificate_verifier_ = rhs.server_certificate_verifier_; -#endif logger_ = rhs.logger_; error_logger_ = rhs.error_logger_; + +#ifdef CPPHTTPLIB_SSL_ENABLED + digest_auth_username_ = rhs.digest_auth_username_; + digest_auth_password_ = rhs.digest_auth_password_; + proxy_digest_auth_username_ = rhs.proxy_digest_auth_username_; + proxy_digest_auth_password_ = rhs.proxy_digest_auth_password_; + ca_cert_file_path_ = rhs.ca_cert_file_path_; + ca_cert_dir_path_ = rhs.ca_cert_dir_path_; + server_certificate_verification_ = rhs.server_certificate_verification_; + server_hostname_verification_ = rhs.server_hostname_verification_; +#endif } socket_t ClientImpl::create_client_socket(Error &error) const { @@ -6631,22 +7479,6 @@ bool ClientImpl::ensure_socket_connection(Socket &socket, Error &error) { return create_and_connect_socket(socket, error); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -bool SSLClient::ensure_socket_connection(Socket &socket, Error &error) { - if (!ClientImpl::ensure_socket_connection(socket, error)) { return false; } - - if (!proxy_host_.empty() && proxy_port_ != -1) { return true; } - - if (!initialize_ssl(socket, error)) { - shutdown_socket(socket); - close_socket(socket); - return false; - } - - return true; -} -#endif - void ClientImpl::shutdown_ssl(Socket & /*socket*/, bool /*shutdown_gracefully*/) { // If there are any requests in flight from threads other than us, then it's @@ -6671,9 +7503,10 @@ void ClientImpl::close_socket(Socket &socket) { socket_requests_are_from_thread_ == std::this_thread::get_id()); // It is also a bug if this happens while SSL is still active -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED assert(socket.ssl == nullptr); #endif + if (socket.sock == INVALID_SOCKET) { return; } detail::close_socket(socket.sock); socket.sock = INVALID_SOCKET; @@ -6722,6 +7555,8 @@ bool ClientImpl::send(Request &req, Response &res, Error &error) { if (error == Error::SSLPeerCouldBeClosed_) { assert(!ret); ret = send_(req, res, error); + // If still failing with SSLPeerCouldBeClosed_, convert to Read error + if (error == Error::SSLPeerCouldBeClosed_) { error = Error::Read; } } return ret; } @@ -6739,9 +7574,9 @@ bool ClientImpl::send_(Request &req, Response &res, Error &error) { if (socket_.is_open()) { is_alive = detail::is_socket_alive(socket_.sock); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if (is_alive && is_ssl()) { - if (detail::is_ssl_peer_could_be_closed(socket_.ssl, socket_.sock)) { + if (tls::is_peer_closed(socket_.ssl, socket_.sock)) { is_alive = false; } } @@ -6765,7 +7600,7 @@ bool ClientImpl::send_(Request &req, Response &res, Error &error) { return false; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED // TODO: refactoring if (is_ssl()) { auto &scli = static_cast(*this); @@ -6847,9 +7682,9 @@ Result ClientImpl::send_(Request &&req) { auto res = detail::make_unique(); auto error = Error::Success; auto ret = send(req, *res, error); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED return Result{ret ? std::move(res) : nullptr, error, std::move(req.headers), - last_ssl_error_, last_openssl_error_}; + last_ssl_error_, last_backend_error_}; #else return Result{ret ? std::move(res) : nullptr, error, std::move(req.headers)}; #endif @@ -6926,9 +7761,9 @@ ClientImpl::open_stream(const std::string &method, const std::string &path, auto is_alive = false; if (socket_.is_open()) { is_alive = detail::is_socket_alive(socket_.sock); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if (is_alive && is_ssl()) { - if (detail::is_ssl_peer_could_be_closed(socket_.ssl, socket_.sock)) { + if (tls::is_peer_closed(socket_.ssl, socket_.sock)) { is_alive = false; } } @@ -6946,7 +7781,7 @@ ClientImpl::open_stream(const std::string &method, const std::string &path, return handle; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if (is_ssl()) { auto &scli = static_cast(*this); if (!proxy_host_.empty() && proxy_port_ != -1) { @@ -6962,11 +7797,12 @@ ClientImpl::open_stream(const std::string &method, const std::string &path, transfer_socket_ownership_to_handle(handle); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - if (is_ssl() && handle.connection_->ssl) { +#ifdef CPPHTTPLIB_SSL_ENABLED + if (is_ssl() && handle.connection_->session) { handle.socket_stream_ = detail::make_unique( - handle.connection_->sock, handle.connection_->ssl, read_timeout_sec_, - read_timeout_usec_, write_timeout_sec_, write_timeout_usec_); + handle.connection_->sock, handle.connection_->session, + read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_); } else { handle.socket_stream_ = detail::make_unique( handle.connection_->sock, read_timeout_sec_, read_timeout_usec_, @@ -7016,9 +7852,11 @@ ClientImpl::open_stream(const std::string &method, const std::string &path, } handle.body_reader_.stream = handle.stream_; + handle.body_reader_.payload_max_length = payload_max_length_; auto content_length_str = handle.response->get_header_value("Content-Length"); if (!content_length_str.empty()) { + handle.body_reader_.has_content_length = true; handle.body_reader_.content_length = static_cast(std::stoull(content_length_str)); } @@ -7066,6 +7904,7 @@ ssize_t ClientImpl::StreamHandle::read_with_decompression(char *buf, auto to_copy = (std::min)(len, available); std::memcpy(buf, decompress_buffer_.data() + decompress_offset_, to_copy); decompress_offset_ += to_copy; + decompressed_bytes_read_ += to_copy; return static_cast(to_copy); } @@ -7081,12 +7920,16 @@ ssize_t ClientImpl::StreamHandle::read_with_decompression(char *buf, if (n <= 0) { return n; } - bool decompress_ok = - decompressor_->decompress(compressed_buf, static_cast(n), - [this](const char *data, size_t data_len) { - decompress_buffer_.append(data, data_len); - return true; - }); + bool decompress_ok = decompressor_->decompress( + compressed_buf, static_cast(n), + [this](const char *data, size_t data_len) { + decompress_buffer_.append(data, data_len); + auto limit = body_reader_.payload_max_length; + if (decompressed_bytes_read_ + decompress_buffer_.size() > limit) { + return false; + } + return true; + }); if (!decompress_ok) { body_reader_.last_error = Error::Read; @@ -7099,6 +7942,7 @@ ssize_t ClientImpl::StreamHandle::read_with_decompression(char *buf, auto to_copy = (std::min)(len, decompress_buffer_.size()); std::memcpy(buf, decompress_buffer_.data(), to_copy); decompress_offset_ = to_copy; + decompressed_bytes_read_ += to_copy; return static_cast(to_copy); } @@ -7121,7 +7965,6 @@ void ClientImpl::StreamHandle::parse_trailers_if_needed() { } } -// Inline method implementations for `ChunkedDecoder`. namespace detail { ChunkedDecoder::ChunkedDecoder(Stream &s) : strm(s) {} @@ -7185,8 +8028,8 @@ bool ChunkedDecoder::parse_trailers_into(Headers &dest, void ClientImpl::transfer_socket_ownership_to_handle(StreamHandle &handle) { handle.connection_->sock = socket_.sock; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - handle.connection_->ssl = socket_.ssl; +#ifdef CPPHTTPLIB_SSL_ENABLED + handle.connection_->session = socket_.ssl; socket_.ssl = nullptr; #endif socket_.sock = INVALID_SOCKET; @@ -7239,7 +8082,7 @@ bool ClientImpl::handle_request(Stream &strm, Request &req, ret = redirect(req, res, error); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if ((res.status == StatusCode::Unauthorized_401 || res.status == StatusCode::ProxyAuthenticationRequired_407) && req.authorization_count_ < 5) { @@ -7343,7 +8186,7 @@ bool ClientImpl::create_redirect_client( // Create appropriate client type and handle redirect if (need_ssl) { -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED // Create SSL client for HTTPS redirect SSLClient redirect_client(host, port); @@ -7363,9 +8206,10 @@ bool ClientImpl::create_redirect_client( server_hostname_verification_); } - // Handle CA certificate store and paths if available - if (ca_cert_store_ && X509_STORE_up_ref(ca_cert_store_)) { - redirect_client.set_ca_cert_store(ca_cert_store_); + // Transfer CA certificate to redirect client + if (!ca_cert_pem_.empty()) { + redirect_client.load_ca_cert_store(ca_cert_pem_.c_str(), + ca_cert_pem_.size()); } if (!ca_cert_file_path_.empty()) { redirect_client.set_ca_cert_path(ca_cert_file_path_, ca_cert_dir_path_); @@ -7418,7 +8262,7 @@ void ClientImpl::setup_redirect_client(ClientType &client) { if (!bearer_token_auth_token_.empty()) { client.set_bearer_token_auth(bearer_token_auth_token_); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if (!digest_auth_username_.empty()) { client.set_digest_auth(digest_auth_username_, digest_auth_password_); } @@ -7438,7 +8282,7 @@ void ClientImpl::setup_redirect_client(ClientType &client) { if (!proxy_bearer_token_auth_token_.empty()) { client.set_proxy_bearer_token_auth(proxy_bearer_token_auth_token_); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if (!proxy_digest_auth_username_.empty()) { client.set_proxy_digest_auth(proxy_digest_auth_username_, proxy_digest_auth_password_); @@ -7809,9 +8653,9 @@ Result ClientImpl::send_with_content_provider_and_receiver( std::move(content_provider_without_length), content_type, std::move(content_receiver), error); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED return Result{std::move(res), error, std::move(req.headers), last_ssl_error_, - last_openssl_error_}; + last_backend_error_}; #else return Result{std::move(res), error, std::move(req.headers)}; #endif @@ -7851,11 +8695,11 @@ bool ClientImpl::process_request(Stream &strm, Request &req, auto write_request_success = write_request(strm, req, close_connection, error, expect_100_continue); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - if (is_ssl()) { +#ifdef CPPHTTPLIB_SSL_ENABLED + if (is_ssl() && !expect_100_continue) { auto is_proxy_enabled = !proxy_host_.empty() && proxy_port_ != -1; if (!is_proxy_enabled) { - if (detail::is_ssl_peer_could_be_closed(socket_.ssl, socket_.sock)) { + if (tls::is_peer_closed(socket_.ssl, socket_.sock)) { error = Error::SSLPeerCouldBeClosed_; output_error_log(error, &req); return false; @@ -7937,6 +8781,11 @@ bool ClientImpl::process_request(Stream &strm, Request &req, [&](const char *buf, size_t n, size_t /*off*/, size_t /*len*/) { assert(res.body.size() + n <= res.body.max_size()); + if (payload_max_length_ > 0 && + (res.body.size() >= payload_max_length_ || + n > payload_max_length_ - res.body.size())) { + return false; + } res.body.append(buf, n); return true; }); @@ -7965,9 +8814,12 @@ bool ClientImpl::process_request(Stream &strm, Request &req, if (res.status != StatusCode::NotModified_304) { int dummy_status; - if (!detail::read_content(strm, res, (std::numeric_limits::max)(), - dummy_status, std::move(progress), - std::move(out), decompress_)) { + auto max_length = (!has_payload_max_length_ && req.content_receiver) + ? (std::numeric_limits::max)() + : payload_max_length_; + if (!detail::read_content(strm, res, max_length, dummy_status, + std::move(progress), std::move(out), + decompress_)) { if (error != Error::Canceled) { error = Error::Read; } output_error_log(error, &req); return false; @@ -8878,14 +9730,6 @@ void ClientImpl::set_bearer_token_auth(const std::string &token) { bearer_token_auth_token_ = token; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -void ClientImpl::set_digest_auth(const std::string &username, - const std::string &password) { - digest_auth_username_ = username; - digest_auth_password_ = password; -} -#endif - void ClientImpl::set_keep_alive(bool on) { keep_alive_ = on; } void ClientImpl::set_follow_location(bool on) { follow_location_ = on; } @@ -8922,6 +9766,11 @@ void ClientImpl::set_compress(bool on) { compress_ = on; } void ClientImpl::set_decompress(bool on) { decompress_ = on; } +void ClientImpl::set_payload_max_length(size_t length) { + payload_max_length_ = length; + has_payload_max_length_ = true; +} + void ClientImpl::set_interface(const std::string &intf) { interface_ = intf; } @@ -8941,11 +9790,11 @@ void ClientImpl::set_proxy_bearer_token_auth(const std::string &token) { proxy_bearer_token_auth_token_ = token; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -void ClientImpl::set_proxy_digest_auth(const std::string &username, - const std::string &password) { - proxy_digest_auth_username_ = username; - proxy_digest_auth_password_ = password; +#ifdef CPPHTTPLIB_SSL_ENABLED +void ClientImpl::set_digest_auth(const std::string &username, + const std::string &password) { + digest_auth_username_ = username; + digest_auth_password_ = password; } void ClientImpl::set_ca_cert_path(const std::string &ca_cert_file_path, @@ -8954,12 +9803,23 @@ void ClientImpl::set_ca_cert_path(const std::string &ca_cert_file_path, ca_cert_dir_path_ = ca_cert_dir_path; } -void ClientImpl::set_ca_cert_store(X509_STORE *ca_cert_store) { - if (ca_cert_store && ca_cert_store != ca_cert_store_) { - ca_cert_store_ = ca_cert_store; - } +void ClientImpl::set_proxy_digest_auth(const std::string &username, + const std::string &password) { + proxy_digest_auth_username_ = username; + proxy_digest_auth_password_ = password; } +void ClientImpl::enable_server_certificate_verification(bool enabled) { + server_certificate_verification_ = enabled; +} + +void ClientImpl::enable_server_hostname_verification(bool enabled) { + server_hostname_verification_ = enabled; +} +#endif + +// ClientImpl::set_ca_cert_store is defined after TLS namespace (uses helpers) +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT X509_STORE *ClientImpl::create_ca_cert_store(const char *ca_cert, std::size_t size) const { auto mem = BIO_new_mem_buf(ca_cert, static_cast(size)); @@ -8984,17 +9844,9 @@ X509_STORE *ClientImpl::create_ca_cert_store(const char *ca_cert, return cts; } -void ClientImpl::enable_server_certificate_verification(bool enabled) { - server_certificate_verification_ = enabled; -} - -void ClientImpl::enable_server_hostname_verification(bool enabled) { - server_hostname_verification_ = enabled; -} - void ClientImpl::set_server_certificate_verifier( - std::function verifier) { - server_certificate_verifier_ = verifier; + std::function /*verifier*/) { + // Base implementation does nothing - SSLClient overrides this } #endif @@ -9007,958 +9859,24 @@ void ClientImpl::set_error_logger(ErrorLogger error_logger) { } /* - * SSL Implementation + * SSL/TLS Common Implementation */ -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -namespace detail { -bool is_ip_address(const std::string &host) { - struct in_addr addr4; - struct in6_addr addr6; - return inet_pton(AF_INET, host.c_str(), &addr4) == 1 || - inet_pton(AF_INET6, host.c_str(), &addr6) == 1; -} - -template -SSL *ssl_new(socket_t sock, SSL_CTX *ctx, std::mutex &ctx_mutex, - U SSL_connect_or_accept, V setup) { - SSL *ssl = nullptr; - { - std::lock_guard guard(ctx_mutex); - ssl = SSL_new(ctx); - } - - if (ssl) { - set_nonblocking(sock, true); - auto bio = BIO_new_socket(static_cast(sock), BIO_NOCLOSE); - BIO_set_nbio(bio, 1); - SSL_set_bio(ssl, bio, bio); - - if (!setup(ssl) || SSL_connect_or_accept(ssl) != 1) { - SSL_shutdown(ssl); - { - std::lock_guard guard(ctx_mutex); - SSL_free(ssl); - } - set_nonblocking(sock, false); - return nullptr; - } - BIO_set_nbio(bio, 0); - set_nonblocking(sock, false); - } - - return ssl; -} - -void ssl_delete(std::mutex &ctx_mutex, SSL *ssl, socket_t sock, - bool shutdown_gracefully) { - // sometimes we may want to skip this to try to avoid SIGPIPE if we know - // the remote has closed the network connection - // Note that it is not always possible to avoid SIGPIPE, this is merely a - // best-efforts. - if (shutdown_gracefully) { - (void)(sock); - // SSL_shutdown() returns 0 on first call (indicating close_notify alert - // sent) and 1 on subsequent call (indicating close_notify alert received) - if (SSL_shutdown(ssl) == 0) { - // Expected to return 1, but even if it doesn't, we free ssl - SSL_shutdown(ssl); - } - } - - std::lock_guard guard(ctx_mutex); - SSL_free(ssl); -} - -template -bool ssl_connect_or_accept_nonblocking(socket_t sock, SSL *ssl, - U ssl_connect_or_accept, - time_t timeout_sec, time_t timeout_usec, - int *ssl_error) { - auto res = 0; - while ((res = ssl_connect_or_accept(ssl)) != 1) { - auto err = SSL_get_error(ssl, res); - switch (err) { - case SSL_ERROR_WANT_READ: - if (select_read(sock, timeout_sec, timeout_usec) > 0) { continue; } - break; - case SSL_ERROR_WANT_WRITE: - if (select_write(sock, timeout_sec, timeout_usec) > 0) { continue; } - break; - default: break; - } - if (ssl_error) { *ssl_error = err; } - return false; - } - return true; -} - -template -bool process_server_socket_ssl( - const std::atomic &svr_sock, SSL *ssl, socket_t sock, - size_t keep_alive_max_count, time_t keep_alive_timeout_sec, - time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, - time_t write_timeout_usec, T callback) { - return process_server_socket_core( - svr_sock, sock, keep_alive_max_count, keep_alive_timeout_sec, - [&](bool close_connection, bool &connection_closed) { - SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec, - write_timeout_sec, write_timeout_usec); - return callback(strm, close_connection, connection_closed); - }); -} - -template -bool process_client_socket_ssl( - SSL *ssl, socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, - time_t write_timeout_sec, time_t write_timeout_usec, - time_t max_timeout_msec, - std::chrono::time_point start_time, T callback) { - SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec, - write_timeout_sec, write_timeout_usec, max_timeout_msec, - start_time); - return callback(strm); -} - -// SSL socket stream implementation -SSLSocketStream::SSLSocketStream( - socket_t sock, SSL *ssl, time_t read_timeout_sec, time_t read_timeout_usec, - time_t write_timeout_sec, time_t write_timeout_usec, - time_t max_timeout_msec, - std::chrono::time_point start_time) - : sock_(sock), ssl_(ssl), read_timeout_sec_(read_timeout_sec), - read_timeout_usec_(read_timeout_usec), - write_timeout_sec_(write_timeout_sec), - write_timeout_usec_(write_timeout_usec), - max_timeout_msec_(max_timeout_msec), start_time_(start_time) { - SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY); -} - -SSLSocketStream::~SSLSocketStream() = default; - -bool SSLSocketStream::is_readable() const { - return SSL_pending(ssl_) > 0; -} - -bool SSLSocketStream::wait_readable() const { - if (max_timeout_msec_ <= 0) { - return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; - } - - time_t read_timeout_sec; - time_t read_timeout_usec; - calc_actual_timeout(max_timeout_msec_, duration(), read_timeout_sec_, - read_timeout_usec_, read_timeout_sec, read_timeout_usec); - - return select_read(sock_, read_timeout_sec, read_timeout_usec) > 0; -} - -bool SSLSocketStream::wait_writable() const { - return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 && - is_socket_alive(sock_) && !is_ssl_peer_could_be_closed(ssl_, sock_); -} - -ssize_t SSLSocketStream::read(char *ptr, size_t size) { - if (SSL_pending(ssl_) > 0) { - auto ret = SSL_read(ssl_, ptr, static_cast(size)); - if (ret == 0) { error_ = Error::ConnectionClosed; } - return ret; - } else if (wait_readable()) { - auto ret = SSL_read(ssl_, ptr, static_cast(size)); - if (ret < 0) { - auto err = SSL_get_error(ssl_, ret); - auto n = 1000; -#ifdef _WIN32 - while (--n >= 0 && (err == SSL_ERROR_WANT_READ || - (err == SSL_ERROR_SYSCALL && - WSAGetLastError() == WSAETIMEDOUT))) { -#else - while (--n >= 0 && err == SSL_ERROR_WANT_READ) { -#endif - if (SSL_pending(ssl_) > 0) { - return SSL_read(ssl_, ptr, static_cast(size)); - } else if (wait_readable()) { - std::this_thread::sleep_for(std::chrono::microseconds{10}); - ret = SSL_read(ssl_, ptr, static_cast(size)); - if (ret >= 0) { return ret; } - err = SSL_get_error(ssl_, ret); - } else { - break; - } - } - assert(ret < 0); - } else if (ret == 0) { - error_ = Error::ConnectionClosed; - } - return ret; - } else { - error_ = Error::Timeout; - return -1; - } -} - -ssize_t SSLSocketStream::write(const char *ptr, size_t size) { - if (wait_writable()) { - auto handle_size = static_cast( - std::min(size, (std::numeric_limits::max)())); - - auto ret = SSL_write(ssl_, ptr, static_cast(handle_size)); - if (ret < 0) { - auto err = SSL_get_error(ssl_, ret); - auto n = 1000; -#ifdef _WIN32 - while (--n >= 0 && (err == SSL_ERROR_WANT_WRITE || - (err == SSL_ERROR_SYSCALL && - WSAGetLastError() == WSAETIMEDOUT))) { -#else - while (--n >= 0 && err == SSL_ERROR_WANT_WRITE) { -#endif - if (wait_writable()) { - std::this_thread::sleep_for(std::chrono::microseconds{10}); - ret = SSL_write(ssl_, ptr, static_cast(handle_size)); - if (ret >= 0) { return ret; } - err = SSL_get_error(ssl_, ret); - } else { - break; - } - } - assert(ret < 0); - } - return ret; - } - return -1; -} - -void SSLSocketStream::get_remote_ip_and_port(std::string &ip, - int &port) const { - detail::get_remote_ip_and_port(sock_, ip, port); -} - -void SSLSocketStream::get_local_ip_and_port(std::string &ip, - int &port) const { - detail::get_local_ip_and_port(sock_, ip, port); -} - -socket_t SSLSocketStream::socket() const { return sock_; } - -time_t SSLSocketStream::duration() const { - return std::chrono::duration_cast( - std::chrono::steady_clock::now() - start_time_) - .count(); -} - -} // namespace detail - -// SSL HTTP server implementation -SSLServer::SSLServer(const char *cert_path, const char *private_key_path, - const char *client_ca_cert_file_path, - const char *client_ca_cert_dir_path, - const char *private_key_password) { - ctx_ = SSL_CTX_new(TLS_server_method()); - - if (ctx_) { - SSL_CTX_set_options(ctx_, - SSL_OP_NO_COMPRESSION | - SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); - - SSL_CTX_set_min_proto_version(ctx_, TLS1_2_VERSION); - - if (private_key_password != nullptr && (private_key_password[0] != '\0')) { - SSL_CTX_set_default_passwd_cb_userdata( - ctx_, - reinterpret_cast(const_cast(private_key_password))); - } - - if (SSL_CTX_use_certificate_chain_file(ctx_, cert_path) != 1 || - SSL_CTX_use_PrivateKey_file(ctx_, private_key_path, SSL_FILETYPE_PEM) != - 1 || - SSL_CTX_check_private_key(ctx_) != 1) { - last_ssl_error_ = static_cast(ERR_get_error()); - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } else if (client_ca_cert_file_path || client_ca_cert_dir_path) { - SSL_CTX_load_verify_locations(ctx_, client_ca_cert_file_path, - client_ca_cert_dir_path); - - // Set client CA list to be sent to clients during TLS handshake - if (client_ca_cert_file_path) { - auto ca_list = SSL_load_client_CA_file(client_ca_cert_file_path); - if (ca_list != nullptr) { - SSL_CTX_set_client_CA_list(ctx_, ca_list); - } else { - // Failed to load client CA list, but we continue since - // SSL_CTX_load_verify_locations already succeeded and - // certificate verification will still work - last_ssl_error_ = static_cast(ERR_get_error()); - } - } - - SSL_CTX_set_verify( - ctx_, SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, nullptr); - } - } -} - -SSLServer::SSLServer(X509 *cert, EVP_PKEY *private_key, - X509_STORE *client_ca_cert_store) { - ctx_ = SSL_CTX_new(TLS_server_method()); - - if (ctx_) { - SSL_CTX_set_options(ctx_, - SSL_OP_NO_COMPRESSION | - SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); - - SSL_CTX_set_min_proto_version(ctx_, TLS1_2_VERSION); - - if (SSL_CTX_use_certificate(ctx_, cert) != 1 || - SSL_CTX_use_PrivateKey(ctx_, private_key) != 1) { - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } else if (client_ca_cert_store) { - SSL_CTX_set_cert_store(ctx_, client_ca_cert_store); - - // Extract CA names from the store and set them as the client CA list - auto ca_list = extract_ca_names_from_x509_store(client_ca_cert_store); - if (ca_list) { - SSL_CTX_set_client_CA_list(ctx_, ca_list); - } else { - // Failed to extract CA names, record the error - last_ssl_error_ = static_cast(ERR_get_error()); - } - - SSL_CTX_set_verify( - ctx_, SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, nullptr); - } - } -} - -SSLServer::SSLServer( - const std::function &setup_ssl_ctx_callback) { - ctx_ = SSL_CTX_new(TLS_method()); - if (ctx_) { - if (!setup_ssl_ctx_callback(*ctx_)) { - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } - } -} - -SSLServer::~SSLServer() { - if (ctx_) { SSL_CTX_free(ctx_); } -} - -bool SSLServer::is_valid() const { return ctx_; } - -SSL_CTX *SSLServer::ssl_context() const { return ctx_; } - -void SSLServer::update_certs(X509 *cert, EVP_PKEY *private_key, - X509_STORE *client_ca_cert_store) { - - std::lock_guard guard(ctx_mutex_); - - SSL_CTX_use_certificate(ctx_, cert); - SSL_CTX_use_PrivateKey(ctx_, private_key); - - if (client_ca_cert_store != nullptr) { - SSL_CTX_set_cert_store(ctx_, client_ca_cert_store); - } -} - -bool SSLServer::process_and_close_socket(socket_t sock) { - auto ssl = detail::ssl_new( - sock, ctx_, ctx_mutex_, - [&](SSL *ssl2) { - return detail::ssl_connect_or_accept_nonblocking( - sock, ssl2, SSL_accept, read_timeout_sec_, read_timeout_usec_, - &last_ssl_error_); - }, - [](SSL * /*ssl2*/) { return true; }); - - auto ret = false; - if (ssl) { - std::string remote_addr; - int remote_port = 0; - detail::get_remote_ip_and_port(sock, remote_addr, remote_port); - - std::string local_addr; - int local_port = 0; - detail::get_local_ip_and_port(sock, local_addr, local_port); - - ret = detail::process_server_socket_ssl( - svr_sock_, ssl, sock, keep_alive_max_count_, keep_alive_timeout_sec_, - read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, - write_timeout_usec_, - [&](Stream &strm, bool close_connection, bool &connection_closed) { - return process_request(strm, remote_addr, remote_port, local_addr, - local_port, close_connection, - connection_closed, - [&](Request &req) { req.ssl = ssl; }); - }); - - // Shutdown gracefully if the result seemed successful, non-gracefully if - // the connection appeared to be closed. - const bool shutdown_gracefully = ret; - detail::ssl_delete(ctx_mutex_, ssl, sock, shutdown_gracefully); - } - - detail::shutdown_socket(sock); - detail::close_socket(sock); - return ret; -} - -STACK_OF(X509_NAME) * SSLServer::extract_ca_names_from_x509_store( - X509_STORE *store) { - if (!store) { return nullptr; } - - auto ca_list = sk_X509_NAME_new_null(); - if (!ca_list) { return nullptr; } - - // Get all objects from the store - auto objs = X509_STORE_get0_objects(store); - if (!objs) { - sk_X509_NAME_free(ca_list); - return nullptr; - } - - // Iterate through objects and extract certificate subject names - for (int i = 0; i < sk_X509_OBJECT_num(objs); i++) { - auto obj = sk_X509_OBJECT_value(objs, i); - if (X509_OBJECT_get_type(obj) == X509_LU_X509) { - auto cert = X509_OBJECT_get0_X509(obj); - if (cert) { - auto subject = X509_get_subject_name(cert); - if (subject) { - auto name_dup = X509_NAME_dup(subject); - if (name_dup) { sk_X509_NAME_push(ca_list, name_dup); } - } - } - } - } - - // If no names were extracted, free the list and return nullptr - if (sk_X509_NAME_num(ca_list) == 0) { - sk_X509_NAME_free(ca_list); - return nullptr; - } - - return ca_list; -} - -// SSL HTTP client implementation -SSLClient::SSLClient(const std::string &host) - : SSLClient(host, 443, std::string(), std::string()) {} - -SSLClient::SSLClient(const std::string &host, int port) - : SSLClient(host, port, std::string(), std::string()) {} - -SSLClient::SSLClient(const std::string &host, int port, - const std::string &client_cert_path, - const std::string &client_key_path, - const std::string &private_key_password) - : ClientImpl(host, port, client_cert_path, client_key_path) { - ctx_ = SSL_CTX_new(TLS_client_method()); - - SSL_CTX_set_min_proto_version(ctx_, TLS1_2_VERSION); - - detail::split(&host_[0], &host_[host_.size()], '.', - [&](const char *b, const char *e) { - host_components_.emplace_back(b, e); - }); - - if (!client_cert_path.empty() && !client_key_path.empty()) { - if (!private_key_password.empty()) { - SSL_CTX_set_default_passwd_cb_userdata( - ctx_, reinterpret_cast( - const_cast(private_key_password.c_str()))); - } - - if (SSL_CTX_use_certificate_file(ctx_, client_cert_path.c_str(), - SSL_FILETYPE_PEM) != 1 || - SSL_CTX_use_PrivateKey_file(ctx_, client_key_path.c_str(), - SSL_FILETYPE_PEM) != 1) { - last_openssl_error_ = ERR_get_error(); - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } - } -} - -SSLClient::SSLClient(const std::string &host, int port, - X509 *client_cert, EVP_PKEY *client_key, - const std::string &private_key_password) - : ClientImpl(host, port) { - ctx_ = SSL_CTX_new(TLS_client_method()); - - detail::split(&host_[0], &host_[host_.size()], '.', - [&](const char *b, const char *e) { - host_components_.emplace_back(b, e); - }); - - if (client_cert != nullptr && client_key != nullptr) { - if (!private_key_password.empty()) { - SSL_CTX_set_default_passwd_cb_userdata( - ctx_, reinterpret_cast( - const_cast(private_key_password.c_str()))); - } - - if (SSL_CTX_use_certificate(ctx_, client_cert) != 1 || - SSL_CTX_use_PrivateKey(ctx_, client_key) != 1) { - last_openssl_error_ = ERR_get_error(); - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } - } -} - -SSLClient::~SSLClient() { - if (ctx_) { SSL_CTX_free(ctx_); } - // Make sure to shut down SSL since shutdown_ssl will resolve to the - // base function rather than the derived function once we get to the - // base class destructor, and won't free the SSL (causing a leak). - shutdown_ssl_impl(socket_, true); -} - -bool SSLClient::is_valid() const { return ctx_; } - -void SSLClient::set_ca_cert_store(X509_STORE *ca_cert_store) { - if (ca_cert_store) { - if (ctx_) { - if (SSL_CTX_get_cert_store(ctx_) != ca_cert_store) { - // Free memory allocated for old cert and use new store - // `ca_cert_store` - SSL_CTX_set_cert_store(ctx_, ca_cert_store); - ca_cert_store_ = ca_cert_store; - } - } else { - X509_STORE_free(ca_cert_store); - } - } -} - -void SSLClient::load_ca_cert_store(const char *ca_cert, - std::size_t size) { - set_ca_cert_store(ClientImpl::create_ca_cert_store(ca_cert, size)); -} - -long SSLClient::get_openssl_verify_result() const { - return verify_result_; -} - -SSL_CTX *SSLClient::ssl_context() const { return ctx_; } - -bool SSLClient::create_and_connect_socket(Socket &socket, Error &error) { - if (!is_valid()) { - error = Error::SSLConnection; - return false; - } - return ClientImpl::create_and_connect_socket(socket, error); -} - -// Assumes that socket_mutex_ is locked and that there are no requests in -// flight -bool SSLClient::connect_with_proxy( - Socket &socket, - std::chrono::time_point start_time, - Response &res, bool &success, Error &error) { - success = true; - Response proxy_res; - if (!detail::process_client_socket( - socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, - start_time, [&](Stream &strm) { - Request req2; - req2.method = "CONNECT"; - req2.path = - detail::make_host_and_port_string_always_port(host_, port_); - if (max_timeout_msec_ > 0) { - req2.start_time_ = std::chrono::steady_clock::now(); - } - return process_request(strm, req2, proxy_res, false, error); - })) { - // Thread-safe to close everything because we are assuming there are no - // requests in flight - shutdown_ssl(socket, true); - shutdown_socket(socket); - close_socket(socket); - success = false; - return false; - } - - if (proxy_res.status == StatusCode::ProxyAuthenticationRequired_407) { - if (!proxy_digest_auth_username_.empty() && - !proxy_digest_auth_password_.empty()) { - std::map auth; - if (detail::parse_www_authenticate(proxy_res, auth, true)) { - // Close the current socket and create a new one for the authenticated - // request - shutdown_ssl(socket, true); - shutdown_socket(socket); - close_socket(socket); - - // Create a new socket for the authenticated CONNECT request - if (!ensure_socket_connection(socket, error)) { - success = false; - output_error_log(error, nullptr); - return false; - } - - proxy_res = Response(); - if (!detail::process_client_socket( - socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, - start_time, [&](Stream &strm) { - Request req3; - req3.method = "CONNECT"; - req3.path = detail::make_host_and_port_string_always_port( - host_, port_); - req3.headers.insert(detail::make_digest_authentication_header( - req3, auth, 1, detail::random_string(10), - proxy_digest_auth_username_, proxy_digest_auth_password_, - true)); - if (max_timeout_msec_ > 0) { - req3.start_time_ = std::chrono::steady_clock::now(); - } - return process_request(strm, req3, proxy_res, false, error); - })) { - // Thread-safe to close everything because we are assuming there are - // no requests in flight - shutdown_ssl(socket, true); - shutdown_socket(socket); - close_socket(socket); - success = false; - return false; - } - } - } - } - - // If status code is not 200, proxy request is failed. - // Set error to ProxyConnection and return proxy response - // as the response of the request - if (proxy_res.status != StatusCode::OK_200) { - error = Error::ProxyConnection; - output_error_log(error, nullptr); - res = std::move(proxy_res); - // Thread-safe to close everything because we are assuming there are - // no requests in flight - shutdown_ssl(socket, true); - shutdown_socket(socket); - close_socket(socket); - return false; - } - - return true; -} - -bool SSLClient::load_certs() { - auto ret = true; - - std::call_once(initialize_cert_, [&]() { - std::lock_guard guard(ctx_mutex_); - if (!ca_cert_file_path_.empty()) { - if (!SSL_CTX_load_verify_locations(ctx_, ca_cert_file_path_.c_str(), - nullptr)) { - last_openssl_error_ = ERR_get_error(); - ret = false; - } - } else if (!ca_cert_dir_path_.empty()) { - if (!SSL_CTX_load_verify_locations(ctx_, nullptr, - ca_cert_dir_path_.c_str())) { - last_openssl_error_ = ERR_get_error(); - ret = false; - } - } else if (!ca_cert_store_) { - auto loaded = false; -#ifdef _WIN32 - loaded = - detail::load_system_certs_on_windows(SSL_CTX_get_cert_store(ctx_)); -#elif defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) && TARGET_OS_MAC - loaded = detail::load_system_certs_on_macos(SSL_CTX_get_cert_store(ctx_)); -#endif // _WIN32 - if (!loaded) { SSL_CTX_set_default_verify_paths(ctx_); } - } - }); - - return ret; -} - -bool SSLClient::initialize_ssl(Socket &socket, Error &error) { - auto ssl = detail::ssl_new( - socket.sock, ctx_, ctx_mutex_, - [&](SSL *ssl2) { - if (server_certificate_verification_) { - if (!load_certs()) { - error = Error::SSLLoadingCerts; - output_error_log(error, nullptr); - return false; - } - SSL_set_verify(ssl2, SSL_VERIFY_NONE, nullptr); - } - - if (!detail::ssl_connect_or_accept_nonblocking( - socket.sock, ssl2, SSL_connect, connection_timeout_sec_, - connection_timeout_usec_, &last_ssl_error_)) { - error = Error::SSLConnection; - output_error_log(error, nullptr); - return false; - } - - if (server_certificate_verification_) { - auto verification_status = SSLVerifierResponse::NoDecisionMade; - - if (server_certificate_verifier_) { - verification_status = server_certificate_verifier_(ssl2); - } - - if (verification_status == SSLVerifierResponse::CertificateRejected) { - last_openssl_error_ = ERR_get_error(); - error = Error::SSLServerVerification; - output_error_log(error, nullptr); - return false; - } - - if (verification_status == SSLVerifierResponse::NoDecisionMade) { - verify_result_ = SSL_get_verify_result(ssl2); - - if (verify_result_ != X509_V_OK) { - last_openssl_error_ = static_cast(verify_result_); - error = Error::SSLServerVerification; - output_error_log(error, nullptr); - return false; - } - - auto server_cert = SSL_get1_peer_certificate(ssl2); - auto se = detail::scope_exit([&] { X509_free(server_cert); }); - - if (server_cert == nullptr) { - last_openssl_error_ = ERR_get_error(); - error = Error::SSLServerVerification; - output_error_log(error, nullptr); - return false; - } - - if (server_hostname_verification_) { - if (!verify_host(server_cert)) { - last_openssl_error_ = X509_V_ERR_HOSTNAME_MISMATCH; - error = Error::SSLServerHostnameVerification; - output_error_log(error, nullptr); - return false; - } - } - } - } - - return true; - }, - [&](SSL *ssl2) { - // Set SNI only if host is not IP address - if (!detail::is_ip_address(host_)) { -#if defined(OPENSSL_IS_BORINGSSL) - SSL_set_tlsext_host_name(ssl2, host_.c_str()); -#else - // NOTE: Direct call instead of using the OpenSSL macro to suppress - // -Wold-style-cast warning - SSL_ctrl(ssl2, SSL_CTRL_SET_TLSEXT_HOSTNAME, - TLSEXT_NAMETYPE_host_name, - static_cast(const_cast(host_.c_str()))); -#endif - } - return true; - }); - - if (ssl) { - socket.ssl = ssl; - return true; - } - - if (ctx_ == nullptr) { - error = Error::SSLConnection; - last_openssl_error_ = ERR_get_error(); - } - - shutdown_socket(socket); - close_socket(socket); - return false; -} - -void SSLClient::shutdown_ssl(Socket &socket, bool shutdown_gracefully) { - shutdown_ssl_impl(socket, shutdown_gracefully); -} - -void SSLClient::shutdown_ssl_impl(Socket &socket, - bool shutdown_gracefully) { - if (socket.sock == INVALID_SOCKET) { - assert(socket.ssl == nullptr); - return; - } - if (socket.ssl) { - detail::ssl_delete(ctx_mutex_, socket.ssl, socket.sock, - shutdown_gracefully); - socket.ssl = nullptr; - } - assert(socket.ssl == nullptr); -} - -bool SSLClient::process_socket( - const Socket &socket, - std::chrono::time_point start_time, - std::function callback) { - assert(socket.ssl); - return detail::process_client_socket_ssl( - socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_, - write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, start_time, - std::move(callback)); -} - -bool SSLClient::is_ssl() const { return true; } - -bool SSLClient::verify_host(X509 *server_cert) const { - /* Quote from RFC2818 section 3.1 "Server Identity" - - If a subjectAltName extension of type dNSName is present, that MUST - be used as the identity. Otherwise, the (most specific) Common Name - field in the Subject field of the certificate MUST be used. Although - the use of the Common Name is existing practice, it is deprecated and - Certification Authorities are encouraged to use the dNSName instead. - - Matching is performed using the matching rules specified by - [RFC2459]. If more than one identity of a given type is present in - the certificate (e.g., more than one dNSName name, a match in any one - of the set is considered acceptable.) Names may contain the wildcard - character * which is considered to match any single domain name - component or component fragment. E.g., *.a.com matches foo.a.com but - not bar.foo.a.com. f*.com matches foo.com but not bar.com. - - In some cases, the URI is specified as an IP address rather than a - hostname. In this case, the iPAddress subjectAltName must be present - in the certificate and must exactly match the IP in the URI. - - */ - return verify_host_with_subject_alt_name(server_cert) || - verify_host_with_common_name(server_cert); -} - -bool -SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const { - auto ret = false; - - auto type = GEN_DNS; - - struct in6_addr addr6 = {}; - struct in_addr addr = {}; - size_t addr_len = 0; - -#ifndef __MINGW32__ - if (inet_pton(AF_INET6, host_.c_str(), &addr6)) { - type = GEN_IPADD; - addr_len = sizeof(struct in6_addr); - } else if (inet_pton(AF_INET, host_.c_str(), &addr)) { - type = GEN_IPADD; - addr_len = sizeof(struct in_addr); +ClientConnection::~ClientConnection() { +#ifdef CPPHTTPLIB_SSL_ENABLED + if (session) { + tls::shutdown(session, true); + tls::free_session(session); + session = nullptr; } #endif - auto alt_names = static_cast( - X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr)); - - if (alt_names) { - auto dsn_matched = false; - auto ip_matched = false; - - auto count = sk_GENERAL_NAME_num(alt_names); - - for (decltype(count) i = 0; i < count && !dsn_matched; i++) { - auto val = sk_GENERAL_NAME_value(alt_names, i); - if (!val || val->type != type) { continue; } - - auto name = - reinterpret_cast(ASN1_STRING_get0_data(val->d.ia5)); - if (name == nullptr) { continue; } - - auto name_len = static_cast(ASN1_STRING_length(val->d.ia5)); - - switch (type) { - case GEN_DNS: dsn_matched = check_host_name(name, name_len); break; - - case GEN_IPADD: - if (!memcmp(&addr6, name, addr_len) || !memcmp(&addr, name, addr_len)) { - ip_matched = true; - } - break; - } - } - - if (dsn_matched || ip_matched) { ret = true; } + if (sock != INVALID_SOCKET) { + detail::close_socket(sock); + sock = INVALID_SOCKET; } - - GENERAL_NAMES_free(const_cast( - reinterpret_cast(alt_names))); - return ret; } -bool SSLClient::verify_host_with_common_name(X509 *server_cert) const { - const auto subject_name = X509_get_subject_name(server_cert); - - if (subject_name != nullptr) { - char name[BUFSIZ]; - auto name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName, - name, sizeof(name)); - - if (name_len != -1) { - return check_host_name(name, static_cast(name_len)); - } - } - - return false; -} - -bool SSLClient::check_host_name(const char *pattern, - size_t pattern_len) const { - // Exact match (case-insensitive) - if (host_.size() == pattern_len && - detail::case_ignore::equal(host_, std::string(pattern, pattern_len))) { - return true; - } - - // Wildcard match - // https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484 - std::vector pattern_components; - detail::split(&pattern[0], &pattern[pattern_len], '.', - [&](const char *b, const char *e) { - pattern_components.emplace_back(b, e); - }); - - if (host_components_.size() != pattern_components.size()) { return false; } - - auto itr = pattern_components.begin(); - for (const auto &h : host_components_) { - auto &p = *itr; - if (!httplib::detail::case_ignore::equal(p, h) && p != "*") { - bool partial_match = false; - if (!p.empty() && p[p.size() - 1] == '*') { - const auto prefix_length = p.size() - 1; - if (prefix_length == 0) { - partial_match = true; - } else if (h.size() >= prefix_length) { - partial_match = - std::equal(p.begin(), - p.begin() + static_cast( - prefix_length), - h.begin(), [](const char ca, const char cb) { - return httplib::detail::case_ignore::to_lower(ca) == - httplib::detail::case_ignore::to_lower(cb); - }); - } - } - if (!partial_match) { return false; } - } - ++itr; - } - - return true; -} -#endif - // Universal client implementation Client::Client(const std::string &scheme_host_port) : Client(scheme_host_port, std::string(), std::string()) {} @@ -9973,7 +9891,7 @@ Client::Client(const std::string &scheme_host_port, if (std::regex_match(scheme_host_port, m, re)) { auto scheme = m[1].str(); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED if (!scheme.empty() && (scheme != "http" && scheme != "https")) { #else if (!scheme.empty() && scheme != "http") { @@ -9994,7 +9912,7 @@ Client::Client(const std::string &scheme_host_port, auto port = !port_str.empty() ? std::stoi(port_str) : (is_ssl ? 443 : 80); if (is_ssl) { -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED cli_ = detail::make_unique(host, port, client_cert_path, client_key_path); is_ssl_ = is_ssl; @@ -10579,12 +10497,6 @@ void Client::set_basic_auth(const std::string &username, void Client::set_bearer_token_auth(const std::string &token) { cli_->set_bearer_token_auth(token); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -void Client::set_digest_auth(const std::string &username, - const std::string &password) { - cli_->set_digest_auth(username, password); -} -#endif void Client::set_keep_alive(bool on) { cli_->set_keep_alive(on); } void Client::set_follow_location(bool on) { @@ -10602,6 +10514,10 @@ void Client::set_compress(bool on) { cli_->set_compress(on); } void Client::set_decompress(bool on) { cli_->set_decompress(on); } +void Client::set_payload_max_length(size_t length) { + cli_->set_payload_max_length(length); +} + void Client::set_interface(const std::string &intf) { cli_->set_interface(intf); } @@ -10616,27 +10532,6 @@ void Client::set_proxy_basic_auth(const std::string &username, void Client::set_proxy_bearer_token_auth(const std::string &token) { cli_->set_proxy_bearer_token_auth(token); } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -void Client::set_proxy_digest_auth(const std::string &username, - const std::string &password) { - cli_->set_proxy_digest_auth(username, password); -} -#endif - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -void Client::enable_server_certificate_verification(bool enabled) { - cli_->enable_server_certificate_verification(enabled); -} - -void Client::enable_server_hostname_verification(bool enabled) { - cli_->enable_server_hostname_verification(enabled); -} - -void Client::set_server_certificate_verifier( - std::function verifier) { - cli_->set_server_certificate_verifier(verifier); -} -#endif void Client::set_logger(Logger logger) { cli_->set_logger(std::move(logger)); @@ -10646,35 +10541,3399 @@ void Client::set_error_logger(ErrorLogger error_logger) { cli_->set_error_logger(std::move(error_logger)); } +/* + * Group 6: SSL Server and Client implementation + */ + +#ifdef CPPHTTPLIB_SSL_ENABLED + +// SSL HTTP server implementation +SSLServer::SSLServer(const char *cert_path, const char *private_key_path, + const char *client_ca_cert_file_path, + const char *client_ca_cert_dir_path, + const char *private_key_password) { + using namespace tls; + + ctx_ = create_server_context(); + if (!ctx_) { return; } + + // Load server certificate and private key + if (!set_server_cert_file(ctx_, cert_path, private_key_path, + private_key_password)) { + last_ssl_error_ = static_cast(get_error()); + free_context(ctx_); + ctx_ = nullptr; + return; + } + + // Load client CA certificates for client authentication + if (client_ca_cert_file_path || client_ca_cert_dir_path) { + if (!set_client_ca_file(ctx_, client_ca_cert_file_path, + client_ca_cert_dir_path)) { + last_ssl_error_ = static_cast(get_error()); + free_context(ctx_); + ctx_ = nullptr; + return; + } + // Enable client certificate verification + set_verify_client(ctx_, true); + } +} + +SSLServer::SSLServer(const PemMemory &pem) { + using namespace tls; + ctx_ = create_server_context(); + if (ctx_) { + if (!set_server_cert_pem(ctx_, pem.cert_pem, pem.key_pem, + pem.private_key_password)) { + last_ssl_error_ = static_cast(get_error()); + free_context(ctx_); + ctx_ = nullptr; + } else if (pem.client_ca_pem && pem.client_ca_pem_len > 0) { + if (!load_ca_pem(ctx_, pem.client_ca_pem, pem.client_ca_pem_len)) { + last_ssl_error_ = static_cast(get_error()); + free_context(ctx_); + ctx_ = nullptr; + } else { + set_verify_client(ctx_, true); + } + } + } +} + +SSLServer::SSLServer(const tls::ContextSetupCallback &setup_callback) { + using namespace tls; + ctx_ = create_server_context(); + if (ctx_) { + if (!setup_callback(ctx_)) { + free_context(ctx_); + ctx_ = nullptr; + } + } +} + +SSLServer::~SSLServer() { + if (ctx_) { tls::free_context(ctx_); } +} + +bool SSLServer::is_valid() const { return ctx_ != nullptr; } + +bool SSLServer::process_and_close_socket(socket_t sock) { + using namespace tls; + + // Create TLS session with mutex protection + session_t session = nullptr; + { + std::lock_guard guard(ctx_mutex_); + session = create_session(static_cast(ctx_), sock); + } + + if (!session) { + last_ssl_error_ = static_cast(get_error()); + detail::shutdown_socket(sock); + detail::close_socket(sock); + return false; + } + + // Use scope_exit to ensure cleanup on all paths (including exceptions) + bool handshake_done = false; + bool ret = false; + auto cleanup = detail::scope_exit([&] { + // Shutdown gracefully if handshake succeeded and processing was successful + if (handshake_done) { shutdown(session, ret); } + free_session(session); + detail::shutdown_socket(sock); + detail::close_socket(sock); + }); + + // Perform TLS accept handshake with timeout + TlsError tls_err; + if (!accept_nonblocking(session, sock, read_timeout_sec_, read_timeout_usec_, + &tls_err)) { #ifdef CPPHTTPLIB_OPENSSL_SUPPORT + // Map TlsError to legacy ssl_error for backward compatibility + if (tls_err.code == ErrorCode::WantRead) { + last_ssl_error_ = SSL_ERROR_WANT_READ; + } else if (tls_err.code == ErrorCode::WantWrite) { + last_ssl_error_ = SSL_ERROR_WANT_WRITE; + } else { + last_ssl_error_ = SSL_ERROR_SSL; + } +#else + last_ssl_error_ = static_cast(get_error()); +#endif + return false; + } + + handshake_done = true; + + std::string remote_addr; + int remote_port = 0; + detail::get_remote_ip_and_port(sock, remote_addr, remote_port); + + std::string local_addr; + int local_port = 0; + detail::get_local_ip_and_port(sock, local_addr, local_port); + + ret = detail::process_server_socket_ssl( + svr_sock_, session, sock, keep_alive_max_count_, keep_alive_timeout_sec_, + read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, + [&](Stream &strm, bool close_connection, bool &connection_closed) { + return process_request(strm, remote_addr, remote_port, local_addr, + local_port, close_connection, connection_closed, + [&](Request &req) { req.ssl = session; }); + }); + + return ret; +} + +bool SSLServer::update_certs_pem(const char *cert_pem, + const char *key_pem, + const char *client_ca_pem, + const char *password) { + if (!ctx_) { return false; } + std::lock_guard guard(ctx_mutex_); + if (!tls::update_server_cert(ctx_, cert_pem, key_pem, password)) { + return false; + } + if (client_ca_pem) { + return tls::update_server_client_ca(ctx_, client_ca_pem); + } + return true; +} + +// SSL HTTP client implementation +SSLClient::~SSLClient() { + if (ctx_) { tls::free_context(ctx_); } + // Make sure to shut down SSL since shutdown_ssl will resolve to the + // base function rather than the derived function once we get to the + // base class destructor, and won't free the SSL (causing a leak). + shutdown_ssl_impl(socket_, true); +} + +bool SSLClient::is_valid() const { return ctx_ != nullptr; } + +void SSLClient::shutdown_ssl(Socket &socket, bool shutdown_gracefully) { + shutdown_ssl_impl(socket, shutdown_gracefully); +} + +void SSLClient::shutdown_ssl_impl(Socket &socket, + bool shutdown_gracefully) { + if (socket.sock == INVALID_SOCKET) { + assert(socket.ssl == nullptr); + return; + } + if (socket.ssl) { + tls::shutdown(socket.ssl, shutdown_gracefully); + { + std::lock_guard guard(ctx_mutex_); + tls::free_session(socket.ssl); + } + socket.ssl = nullptr; + } + assert(socket.ssl == nullptr); +} + +bool SSLClient::process_socket( + const Socket &socket, + std::chrono::time_point start_time, + std::function callback) { + assert(socket.ssl); + return detail::process_client_socket_ssl( + socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, start_time, + std::move(callback)); +} + +bool SSLClient::is_ssl() const { return true; } + +bool SSLClient::create_and_connect_socket(Socket &socket, Error &error) { + if (!is_valid()) { + error = Error::SSLConnection; + return false; + } + return ClientImpl::create_and_connect_socket(socket, error); +} + +// Assumes that socket_mutex_ is locked and that there are no requests in +// flight +bool SSLClient::connect_with_proxy( + Socket &socket, + std::chrono::time_point start_time, + Response &res, bool &success, Error &error) { + success = true; + Response proxy_res; + if (!detail::process_client_socket( + socket.sock, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, + start_time, [&](Stream &strm) { + Request req2; + req2.method = "CONNECT"; + req2.path = + detail::make_host_and_port_string_always_port(host_, port_); + if (max_timeout_msec_ > 0) { + req2.start_time_ = std::chrono::steady_clock::now(); + } + return process_request(strm, req2, proxy_res, false, error); + })) { + // Thread-safe to close everything because we are assuming there are no + // requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); + success = false; + return false; + } + + if (proxy_res.status == StatusCode::ProxyAuthenticationRequired_407) { + if (!proxy_digest_auth_username_.empty() && + !proxy_digest_auth_password_.empty()) { + std::map auth; + if (detail::parse_www_authenticate(proxy_res, auth, true)) { + // Close the current socket and create a new one for the authenticated + // request + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); + + // Create a new socket for the authenticated CONNECT request + if (!ensure_socket_connection(socket, error)) { + success = false; + output_error_log(error, nullptr); + return false; + } + + proxy_res = Response(); + if (!detail::process_client_socket( + socket.sock, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, max_timeout_msec_, + start_time, [&](Stream &strm) { + Request req3; + req3.method = "CONNECT"; + req3.path = detail::make_host_and_port_string_always_port( + host_, port_); + req3.headers.insert(detail::make_digest_authentication_header( + req3, auth, 1, detail::random_string(10), + proxy_digest_auth_username_, proxy_digest_auth_password_, + true)); + if (max_timeout_msec_ > 0) { + req3.start_time_ = std::chrono::steady_clock::now(); + } + return process_request(strm, req3, proxy_res, false, error); + })) { + // Thread-safe to close everything because we are assuming there are + // no requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); + success = false; + return false; + } + } + } + } + + // If status code is not 200, proxy request is failed. + // Set error to ProxyConnection and return proxy response + // as the response of the request + if (proxy_res.status != StatusCode::OK_200) { + error = Error::ProxyConnection; + output_error_log(error, nullptr); + res = std::move(proxy_res); + // Thread-safe to close everything because we are assuming there are + // no requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); + return false; + } + + return true; +} + +bool SSLClient::ensure_socket_connection(Socket &socket, Error &error) { + if (!ClientImpl::ensure_socket_connection(socket, error)) { return false; } + + if (!proxy_host_.empty() && proxy_port_ != -1) { return true; } + + if (!initialize_ssl(socket, error)) { + shutdown_socket(socket); + close_socket(socket); + return false; + } + + return true; +} + +// SSL HTTP client implementation +SSLClient::SSLClient(const std::string &host) + : SSLClient(host, 443, std::string(), std::string()) {} + +SSLClient::SSLClient(const std::string &host, int port) + : SSLClient(host, port, std::string(), std::string()) {} + +SSLClient::SSLClient(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path, + const std::string &private_key_password) + : ClientImpl(host, port, client_cert_path, client_key_path) { + ctx_ = tls::create_client_context(); + if (!ctx_) { return; } + + tls::set_min_version(ctx_, tls::Version::TLS1_2); + + if (!client_cert_path.empty() && !client_key_path.empty()) { + const char *password = + private_key_password.empty() ? nullptr : private_key_password.c_str(); + if (!tls::set_client_cert_file(ctx_, client_cert_path.c_str(), + client_key_path.c_str(), password)) { + last_backend_error_ = tls::get_error(); + tls::free_context(ctx_); + ctx_ = nullptr; + } + } +} + +SSLClient::SSLClient(const std::string &host, int port, + const PemMemory &pem) + : ClientImpl(host, port) { + ctx_ = tls::create_client_context(); + if (!ctx_) { return; } + + tls::set_min_version(ctx_, tls::Version::TLS1_2); + + if (pem.cert_pem && pem.key_pem) { + if (!tls::set_client_cert_pem(ctx_, pem.cert_pem, pem.key_pem, + pem.private_key_password)) { + last_backend_error_ = tls::get_error(); + tls::free_context(ctx_); + ctx_ = nullptr; + } + } +} + +void SSLClient::set_ca_cert_store(tls::ca_store_t ca_cert_store) { + if (ca_cert_store && ctx_) { + // set_ca_store takes ownership of ca_cert_store + tls::set_ca_store(ctx_, ca_cert_store); + } else if (ca_cert_store) { + tls::free_ca_store(ca_cert_store); + } +} + +void +SSLClient::set_server_certificate_verifier(tls::VerifyCallback verifier) { + if (!ctx_) { return; } + tls::set_verify_callback(ctx_, verifier); +} + +void SSLClient::set_session_verifier( + std::function verifier) { + session_verifier_ = std::move(verifier); +} + +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +void SSLClient::enable_windows_certificate_verification(bool enabled) { + enable_windows_cert_verification_ = enabled; +} +#endif + +void SSLClient::load_ca_cert_store(const char *ca_cert, + std::size_t size) { + if (ctx_ && ca_cert && size > 0) { + ca_cert_pem_.assign(ca_cert, size); // Store for redirect transfer + tls::load_ca_pem(ctx_, ca_cert, size); + } +} + +bool SSLClient::load_certs() { + auto ret = true; + + std::call_once(initialize_cert_, [&]() { + std::lock_guard guard(ctx_mutex_); + + if (!ca_cert_file_path_.empty()) { + if (!tls::load_ca_file(ctx_, ca_cert_file_path_.c_str())) { + last_backend_error_ = tls::get_error(); + ret = false; + } + } else if (!ca_cert_dir_path_.empty()) { + if (!tls::load_ca_dir(ctx_, ca_cert_dir_path_.c_str())) { + last_backend_error_ = tls::get_error(); + ret = false; + } + } else if (ca_cert_pem_.empty()) { + if (!tls::load_system_certs(ctx_)) { + last_backend_error_ = tls::get_error(); + } + } + }); + + return ret; +} + +bool SSLClient::initialize_ssl(Socket &socket, Error &error) { + using namespace tls; + + // Load CA certificates if server verification is enabled + if (server_certificate_verification_) { + if (!load_certs()) { + error = Error::SSLLoadingCerts; + output_error_log(error, nullptr); + return false; + } + } + + bool is_ip = detail::is_ip_address(host_); + +#ifdef CPPHTTPLIB_MBEDTLS_SUPPORT + // MbedTLS needs explicit verification mode (OpenSSL uses SSL_VERIFY_NONE + // by default and performs all verification post-handshake). + // For IP addresses with verification enabled, use OPTIONAL mode since + // MbedTLS requires hostname for VERIFY_REQUIRED. + if (is_ip && server_certificate_verification_) { + set_verify_client(ctx_, false); + } else { + set_verify_client(ctx_, server_certificate_verification_); + } +#endif + + // Create TLS session + session_t session = nullptr; + { + std::lock_guard guard(ctx_mutex_); + session = create_session(ctx_, socket.sock); + } + + if (!session) { + error = Error::SSLConnection; + last_backend_error_ = get_error(); + return false; + } + + // Use scope_exit to ensure session is freed on error paths + bool success = false; + auto session_guard = detail::scope_exit([&] { + if (!success) { free_session(session); } + }); + + // Set SNI extension (skip for IP addresses per RFC 6066). + // On MbedTLS, set_sni also enables hostname verification internally. + // On OpenSSL, set_sni only sets SNI; verification is done post-handshake. + if (!is_ip) { + if (!set_sni(session, host_.c_str())) { + error = Error::SSLConnection; + last_backend_error_ = get_error(); + return false; + } + } + + // Perform non-blocking TLS handshake with timeout + TlsError tls_err; + if (!connect_nonblocking(session, socket.sock, connection_timeout_sec_, + connection_timeout_usec_, &tls_err)) { + last_ssl_error_ = static_cast(tls_err.code); + last_backend_error_ = tls_err.backend_code; + if (tls_err.code == ErrorCode::CertVerifyFailed) { + error = Error::SSLServerVerification; + } else if (tls_err.code == ErrorCode::HostnameMismatch) { + error = Error::SSLServerHostnameVerification; + } else { + error = Error::SSLConnection; + } + output_error_log(error, nullptr); + return false; + } + + // Post-handshake session verifier callback + auto verification_status = SSLVerifierResponse::NoDecisionMade; + if (session_verifier_) { verification_status = session_verifier_(session); } + + if (verification_status == SSLVerifierResponse::CertificateRejected) { + last_backend_error_ = get_error(); + error = Error::SSLServerVerification; + output_error_log(error, nullptr); + return false; + } + + // Default server certificate verification + if (verification_status == SSLVerifierResponse::NoDecisionMade && + server_certificate_verification_) { + verify_result_ = tls::get_verify_result(session); + if (verify_result_ != 0) { + last_backend_error_ = static_cast(verify_result_); + error = Error::SSLServerVerification; + output_error_log(error, nullptr); + return false; + } + + auto server_cert = get_peer_cert(session); + if (!server_cert) { + last_backend_error_ = get_error(); + error = Error::SSLServerVerification; + output_error_log(error, nullptr); + return false; + } + auto cert_guard = detail::scope_exit([&] { free_cert(server_cert); }); + + // Hostname verification (post-handshake for all cases). + // On OpenSSL, verification is always post-handshake (SSL_VERIFY_NONE). + // On MbedTLS, set_sni already enabled hostname verification during + // handshake for non-IP hosts, but this check is still needed for IP + // addresses where SNI is not set. + if (server_hostname_verification_) { + if (!verify_hostname(server_cert, host_.c_str())) { + last_backend_error_ = hostname_mismatch_code(); + error = Error::SSLServerHostnameVerification; + output_error_log(error, nullptr); + return false; + } + } + +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) + // Additional Windows Schannel verification. + // This provides real-time certificate validation with Windows Update + // integration, working with both OpenSSL and MbedTLS backends. + // Skip when a custom CA cert is specified, as the Windows certificate + // store would not know about user-provided CA certificates. + if (enable_windows_cert_verification_ && ca_cert_file_path_.empty() && + ca_cert_dir_path_.empty() && ca_cert_pem_.empty()) { + std::vector der; + if (get_cert_der(server_cert, der)) { + unsigned long wincrypt_error = 0; + if (!detail::verify_cert_with_windows_schannel( + der, host_, server_hostname_verification_, wincrypt_error)) { + last_backend_error_ = wincrypt_error; + error = Error::SSLServerVerification; + output_error_log(error, nullptr); + return false; + } + } + } +#endif + } + + success = true; + socket.ssl = session; + return true; +} + +void Client::set_digest_auth(const std::string &username, + const std::string &password) { + cli_->set_digest_auth(username, password); +} + +void Client::set_proxy_digest_auth(const std::string &username, + const std::string &password) { + cli_->set_proxy_digest_auth(username, password); +} + +void Client::enable_server_certificate_verification(bool enabled) { + cli_->enable_server_certificate_verification(enabled); +} + +void Client::enable_server_hostname_verification(bool enabled) { + cli_->enable_server_hostname_verification(enabled); +} + +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) +void Client::enable_windows_certificate_verification(bool enabled) { + if (is_ssl_) { + static_cast(*cli_).enable_windows_certificate_verification( + enabled); + } +} +#endif + void Client::set_ca_cert_path(const std::string &ca_cert_file_path, const std::string &ca_cert_dir_path) { cli_->set_ca_cert_path(ca_cert_file_path, ca_cert_dir_path); } -void Client::set_ca_cert_store(X509_STORE *ca_cert_store) { +void Client::set_ca_cert_store(tls::ca_store_t ca_cert_store) { if (is_ssl_) { static_cast(*cli_).set_ca_cert_store(ca_cert_store); - } else { - cli_->set_ca_cert_store(ca_cert_store); + } else if (ca_cert_store) { + tls::free_ca_store(ca_cert_store); } } void Client::load_ca_cert_store(const char *ca_cert, std::size_t size) { - set_ca_cert_store(cli_->create_ca_cert_store(ca_cert, size)); + set_ca_cert_store(tls::create_ca_store(ca_cert, size)); } -long Client::get_openssl_verify_result() const { +void +Client::set_server_certificate_verifier(tls::VerifyCallback verifier) { if (is_ssl_) { - return static_cast(*cli_).get_openssl_verify_result(); + static_cast(*cli_).set_server_certificate_verifier( + std::move(verifier)); } - return -1; // NOTE: -1 doesn't match any of X509_V_ERR_??? } +void Client::set_session_verifier( + std::function verifier) { + if (is_ssl_) { + static_cast(*cli_).set_session_verifier(std::move(verifier)); + } +} + +tls::ctx_t Client::tls_context() const { + if (is_ssl_) { return static_cast(*cli_).tls_context(); } + return nullptr; +} + +#endif // CPPHTTPLIB_SSL_ENABLED + +/* + * Group 7: TLS abstraction layer - Common API + */ + +#ifdef CPPHTTPLIB_SSL_ENABLED + +namespace tls { + +// Helper for PeerCert construction +PeerCert get_peer_cert_from_session(const_session_t session) { + return PeerCert(get_peer_cert(session)); +} + +namespace impl { + +VerifyCallback &get_verify_callback() { + static thread_local VerifyCallback callback; + return callback; +} + +VerifyCallback &get_mbedtls_verify_callback() { + static thread_local VerifyCallback callback; + return callback; +} + +} // namespace impl + +bool set_client_ca_file(ctx_t ctx, const char *ca_file, + const char *ca_dir) { + if (!ctx) { return false; } + + bool success = true; + if (ca_file && *ca_file) { + if (!load_ca_file(ctx, ca_file)) { success = false; } + } + if (ca_dir && *ca_dir) { + if (!load_ca_dir(ctx, ca_dir)) { success = false; } + } + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + // Set CA list for client certificate request (CertificateRequest message) + if (ca_file && *ca_file) { + auto list = SSL_load_client_CA_file(ca_file); + if (list) { SSL_CTX_set_client_CA_list(static_cast(ctx), list); } + } +#endif + + return success; +} + +bool set_server_cert_pem(ctx_t ctx, const char *cert, const char *key, + const char *password) { + return set_client_cert_pem(ctx, cert, key, password); +} + +bool set_server_cert_file(ctx_t ctx, const char *cert_path, + const char *key_path, const char *password) { + return set_client_cert_file(ctx, cert_path, key_path, password); +} + +// PeerCert implementation +PeerCert::PeerCert() = default; + +PeerCert::PeerCert(cert_t cert) : cert_(cert) {} + +PeerCert::PeerCert(PeerCert &&other) noexcept : cert_(other.cert_) { + other.cert_ = nullptr; +} + +PeerCert &PeerCert::operator=(PeerCert &&other) noexcept { + if (this != &other) { + if (cert_) { free_cert(cert_); } + cert_ = other.cert_; + other.cert_ = nullptr; + } + return *this; +} + +PeerCert::~PeerCert() { + if (cert_) { free_cert(cert_); } +} + +PeerCert::operator bool() const { return cert_ != nullptr; } + +std::string PeerCert::subject_cn() const { + return cert_ ? get_cert_subject_cn(cert_) : std::string(); +} + +std::string PeerCert::issuer_name() const { + return cert_ ? get_cert_issuer_name(cert_) : std::string(); +} + +bool PeerCert::check_hostname(const char *hostname) const { + return cert_ ? verify_hostname(cert_, hostname) : false; +} + +std::vector PeerCert::sans() const { + std::vector result; + if (cert_) { get_cert_sans(cert_, result); } + return result; +} + +bool PeerCert::validity(time_t ¬_before, time_t ¬_after) const { + return cert_ ? get_cert_validity(cert_, not_before, not_after) : false; +} + +std::string PeerCert::serial() const { + return cert_ ? get_cert_serial(cert_) : std::string(); +} + +// VerifyContext method implementations +std::string VerifyContext::subject_cn() const { + return cert ? get_cert_subject_cn(cert) : std::string(); +} + +std::string VerifyContext::issuer_name() const { + return cert ? get_cert_issuer_name(cert) : std::string(); +} + +bool VerifyContext::check_hostname(const char *hostname) const { + return cert ? verify_hostname(cert, hostname) : false; +} + +std::vector VerifyContext::sans() const { + std::vector result; + if (cert) { get_cert_sans(cert, result); } + return result; +} + +bool VerifyContext::validity(time_t ¬_before, + time_t ¬_after) const { + return cert ? get_cert_validity(cert, not_before, not_after) : false; +} + +std::string VerifyContext::serial() const { + return cert ? get_cert_serial(cert) : std::string(); +} + +// TlsError static method implementation +std::string TlsError::verify_error_to_string(long error_code) { + return verify_error_string(error_code); +} + +} // namespace tls + +// Request::peer_cert() implementation +tls::PeerCert Request::peer_cert() const { + return tls::get_peer_cert_from_session(ssl); +} + +// Request::sni() implementation +std::string Request::sni() const { + if (!ssl) { return std::string(); } + const char *s = tls::get_sni(ssl); + return s ? std::string(s) : std::string(); +} + +#endif // CPPHTTPLIB_SSL_ENABLED + +/* + * Group 8: TLS abstraction layer - OpenSSL backend + */ + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT SSL_CTX *Client::ssl_context() const { if (is_ssl_) { return static_cast(*cli_).ssl_context(); } return nullptr; } + +void Client::set_server_certificate_verifier( + std::function verifier) { + cli_->set_server_certificate_verifier(verifier); +} + +long Client::get_verify_result() const { + if (is_ssl_) { return static_cast(*cli_).get_verify_result(); } + return -1; // NOTE: -1 doesn't match any of X509_V_ERR_??? +} +#endif // CPPHTTPLIB_OPENSSL_SUPPORT + +/* + * OpenSSL Backend Implementation + */ + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +namespace tls { + +namespace impl { + +// OpenSSL-specific helpers for converting native types to PEM +std::string x509_to_pem(X509 *cert) { + if (!cert) return {}; + BIO *bio = BIO_new(BIO_s_mem()); + if (!bio) return {}; + if (PEM_write_bio_X509(bio, cert) != 1) { + BIO_free(bio); + return {}; + } + char *data = nullptr; + long len = BIO_get_mem_data(bio, &data); + std::string pem(data, static_cast(len)); + BIO_free(bio); + return pem; +} + +std::string evp_pkey_to_pem(EVP_PKEY *key) { + if (!key) return {}; + BIO *bio = BIO_new(BIO_s_mem()); + if (!bio) return {}; + if (PEM_write_bio_PrivateKey(bio, key, nullptr, nullptr, 0, nullptr, + nullptr) != 1) { + BIO_free(bio); + return {}; + } + char *data = nullptr; + long len = BIO_get_mem_data(bio, &data); + std::string pem(data, static_cast(len)); + BIO_free(bio); + return pem; +} + +std::string x509_store_to_pem(X509_STORE *store) { + if (!store) return {}; + std::string pem; + auto objs = X509_STORE_get0_objects(store); + if (!objs) return {}; + auto count = sk_X509_OBJECT_num(objs); + for (decltype(count) i = 0; i < count; i++) { + auto obj = sk_X509_OBJECT_value(objs, i); + if (X509_OBJECT_get_type(obj) == X509_LU_X509) { + auto cert = X509_OBJECT_get0_X509(obj); + if (cert) { pem += x509_to_pem(cert); } + } + } + return pem; +} + +// Helper to map OpenSSL SSL_get_error to ErrorCode +ErrorCode map_ssl_error(int ssl_error, int &out_errno) { + switch (ssl_error) { + case SSL_ERROR_NONE: return ErrorCode::Success; + case SSL_ERROR_WANT_READ: return ErrorCode::WantRead; + case SSL_ERROR_WANT_WRITE: return ErrorCode::WantWrite; + case SSL_ERROR_ZERO_RETURN: return ErrorCode::PeerClosed; + case SSL_ERROR_SYSCALL: out_errno = errno; return ErrorCode::SyscallError; + case SSL_ERROR_SSL: + default: return ErrorCode::Fatal; + } +} + +// Helper: Create client CA list from PEM string +// Returns a new STACK_OF(X509_NAME)* or nullptr on failure +// Caller takes ownership of returned list +STACK_OF(X509_NAME) * + create_client_ca_list_from_pem(const char *ca_pem) { + if (!ca_pem) { return nullptr; } + + auto ca_list = sk_X509_NAME_new_null(); + if (!ca_list) { return nullptr; } + + BIO *bio = BIO_new_mem_buf(ca_pem, -1); + if (!bio) { + sk_X509_NAME_pop_free(ca_list, X509_NAME_free); + return nullptr; + } + + X509 *cert = nullptr; + while ((cert = PEM_read_bio_X509(bio, nullptr, nullptr, nullptr)) != + nullptr) { + X509_NAME *name = X509_get_subject_name(cert); + if (name) { sk_X509_NAME_push(ca_list, X509_NAME_dup(name)); } + X509_free(cert); + } + BIO_free(bio); + + return ca_list; +} + +// Helper: Extract CA names from X509_STORE +// Returns a new STACK_OF(X509_NAME)* or nullptr on failure +// Caller takes ownership of returned list +STACK_OF(X509_NAME) * + extract_client_ca_list_from_store(X509_STORE *store) { + if (!store) { return nullptr; } + + auto ca_list = sk_X509_NAME_new_null(); + if (!ca_list) { return nullptr; } + + auto objs = X509_STORE_get0_objects(store); + if (!objs) { + sk_X509_NAME_free(ca_list); + return nullptr; + } + + auto count = sk_X509_OBJECT_num(objs); + for (decltype(count) i = 0; i < count; i++) { + auto obj = sk_X509_OBJECT_value(objs, i); + if (X509_OBJECT_get_type(obj) == X509_LU_X509) { + auto cert = X509_OBJECT_get0_X509(obj); + if (cert) { + auto subject = X509_get_subject_name(cert); + if (subject) { + auto name_dup = X509_NAME_dup(subject); + if (name_dup) { sk_X509_NAME_push(ca_list, name_dup); } + } + } + } + } + + if (sk_X509_NAME_num(ca_list) == 0) { + sk_X509_NAME_free(ca_list); + return nullptr; + } + + return ca_list; +} + +// OpenSSL verify callback wrapper +int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) { + auto &callback = get_verify_callback(); + if (!callback) { return preverify_ok; } + + // Get SSL object from X509_STORE_CTX + auto ssl = static_cast( + X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx())); + if (!ssl) { return preverify_ok; } + + // Get current certificate and depth + auto cert = X509_STORE_CTX_get_current_cert(ctx); + int depth = X509_STORE_CTX_get_error_depth(ctx); + int error = X509_STORE_CTX_get_error(ctx); + + // Build context + VerifyContext verify_ctx; + verify_ctx.session = static_cast(ssl); + verify_ctx.cert = static_cast(cert); + verify_ctx.depth = depth; + verify_ctx.preverify_ok = (preverify_ok != 0); + verify_ctx.error_code = error; + verify_ctx.error_string = + (error != X509_V_OK) ? X509_verify_cert_error_string(error) : nullptr; + + return callback(verify_ctx) ? 1 : 0; +} + +} // namespace impl + +ctx_t create_client_context() { + SSL_CTX *ctx = SSL_CTX_new(TLS_client_method()); + if (ctx) { + // Disable auto-retry to properly handle non-blocking I/O + SSL_CTX_clear_mode(ctx, SSL_MODE_AUTO_RETRY); + // Set minimum TLS version + SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION); + } + return static_cast(ctx); +} + +void free_context(ctx_t ctx) { + if (ctx) { SSL_CTX_free(static_cast(ctx)); } +} + +bool set_min_version(ctx_t ctx, Version version) { + if (!ctx) return false; + return SSL_CTX_set_min_proto_version(static_cast(ctx), + static_cast(version)) == 1; +} + +bool load_ca_pem(ctx_t ctx, const char *pem, size_t len) { + if (!ctx || !pem || len == 0) return false; + + auto ssl_ctx = static_cast(ctx); + auto store = SSL_CTX_get_cert_store(ssl_ctx); + if (!store) return false; + + auto bio = BIO_new_mem_buf(pem, static_cast(len)); + if (!bio) return false; + + bool ok = true; + X509 *cert = nullptr; + while ((cert = PEM_read_bio_X509(bio, nullptr, nullptr, nullptr)) != + nullptr) { + if (X509_STORE_add_cert(store, cert) != 1) { + // Ignore duplicate errors + auto err = ERR_peek_last_error(); + if (ERR_GET_REASON(err) != X509_R_CERT_ALREADY_IN_HASH_TABLE) { + ok = false; + } + } + X509_free(cert); + if (!ok) break; + } + BIO_free(bio); + + // Clear any "no more certificates" errors + ERR_clear_error(); + return ok; +} + +bool load_ca_file(ctx_t ctx, const char *file_path) { + if (!ctx || !file_path) return false; + return SSL_CTX_load_verify_locations(static_cast(ctx), file_path, + nullptr) == 1; +} + +bool load_ca_dir(ctx_t ctx, const char *dir_path) { + if (!ctx || !dir_path) return false; + return SSL_CTX_load_verify_locations(static_cast(ctx), nullptr, + dir_path) == 1; +} + +bool load_system_certs(ctx_t ctx) { + if (!ctx) return false; + auto ssl_ctx = static_cast(ctx); + +#ifdef _WIN32 + // Windows: Load from system certificate store (ROOT and CA) + auto store = SSL_CTX_get_cert_store(ssl_ctx); + if (!store) return false; + + bool loaded_any = false; + static const wchar_t *store_names[] = {L"ROOT", L"CA"}; + for (auto store_name : store_names) { + auto hStore = CertOpenSystemStoreW(NULL, store_name); + if (!hStore) continue; + + PCCERT_CONTEXT pContext = nullptr; + while ((pContext = CertEnumCertificatesInStore(hStore, pContext)) != + nullptr) { + const unsigned char *data = pContext->pbCertEncoded; + auto x509 = d2i_X509(nullptr, &data, pContext->cbCertEncoded); + if (x509) { + if (X509_STORE_add_cert(store, x509) == 1) { loaded_any = true; } + X509_free(x509); + } + } + CertCloseStore(hStore, 0); + } + return loaded_any; + +#elif defined(__APPLE__) +#ifdef CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN + // macOS: Load from Keychain + auto store = SSL_CTX_get_cert_store(ssl_ctx); + if (!store) return false; + + CFArrayRef certs = nullptr; + if (SecTrustCopyAnchorCertificates(&certs) != errSecSuccess || !certs) { + return SSL_CTX_set_default_verify_paths(ssl_ctx) == 1; + } + + bool loaded_any = false; + auto count = CFArrayGetCount(certs); + for (CFIndex i = 0; i < count; i++) { + auto cert = reinterpret_cast( + const_cast(CFArrayGetValueAtIndex(certs, i))); + CFDataRef der = SecCertificateCopyData(cert); + if (der) { + const unsigned char *data = CFDataGetBytePtr(der); + auto x509 = d2i_X509(nullptr, &data, CFDataGetLength(der)); + if (x509) { + if (X509_STORE_add_cert(store, x509) == 1) { loaded_any = true; } + X509_free(x509); + } + CFRelease(der); + } + } + CFRelease(certs); + return loaded_any || SSL_CTX_set_default_verify_paths(ssl_ctx) == 1; +#else + return SSL_CTX_set_default_verify_paths(ssl_ctx) == 1; #endif +#else + // Other Unix: use default verify paths + return SSL_CTX_set_default_verify_paths(ssl_ctx) == 1; +#endif +} + +bool set_client_cert_pem(ctx_t ctx, const char *cert, const char *key, + const char *password) { + if (!ctx || !cert || !key) return false; + + auto ssl_ctx = static_cast(ctx); + + // Load certificate + auto cert_bio = BIO_new_mem_buf(cert, -1); + if (!cert_bio) return false; + + auto x509 = PEM_read_bio_X509(cert_bio, nullptr, nullptr, nullptr); + BIO_free(cert_bio); + if (!x509) return false; + + auto cert_ok = SSL_CTX_use_certificate(ssl_ctx, x509) == 1; + X509_free(x509); + if (!cert_ok) return false; + + // Load private key + auto key_bio = BIO_new_mem_buf(key, -1); + if (!key_bio) return false; + + auto pkey = PEM_read_bio_PrivateKey(key_bio, nullptr, nullptr, + password ? const_cast(password) + : nullptr); + BIO_free(key_bio); + if (!pkey) return false; + + auto key_ok = SSL_CTX_use_PrivateKey(ssl_ctx, pkey) == 1; + EVP_PKEY_free(pkey); + + return key_ok && SSL_CTX_check_private_key(ssl_ctx) == 1; +} + +bool set_client_cert_file(ctx_t ctx, const char *cert_path, + const char *key_path, const char *password) { + if (!ctx || !cert_path || !key_path) return false; + + auto ssl_ctx = static_cast(ctx); + + if (password && password[0] != '\0') { + SSL_CTX_set_default_passwd_cb_userdata( + ssl_ctx, reinterpret_cast(const_cast(password))); + } + + return SSL_CTX_use_certificate_chain_file(ssl_ctx, cert_path) == 1 && + SSL_CTX_use_PrivateKey_file(ssl_ctx, key_path, SSL_FILETYPE_PEM) == 1; +} + +ctx_t create_server_context() { + SSL_CTX *ctx = SSL_CTX_new(TLS_server_method()); + if (ctx) { + SSL_CTX_set_options(ctx, SSL_OP_NO_COMPRESSION | + SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); + SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION); + } + return static_cast(ctx); +} + +void set_verify_client(ctx_t ctx, bool require) { + if (!ctx) return; + SSL_CTX_set_verify(static_cast(ctx), + require + ? (SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT) + : SSL_VERIFY_NONE, + nullptr); +} + +session_t create_session(ctx_t ctx, socket_t sock) { + if (!ctx || sock == INVALID_SOCKET) return nullptr; + + auto ssl_ctx = static_cast(ctx); + SSL *ssl = SSL_new(ssl_ctx); + if (!ssl) return nullptr; + + // Disable auto-retry for proper non-blocking I/O handling + SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY); + + auto bio = BIO_new_socket(static_cast(sock), BIO_NOCLOSE); + if (!bio) { + SSL_free(ssl); + return nullptr; + } + + SSL_set_bio(ssl, bio, bio); + return static_cast(ssl); +} + +void free_session(session_t session) { + if (session) { SSL_free(static_cast(session)); } +} + +bool set_sni(session_t session, const char *hostname) { + if (!session || !hostname) return false; + + auto ssl = static_cast(session); + + // Set SNI (Server Name Indication) only - does not enable verification +#if defined(OPENSSL_IS_BORINGSSL) + return SSL_set_tlsext_host_name(ssl, hostname) == 1; +#else + // Direct call instead of macro to suppress -Wold-style-cast warning + return SSL_ctrl(ssl, SSL_CTRL_SET_TLSEXT_HOSTNAME, TLSEXT_NAMETYPE_host_name, + static_cast(const_cast(hostname))) == 1; +#endif +} + +bool set_hostname(session_t session, const char *hostname) { + if (!session || !hostname) return false; + + auto ssl = static_cast(session); + + // Set SNI (Server Name Indication) + if (!set_sni(session, hostname)) { return false; } + + // Enable hostname verification + auto param = SSL_get0_param(ssl); + if (!param) return false; + + X509_VERIFY_PARAM_set_hostflags(param, X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS); + if (X509_VERIFY_PARAM_set1_host(param, hostname, 0) != 1) { return false; } + + SSL_set_verify(ssl, SSL_VERIFY_PEER, nullptr); + return true; +} + +TlsError connect(session_t session) { + if (!session) { return TlsError(); } + + auto ssl = static_cast(session); + auto ret = SSL_connect(ssl); + + TlsError err; + if (ret == 1) { + err.code = ErrorCode::Success; + } else { + auto ssl_err = SSL_get_error(ssl, ret); + err.code = impl::map_ssl_error(ssl_err, err.sys_errno); + err.backend_code = ERR_get_error(); + } + return err; +} + +TlsError accept(session_t session) { + if (!session) { return TlsError(); } + + auto ssl = static_cast(session); + auto ret = SSL_accept(ssl); + + TlsError err; + if (ret == 1) { + err.code = ErrorCode::Success; + } else { + auto ssl_err = SSL_get_error(ssl, ret); + err.code = impl::map_ssl_error(ssl_err, err.sys_errno); + err.backend_code = ERR_get_error(); + } + return err; +} + +bool connect_nonblocking(session_t session, socket_t sock, + time_t timeout_sec, time_t timeout_usec, + TlsError *err) { + if (!session) { + if (err) { err->code = ErrorCode::Fatal; } + return false; + } + + auto ssl = static_cast(session); + auto bio = SSL_get_rbio(ssl); + + // Set non-blocking mode for handshake + detail::set_nonblocking(sock, true); + if (bio) { BIO_set_nbio(bio, 1); } + + auto cleanup = detail::scope_exit([&]() { + // Restore blocking mode after handshake + if (bio) { BIO_set_nbio(bio, 0); } + detail::set_nonblocking(sock, false); + }); + + auto res = 0; + while ((res = SSL_connect(ssl)) != 1) { + auto ssl_err = SSL_get_error(ssl, res); + switch (ssl_err) { + case SSL_ERROR_WANT_READ: + if (detail::select_read(sock, timeout_sec, timeout_usec) > 0) { + continue; + } + break; + case SSL_ERROR_WANT_WRITE: + if (detail::select_write(sock, timeout_sec, timeout_usec) > 0) { + continue; + } + break; + default: break; + } + if (err) { + err->code = impl::map_ssl_error(ssl_err, err->sys_errno); + err->backend_code = ERR_get_error(); + } + return false; + } + if (err) { err->code = ErrorCode::Success; } + return true; +} + +bool accept_nonblocking(session_t session, socket_t sock, + time_t timeout_sec, time_t timeout_usec, + TlsError *err) { + if (!session) { + if (err) { err->code = ErrorCode::Fatal; } + return false; + } + + auto ssl = static_cast(session); + auto bio = SSL_get_rbio(ssl); + + // Set non-blocking mode for handshake + detail::set_nonblocking(sock, true); + if (bio) { BIO_set_nbio(bio, 1); } + + auto cleanup = detail::scope_exit([&]() { + // Restore blocking mode after handshake + if (bio) { BIO_set_nbio(bio, 0); } + detail::set_nonblocking(sock, false); + }); + + auto res = 0; + while ((res = SSL_accept(ssl)) != 1) { + auto ssl_err = SSL_get_error(ssl, res); + switch (ssl_err) { + case SSL_ERROR_WANT_READ: + if (detail::select_read(sock, timeout_sec, timeout_usec) > 0) { + continue; + } + break; + case SSL_ERROR_WANT_WRITE: + if (detail::select_write(sock, timeout_sec, timeout_usec) > 0) { + continue; + } + break; + default: break; + } + if (err) { + err->code = impl::map_ssl_error(ssl_err, err->sys_errno); + err->backend_code = ERR_get_error(); + } + return false; + } + if (err) { err->code = ErrorCode::Success; } + return true; +} + +ssize_t read(session_t session, void *buf, size_t len, TlsError &err) { + if (!session || !buf) { + err.code = ErrorCode::Fatal; + return -1; + } + + auto ssl = static_cast(session); + constexpr auto max_len = + static_cast((std::numeric_limits::max)()); + if (len > max_len) { len = max_len; } + auto ret = SSL_read(ssl, buf, static_cast(len)); + + if (ret > 0) { + err.code = ErrorCode::Success; + return ret; + } + + auto ssl_err = SSL_get_error(ssl, ret); + err.code = impl::map_ssl_error(ssl_err, err.sys_errno); + if (err.code == ErrorCode::Fatal) { err.backend_code = ERR_get_error(); } + return -1; +} + +ssize_t write(session_t session, const void *buf, size_t len, + TlsError &err) { + if (!session || !buf) { + err.code = ErrorCode::Fatal; + return -1; + } + + auto ssl = static_cast(session); + auto ret = SSL_write(ssl, buf, static_cast(len)); + + if (ret > 0) { + err.code = ErrorCode::Success; + return ret; + } + + auto ssl_err = SSL_get_error(ssl, ret); + err.code = impl::map_ssl_error(ssl_err, err.sys_errno); + if (err.code == ErrorCode::Fatal) { err.backend_code = ERR_get_error(); } + return -1; +} + +int pending(const_session_t session) { + if (!session) return 0; + return SSL_pending(static_cast(const_cast(session))); +} + +void shutdown(session_t session, bool graceful) { + if (!session) return; + + auto ssl = static_cast(session); + if (graceful) { + // First call sends close_notify + if (SSL_shutdown(ssl) == 0) { + // Second call waits for peer's close_notify + SSL_shutdown(ssl); + } + } +} + +bool is_peer_closed(session_t session, socket_t sock) { + if (!session) return true; + + // Temporarily set socket to non-blocking to avoid blocking on SSL_peek + detail::set_nonblocking(sock, true); + auto se = detail::scope_exit([&]() { detail::set_nonblocking(sock, false); }); + + auto ssl = static_cast(session); + char buf; + auto ret = SSL_peek(ssl, &buf, 1); + if (ret > 0) return false; + + auto err = SSL_get_error(ssl, ret); + return err == SSL_ERROR_ZERO_RETURN; +} + +cert_t get_peer_cert(const_session_t session) { + if (!session) return nullptr; + return static_cast(SSL_get1_peer_certificate( + static_cast(const_cast(session)))); +} + +void free_cert(cert_t cert) { + if (cert) { X509_free(static_cast(cert)); } +} + +bool verify_hostname(cert_t cert, const char *hostname) { + if (!cert || !hostname) return false; + + auto x509 = static_cast(cert); + + // Use X509_check_ip_asc for IP addresses, X509_check_host for DNS names + if (detail::is_ip_address(hostname)) { + return X509_check_ip_asc(x509, hostname, 0) == 1; + } + return X509_check_host(x509, hostname, strlen(hostname), 0, nullptr) == 1; +} + +uint64_t hostname_mismatch_code() { + return static_cast(X509_V_ERR_HOSTNAME_MISMATCH); +} + +long get_verify_result(const_session_t session) { + if (!session) return X509_V_ERR_UNSPECIFIED; + return SSL_get_verify_result(static_cast(const_cast(session))); +} + +std::string get_cert_subject_cn(cert_t cert) { + if (!cert) return ""; + auto x509 = static_cast(cert); + auto subject_name = X509_get_subject_name(x509); + if (!subject_name) return ""; + + char buf[256]; + auto len = + X509_NAME_get_text_by_NID(subject_name, NID_commonName, buf, sizeof(buf)); + if (len < 0) return ""; + return std::string(buf, static_cast(len)); +} + +std::string get_cert_issuer_name(cert_t cert) { + if (!cert) return ""; + auto x509 = static_cast(cert); + auto issuer_name = X509_get_issuer_name(x509); + if (!issuer_name) return ""; + + char buf[256]; + X509_NAME_oneline(issuer_name, buf, sizeof(buf)); + return std::string(buf); +} + +bool get_cert_sans(cert_t cert, std::vector &sans) { + sans.clear(); + if (!cert) return false; + auto x509 = static_cast(cert); + + auto names = static_cast( + X509_get_ext_d2i(x509, NID_subject_alt_name, nullptr, nullptr)); + if (!names) return true; // No SANs is valid + + auto count = sk_GENERAL_NAME_num(names); + for (int i = 0; i < count; i++) { + auto gen = sk_GENERAL_NAME_value(names, i); + if (!gen) continue; + + SanEntry entry; + switch (gen->type) { + case GEN_DNS: + entry.type = SanType::DNS; + if (gen->d.dNSName) { + entry.value = std::string( + reinterpret_cast( + ASN1_STRING_get0_data(gen->d.dNSName)), + static_cast(ASN1_STRING_length(gen->d.dNSName))); + } + break; + case GEN_IPADD: + entry.type = SanType::IP; + if (gen->d.iPAddress) { + auto data = ASN1_STRING_get0_data(gen->d.iPAddress); + auto len = ASN1_STRING_length(gen->d.iPAddress); + if (len == 4) { + // IPv4 + char buf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, data, buf, sizeof(buf)); + entry.value = buf; + } else if (len == 16) { + // IPv6 + char buf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, data, buf, sizeof(buf)); + entry.value = buf; + } + } + break; + case GEN_EMAIL: + entry.type = SanType::EMAIL; + if (gen->d.rfc822Name) { + entry.value = std::string( + reinterpret_cast( + ASN1_STRING_get0_data(gen->d.rfc822Name)), + static_cast(ASN1_STRING_length(gen->d.rfc822Name))); + } + break; + case GEN_URI: + entry.type = SanType::URI; + if (gen->d.uniformResourceIdentifier) { + entry.value = std::string( + reinterpret_cast( + ASN1_STRING_get0_data(gen->d.uniformResourceIdentifier)), + static_cast( + ASN1_STRING_length(gen->d.uniformResourceIdentifier))); + } + break; + default: entry.type = SanType::OTHER; break; + } + + if (!entry.value.empty()) { sans.push_back(std::move(entry)); } + } + + GENERAL_NAMES_free(names); + return true; +} + +bool get_cert_validity(cert_t cert, time_t ¬_before, + time_t ¬_after) { + if (!cert) return false; + auto x509 = static_cast(cert); + + auto nb = X509_get0_notBefore(x509); + auto na = X509_get0_notAfter(x509); + if (!nb || !na) return false; + + ASN1_TIME *epoch = ASN1_TIME_new(); + if (!epoch) return false; + auto se = detail::scope_exit([&] { ASN1_TIME_free(epoch); }); + + if (!ASN1_TIME_set(epoch, 0)) return false; + + int pday, psec; + + if (!ASN1_TIME_diff(&pday, &psec, epoch, nb)) return false; + not_before = 86400 * (time_t)pday + psec; + + if (!ASN1_TIME_diff(&pday, &psec, epoch, na)) return false; + not_after = 86400 * (time_t)pday + psec; + + return true; +} + +std::string get_cert_serial(cert_t cert) { + if (!cert) return ""; + auto x509 = static_cast(cert); + + auto serial = X509_get_serialNumber(x509); + if (!serial) return ""; + + auto bn = ASN1_INTEGER_to_BN(serial, nullptr); + if (!bn) return ""; + + auto hex = BN_bn2hex(bn); + BN_free(bn); + if (!hex) return ""; + + std::string result(hex); + OPENSSL_free(hex); + return result; +} + +bool get_cert_der(cert_t cert, std::vector &der) { + if (!cert) return false; + auto x509 = static_cast(cert); + auto len = i2d_X509(x509, nullptr); + if (len < 0) return false; + der.resize(static_cast(len)); + auto p = der.data(); + i2d_X509(x509, &p); + return true; +} + +const char *get_sni(const_session_t session) { + if (!session) return nullptr; + auto ssl = static_cast(const_cast(session)); + return SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name); +} + +uint64_t peek_error() { return ERR_peek_last_error(); } + +uint64_t get_error() { return ERR_get_error(); } + +std::string error_string(uint64_t code) { + char buf[256]; + ERR_error_string_n(static_cast(code), buf, sizeof(buf)); + return std::string(buf); +} + +ca_store_t create_ca_store(const char *pem, size_t len) { + auto mem = BIO_new_mem_buf(pem, static_cast(len)); + if (!mem) { return nullptr; } + auto mem_guard = detail::scope_exit([&] { BIO_free_all(mem); }); + + auto inf = PEM_X509_INFO_read_bio(mem, nullptr, nullptr, nullptr); + if (!inf) { return nullptr; } + + auto store = X509_STORE_new(); + if (store) { + for (auto i = 0; i < static_cast(sk_X509_INFO_num(inf)); i++) { + auto itmp = sk_X509_INFO_value(inf, i); + if (!itmp) { continue; } + if (itmp->x509) { X509_STORE_add_cert(store, itmp->x509); } + if (itmp->crl) { X509_STORE_add_crl(store, itmp->crl); } + } + } + + sk_X509_INFO_pop_free(inf, X509_INFO_free); + return static_cast(store); +} + +void free_ca_store(ca_store_t store) { + if (store) { X509_STORE_free(static_cast(store)); } +} + +bool set_ca_store(ctx_t ctx, ca_store_t store) { + if (!ctx || !store) { return false; } + auto ssl_ctx = static_cast(ctx); + auto x509_store = static_cast(store); + + // Check if same store is already set + if (SSL_CTX_get_cert_store(ssl_ctx) == x509_store) { return true; } + + // SSL_CTX_set_cert_store takes ownership and frees the old store + SSL_CTX_set_cert_store(ssl_ctx, x509_store); + return true; +} + +size_t get_ca_certs(ctx_t ctx, std::vector &certs) { + certs.clear(); + if (!ctx) { return 0; } + auto ssl_ctx = static_cast(ctx); + + auto store = SSL_CTX_get_cert_store(ssl_ctx); + if (!store) { return 0; } + + auto objs = X509_STORE_get0_objects(store); + if (!objs) { return 0; } + + auto count = sk_X509_OBJECT_num(objs); + for (decltype(count) i = 0; i < count; i++) { + auto obj = sk_X509_OBJECT_value(objs, i); + if (!obj) { continue; } + if (X509_OBJECT_get_type(obj) == X509_LU_X509) { + auto x509 = X509_OBJECT_get0_X509(obj); + if (x509) { + // Increment reference count so caller can free it + X509_up_ref(x509); + certs.push_back(static_cast(x509)); + } + } + } + return certs.size(); +} + +std::vector get_ca_names(ctx_t ctx) { + std::vector names; + if (!ctx) { return names; } + auto ssl_ctx = static_cast(ctx); + + auto store = SSL_CTX_get_cert_store(ssl_ctx); + if (!store) { return names; } + + auto objs = X509_STORE_get0_objects(store); + if (!objs) { return names; } + + auto count = sk_X509_OBJECT_num(objs); + for (decltype(count) i = 0; i < count; i++) { + auto obj = sk_X509_OBJECT_value(objs, i); + if (!obj) { continue; } + if (X509_OBJECT_get_type(obj) == X509_LU_X509) { + auto x509 = X509_OBJECT_get0_X509(obj); + if (x509) { + auto subject = X509_get_subject_name(x509); + if (subject) { + char buf[512]; + X509_NAME_oneline(subject, buf, sizeof(buf)); + names.push_back(buf); + } + } + } + } + return names; +} + +bool update_server_cert(ctx_t ctx, const char *cert_pem, + const char *key_pem, const char *password) { + if (!ctx || !cert_pem || !key_pem) { return false; } + auto ssl_ctx = static_cast(ctx); + + // Load certificate from PEM + auto cert_bio = BIO_new_mem_buf(cert_pem, -1); + if (!cert_bio) { return false; } + auto cert = PEM_read_bio_X509(cert_bio, nullptr, nullptr, nullptr); + BIO_free(cert_bio); + if (!cert) { return false; } + + // Load private key from PEM + auto key_bio = BIO_new_mem_buf(key_pem, -1); + if (!key_bio) { + X509_free(cert); + return false; + } + auto key = PEM_read_bio_PrivateKey(key_bio, nullptr, nullptr, + password ? const_cast(password) + : nullptr); + BIO_free(key_bio); + if (!key) { + X509_free(cert); + return false; + } + + // Update certificate and key + auto ret = SSL_CTX_use_certificate(ssl_ctx, cert) == 1 && + SSL_CTX_use_PrivateKey(ssl_ctx, key) == 1; + + X509_free(cert); + EVP_PKEY_free(key); + return ret; +} + +bool update_server_client_ca(ctx_t ctx, const char *ca_pem) { + if (!ctx || !ca_pem) { return false; } + auto ssl_ctx = static_cast(ctx); + + // Create new X509_STORE from PEM + auto store = create_ca_store(ca_pem, strlen(ca_pem)); + if (!store) { return false; } + + // SSL_CTX_set_cert_store takes ownership + SSL_CTX_set_cert_store(ssl_ctx, static_cast(store)); + + // Set client CA list for client certificate request + auto ca_list = impl::create_client_ca_list_from_pem(ca_pem); + if (ca_list) { + // SSL_CTX_set_client_CA_list takes ownership of ca_list + SSL_CTX_set_client_CA_list(ssl_ctx, ca_list); + } + + return true; +} + +bool set_verify_callback(ctx_t ctx, VerifyCallback callback) { + if (!ctx) { return false; } + auto ssl_ctx = static_cast(ctx); + + impl::get_verify_callback() = std::move(callback); + + if (impl::get_verify_callback()) { + SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_PEER, impl::openssl_verify_callback); + } else { + SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_PEER, nullptr); + } + return true; +} + +long get_verify_error(const_session_t session) { + if (!session) { return -1; } + auto ssl = static_cast(const_cast(session)); + return SSL_get_verify_result(ssl); +} + +std::string verify_error_string(long error_code) { + if (error_code == X509_V_OK) { return ""; } + const char *str = X509_verify_cert_error_string(static_cast(error_code)); + return str ? str : "unknown error"; +} + +namespace impl { + +// OpenSSL-specific helpers for public API wrappers +ctx_t create_server_context_from_x509(X509 *cert, EVP_PKEY *key, + X509_STORE *client_ca_store, + int &out_error) { + out_error = 0; + auto cert_pem = x509_to_pem(cert); + auto key_pem = evp_pkey_to_pem(key); + if (cert_pem.empty() || key_pem.empty()) { + out_error = static_cast(ERR_get_error()); + return nullptr; + } + + auto ctx = create_server_context(); + if (!ctx) { + out_error = static_cast(get_error()); + return nullptr; + } + + if (!set_server_cert_pem(ctx, cert_pem.c_str(), key_pem.c_str(), nullptr)) { + out_error = static_cast(get_error()); + free_context(ctx); + return nullptr; + } + + if (client_ca_store) { + // Set cert store for verification (SSL_CTX_set_cert_store takes ownership) + SSL_CTX_set_cert_store(static_cast(ctx), client_ca_store); + + // Extract and set client CA list directly from store (more efficient than + // PEM conversion) + auto ca_list = extract_client_ca_list_from_store(client_ca_store); + if (ca_list) { + SSL_CTX_set_client_CA_list(static_cast(ctx), ca_list); + } + + set_verify_client(ctx, true); + } + + return ctx; +} + +void update_server_certs_from_x509(ctx_t ctx, X509 *cert, EVP_PKEY *key, + X509_STORE *client_ca_store) { + auto cert_pem = x509_to_pem(cert); + auto key_pem = evp_pkey_to_pem(key); + + if (!cert_pem.empty() && !key_pem.empty()) { + update_server_cert(ctx, cert_pem.c_str(), key_pem.c_str(), nullptr); + } + + if (client_ca_store) { + auto ca_pem = x509_store_to_pem(client_ca_store); + if (!ca_pem.empty()) { update_server_client_ca(ctx, ca_pem.c_str()); } + X509_STORE_free(client_ca_store); + } +} + +ctx_t create_client_context_from_x509(X509 *cert, EVP_PKEY *key, + const char *password, + unsigned long &out_error) { + out_error = 0; + auto ctx = create_client_context(); + if (!ctx) { + out_error = static_cast(get_error()); + return nullptr; + } + + if (cert && key) { + auto cert_pem = x509_to_pem(cert); + auto key_pem = evp_pkey_to_pem(key); + if (cert_pem.empty() || key_pem.empty()) { + out_error = ERR_get_error(); + free_context(ctx); + return nullptr; + } + if (!set_client_cert_pem(ctx, cert_pem.c_str(), key_pem.c_str(), + password)) { + out_error = static_cast(get_error()); + free_context(ctx); + return nullptr; + } + } + + return ctx; +} + +} // namespace impl + +} // namespace tls + +// ClientImpl::set_ca_cert_store - defined here to use +// tls::impl::x509_store_to_pem Deprecated: converts X509_STORE to PEM and +// stores for redirect transfer +void ClientImpl::set_ca_cert_store(X509_STORE *ca_cert_store) { + if (ca_cert_store) { + ca_cert_pem_ = tls::impl::x509_store_to_pem(ca_cert_store); + } +} + +SSLServer::SSLServer(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store) { + ctx_ = tls::impl::create_server_context_from_x509( + cert, private_key, client_ca_cert_store, last_ssl_error_); +} + +SSLServer::SSLServer( + const std::function &setup_ssl_ctx_callback) { + // Use abstract API to create context + ctx_ = tls::create_server_context(); + if (ctx_) { + // Pass to OpenSSL-specific callback (ctx_ is SSL_CTX* internally) + auto ssl_ctx = static_cast(ctx_); + if (!setup_ssl_ctx_callback(*ssl_ctx)) { + tls::free_context(ctx_); + ctx_ = nullptr; + } + } +} + +SSL_CTX *SSLServer::ssl_context() const { + return static_cast(ctx_); +} + +void SSLServer::update_certs(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store) { + std::lock_guard guard(ctx_mutex_); + tls::impl::update_server_certs_from_x509(ctx_, cert, private_key, + client_ca_cert_store); +} + +SSLClient::SSLClient(const std::string &host, int port, + X509 *client_cert, EVP_PKEY *client_key, + const std::string &private_key_password) + : ClientImpl(host, port) { + const char *password = + private_key_password.empty() ? nullptr : private_key_password.c_str(); + ctx_ = tls::impl::create_client_context_from_x509( + client_cert, client_key, password, last_backend_error_); +} + +long SSLClient::get_verify_result() const { return verify_result_; } + +void SSLClient::set_server_certificate_verifier( + std::function verifier) { + // Wrap SSL* callback into backend-independent session_verifier_ + auto v = std::make_shared>( + std::move(verifier)); + session_verifier_ = [v](tls::session_t session) { + return (*v)(static_cast(session)); + }; +} + +SSL_CTX *SSLClient::ssl_context() const { + return static_cast(ctx_); +} + +bool SSLClient::verify_host(X509 *server_cert) const { + /* Quote from RFC2818 section 3.1 "Server Identity" + + If a subjectAltName extension of type dNSName is present, that MUST + be used as the identity. Otherwise, the (most specific) Common Name + field in the Subject field of the certificate MUST be used. Although + the use of the Common Name is existing practice, it is deprecated and + Certification Authorities are encouraged to use the dNSName instead. + + Matching is performed using the matching rules specified by + [RFC2459]. If more than one identity of a given type is present in + the certificate (e.g., more than one dNSName name, a match in any one + of the set is considered acceptable.) Names may contain the wildcard + character * which is considered to match any single domain name + component or component fragment. E.g., *.a.com matches foo.a.com but + not bar.foo.a.com. f*.com matches foo.com but not bar.com. + + In some cases, the URI is specified as an IP address rather than a + hostname. In this case, the iPAddress subjectAltName must be present + in the certificate and must exactly match the IP in the URI. + + */ + return verify_host_with_subject_alt_name(server_cert) || + verify_host_with_common_name(server_cert); +} + +bool +SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const { + auto ret = false; + + auto type = GEN_DNS; + + struct in6_addr addr6 = {}; + struct in_addr addr = {}; + size_t addr_len = 0; + +#ifndef __MINGW32__ + if (inet_pton(AF_INET6, host_.c_str(), &addr6)) { + type = GEN_IPADD; + addr_len = sizeof(struct in6_addr); + } else if (inet_pton(AF_INET, host_.c_str(), &addr)) { + type = GEN_IPADD; + addr_len = sizeof(struct in_addr); + } +#endif + + auto alt_names = static_cast( + X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr)); + + if (alt_names) { + auto dsn_matched = false; + auto ip_matched = false; + + auto count = sk_GENERAL_NAME_num(alt_names); + + for (decltype(count) i = 0; i < count && !dsn_matched; i++) { + auto val = sk_GENERAL_NAME_value(alt_names, i); + if (!val || val->type != type) { continue; } + + auto name = + reinterpret_cast(ASN1_STRING_get0_data(val->d.ia5)); + if (name == nullptr) { continue; } + + auto name_len = static_cast(ASN1_STRING_length(val->d.ia5)); + + switch (type) { + case GEN_DNS: + dsn_matched = + detail::match_hostname(std::string(name, name_len), host_); + break; + + case GEN_IPADD: + if (!memcmp(&addr6, name, addr_len) || !memcmp(&addr, name, addr_len)) { + ip_matched = true; + } + break; + } + } + + if (dsn_matched || ip_matched) { ret = true; } + } + + GENERAL_NAMES_free(const_cast( + reinterpret_cast(alt_names))); + return ret; +} + +bool SSLClient::verify_host_with_common_name(X509 *server_cert) const { + const auto subject_name = X509_get_subject_name(server_cert); + + if (subject_name != nullptr) { + char name[BUFSIZ]; + auto name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName, + name, sizeof(name)); + + if (name_len != -1) { + return detail::match_hostname( + std::string(name, static_cast(name_len)), host_); + } + } + + return false; +} + +#endif // CPPHTTPLIB_OPENSSL_SUPPORT + +/* + * Group 9: TLS abstraction layer - Mbed TLS backend + */ + +/* + * Mbed TLS Backend Implementation + */ + +#ifdef CPPHTTPLIB_MBEDTLS_SUPPORT +namespace tls { + +namespace impl { + +// Mbed TLS session wrapper +struct MbedTlsSession { + mbedtls_ssl_context ssl; + socket_t sock = INVALID_SOCKET; + std::string hostname; // For client: set via set_sni + std::string sni_hostname; // For server: received from client via SNI callback + + MbedTlsSession() { mbedtls_ssl_init(&ssl); } + + ~MbedTlsSession() { mbedtls_ssl_free(&ssl); } + + MbedTlsSession(const MbedTlsSession &) = delete; + MbedTlsSession &operator=(const MbedTlsSession &) = delete; +}; + +// Thread-local error code accessor for Mbed TLS (since it doesn't have an error +// queue) +int &mbedtls_last_error() { + static thread_local int err = 0; + return err; +} + +// Helper to map Mbed TLS error to ErrorCode +ErrorCode map_mbedtls_error(int ret, int &out_errno) { + if (ret == 0) { return ErrorCode::Success; } + if (ret == MBEDTLS_ERR_SSL_WANT_READ) { return ErrorCode::WantRead; } + if (ret == MBEDTLS_ERR_SSL_WANT_WRITE) { return ErrorCode::WantWrite; } + if (ret == MBEDTLS_ERR_SSL_PEER_CLOSE_NOTIFY) { + return ErrorCode::PeerClosed; + } + if (ret == MBEDTLS_ERR_NET_CONN_RESET || ret == MBEDTLS_ERR_NET_SEND_FAILED || + ret == MBEDTLS_ERR_NET_RECV_FAILED) { + out_errno = errno; + return ErrorCode::SyscallError; + } + if (ret == MBEDTLS_ERR_X509_CERT_VERIFY_FAILED) { + return ErrorCode::CertVerifyFailed; + } + return ErrorCode::Fatal; +} + +// BIO-like send callback for Mbed TLS +int mbedtls_net_send_cb(void *ctx, const unsigned char *buf, + size_t len) { + auto sock = *static_cast(ctx); +#ifdef _WIN32 + auto ret = + send(sock, reinterpret_cast(buf), static_cast(len), 0); + if (ret == SOCKET_ERROR) { + int err = WSAGetLastError(); + if (err == WSAEWOULDBLOCK) { return MBEDTLS_ERR_SSL_WANT_WRITE; } + return MBEDTLS_ERR_NET_SEND_FAILED; + } +#else + auto ret = send(sock, buf, len, 0); + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return MBEDTLS_ERR_SSL_WANT_WRITE; + } + return MBEDTLS_ERR_NET_SEND_FAILED; + } +#endif + return static_cast(ret); +} + +// BIO-like recv callback for Mbed TLS +int mbedtls_net_recv_cb(void *ctx, unsigned char *buf, size_t len) { + auto sock = *static_cast(ctx); +#ifdef _WIN32 + auto ret = + recv(sock, reinterpret_cast(buf), static_cast(len), 0); + if (ret == SOCKET_ERROR) { + int err = WSAGetLastError(); + if (err == WSAEWOULDBLOCK) { return MBEDTLS_ERR_SSL_WANT_READ; } + return MBEDTLS_ERR_NET_RECV_FAILED; + } +#else + auto ret = recv(sock, buf, len, 0); + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return MBEDTLS_ERR_SSL_WANT_READ; + } + return MBEDTLS_ERR_NET_RECV_FAILED; + } +#endif + if (ret == 0) { return MBEDTLS_ERR_SSL_PEER_CLOSE_NOTIFY; } + return static_cast(ret); +} + +// MbedTlsContext constructor/destructor implementations +MbedTlsContext::MbedTlsContext() { + mbedtls_ssl_config_init(&conf); + mbedtls_entropy_init(&entropy); + mbedtls_ctr_drbg_init(&ctr_drbg); + mbedtls_x509_crt_init(&ca_chain); + mbedtls_x509_crt_init(&own_cert); + mbedtls_pk_init(&own_key); +} + +MbedTlsContext::~MbedTlsContext() { + mbedtls_pk_free(&own_key); + mbedtls_x509_crt_free(&own_cert); + mbedtls_x509_crt_free(&ca_chain); + mbedtls_ctr_drbg_free(&ctr_drbg); + mbedtls_entropy_free(&entropy); + mbedtls_ssl_config_free(&conf); +} + +// Thread-local storage for SNI captured during handshake +// This is needed because the SNI callback doesn't have a way to pass +// session-specific data before the session is fully set up +std::string &mbedpending_sni() { + static thread_local std::string sni; + return sni; +} + +// SNI callback for Mbed TLS server to capture client's SNI hostname +int mbedtls_sni_callback(void *p_ctx, mbedtls_ssl_context *ssl, + const unsigned char *name, size_t name_len) { + (void)p_ctx; + (void)ssl; + + // Store SNI name in thread-local storage + // It will be retrieved and stored in the session after handshake + if (name && name_len > 0) { + mbedpending_sni().assign(reinterpret_cast(name), name_len); + } else { + mbedpending_sni().clear(); + } + return 0; // Accept any SNI +} + +int mbedtls_verify_callback(void *data, mbedtls_x509_crt *crt, + int cert_depth, uint32_t *flags); + +// Check if a string is an IPv4 address +bool is_ipv4_address(const std::string &str) { + int dots = 0; + for (char c : str) { + if (c == '.') { + dots++; + } else if (!isdigit(static_cast(c))) { + return false; + } + } + return dots == 3; +} + +// Parse IPv4 address string to bytes +bool parse_ipv4(const std::string &str, unsigned char *out) { + int parts[4]; + if (sscanf(str.c_str(), "%d.%d.%d.%d", &parts[0], &parts[1], &parts[2], + &parts[3]) != 4) { + return false; + } + for (int i = 0; i < 4; i++) { + if (parts[i] < 0 || parts[i] > 255) return false; + out[i] = static_cast(parts[i]); + } + return true; +} + +// MbedTLS verify callback wrapper +int mbedtls_verify_callback(void *data, mbedtls_x509_crt *crt, + int cert_depth, uint32_t *flags) { + auto &callback = get_verify_callback(); + if (!callback) { return 0; } // Continue with default verification + + // data points to the MbedTlsSession + auto *session = static_cast(data); + + // Build context + VerifyContext verify_ctx; + verify_ctx.session = static_cast(session); + verify_ctx.cert = static_cast(crt); + verify_ctx.depth = cert_depth; + verify_ctx.preverify_ok = (*flags == 0); + verify_ctx.error_code = static_cast(*flags); + + // Convert Mbed TLS flags to error string + static thread_local char error_buf[256]; + if (*flags != 0) { + mbedtls_x509_crt_verify_info(error_buf, sizeof(error_buf), "", *flags); + verify_ctx.error_string = error_buf; + } else { + verify_ctx.error_string = nullptr; + } + + bool accepted = callback(verify_ctx); + + if (accepted) { + *flags = 0; // Clear all error flags + return 0; + } + return MBEDTLS_ERR_X509_CERT_VERIFY_FAILED; +} + +} // namespace impl + +ctx_t create_client_context() { + auto ctx = new (std::nothrow) impl::MbedTlsContext(); + if (!ctx) { return nullptr; } + + ctx->is_server = false; + + // Seed the random number generator + const char *pers = "httplib_client"; + int ret = mbedtls_ctr_drbg_seed( + &ctx->ctr_drbg, mbedtls_entropy_func, &ctx->entropy, + reinterpret_cast(pers), strlen(pers)); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + delete ctx; + return nullptr; + } + + // Set up SSL config for client + ret = mbedtls_ssl_config_defaults(&ctx->conf, MBEDTLS_SSL_IS_CLIENT, + MBEDTLS_SSL_TRANSPORT_STREAM, + MBEDTLS_SSL_PRESET_DEFAULT); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + delete ctx; + return nullptr; + } + + // Set random number generator + mbedtls_ssl_conf_rng(&ctx->conf, mbedtls_ctr_drbg_random, &ctx->ctr_drbg); + + // Default: verify peer certificate + mbedtls_ssl_conf_authmode(&ctx->conf, MBEDTLS_SSL_VERIFY_REQUIRED); + + // Set minimum TLS version to 1.2 +#ifdef CPPHTTPLIB_MBEDTLS_V3 + mbedtls_ssl_conf_min_tls_version(&ctx->conf, MBEDTLS_SSL_VERSION_TLS1_2); +#else + mbedtls_ssl_conf_min_version(&ctx->conf, MBEDTLS_SSL_MAJOR_VERSION_3, + MBEDTLS_SSL_MINOR_VERSION_3); +#endif + + return static_cast(ctx); +} + +ctx_t create_server_context() { + auto ctx = new (std::nothrow) impl::MbedTlsContext(); + if (!ctx) { return nullptr; } + + ctx->is_server = true; + + // Seed the random number generator + const char *pers = "httplib_server"; + int ret = mbedtls_ctr_drbg_seed( + &ctx->ctr_drbg, mbedtls_entropy_func, &ctx->entropy, + reinterpret_cast(pers), strlen(pers)); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + delete ctx; + return nullptr; + } + + // Set up SSL config for server + ret = mbedtls_ssl_config_defaults(&ctx->conf, MBEDTLS_SSL_IS_SERVER, + MBEDTLS_SSL_TRANSPORT_STREAM, + MBEDTLS_SSL_PRESET_DEFAULT); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + delete ctx; + return nullptr; + } + + // Set random number generator + mbedtls_ssl_conf_rng(&ctx->conf, mbedtls_ctr_drbg_random, &ctx->ctr_drbg); + + // Default: don't verify client + mbedtls_ssl_conf_authmode(&ctx->conf, MBEDTLS_SSL_VERIFY_NONE); + + // Set minimum TLS version to 1.2 +#ifdef CPPHTTPLIB_MBEDTLS_V3 + mbedtls_ssl_conf_min_tls_version(&ctx->conf, MBEDTLS_SSL_VERSION_TLS1_2); +#else + mbedtls_ssl_conf_min_version(&ctx->conf, MBEDTLS_SSL_MAJOR_VERSION_3, + MBEDTLS_SSL_MINOR_VERSION_3); +#endif + + // Set SNI callback to capture client's SNI hostname + mbedtls_ssl_conf_sni(&ctx->conf, impl::mbedtls_sni_callback, nullptr); + + return static_cast(ctx); +} + +void free_context(ctx_t ctx) { + if (ctx) { delete static_cast(ctx); } +} + +bool set_min_version(ctx_t ctx, Version version) { + if (!ctx) { return false; } + auto mctx = static_cast(ctx); + +#ifdef CPPHTTPLIB_MBEDTLS_V3 + // Mbed TLS 3.x uses mbedtls_ssl_protocol_version enum + mbedtls_ssl_protocol_version min_ver = MBEDTLS_SSL_VERSION_TLS1_2; + if (version >= Version::TLS1_3) { +#if defined(MBEDTLS_SSL_PROTO_TLS1_3) + min_ver = MBEDTLS_SSL_VERSION_TLS1_3; +#endif + } + mbedtls_ssl_conf_min_tls_version(&mctx->conf, min_ver); +#else + // Mbed TLS 2.x uses major/minor version numbers + int major = MBEDTLS_SSL_MAJOR_VERSION_3; + int minor = MBEDTLS_SSL_MINOR_VERSION_3; // TLS 1.2 + if (version >= Version::TLS1_3) { +#if defined(MBEDTLS_SSL_PROTO_TLS1_3) + minor = MBEDTLS_SSL_MINOR_VERSION_4; // TLS 1.3 +#else + minor = MBEDTLS_SSL_MINOR_VERSION_3; // Fall back to TLS 1.2 +#endif + } + mbedtls_ssl_conf_min_version(&mctx->conf, major, minor); +#endif + return true; +} + +bool load_ca_pem(ctx_t ctx, const char *pem, size_t len) { + if (!ctx || !pem) { return false; } + auto mctx = static_cast(ctx); + + // mbedtls_x509_crt_parse expects null-terminated string for PEM + // Add null terminator if not present + std::string pem_str(pem, len); + int ret = mbedtls_x509_crt_parse( + &mctx->ca_chain, reinterpret_cast(pem_str.c_str()), + pem_str.size() + 1); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + mbedtls_ssl_conf_ca_chain(&mctx->conf, &mctx->ca_chain, nullptr); + return true; +} + +bool load_ca_file(ctx_t ctx, const char *file_path) { + if (!ctx || !file_path) { return false; } + auto mctx = static_cast(ctx); + + int ret = mbedtls_x509_crt_parse_file(&mctx->ca_chain, file_path); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + mbedtls_ssl_conf_ca_chain(&mctx->conf, &mctx->ca_chain, nullptr); + return true; +} + +bool load_ca_dir(ctx_t ctx, const char *dir_path) { + if (!ctx || !dir_path) { return false; } + auto mctx = static_cast(ctx); + + int ret = mbedtls_x509_crt_parse_path(&mctx->ca_chain, dir_path); + if (ret < 0) { // Returns number of certs on success, negative on error + impl::mbedtls_last_error() = ret; + return false; + } + + mbedtls_ssl_conf_ca_chain(&mctx->conf, &mctx->ca_chain, nullptr); + return true; +} + +bool load_system_certs(ctx_t ctx) { + if (!ctx) { return false; } + auto mctx = static_cast(ctx); + bool loaded = false; + +#ifdef _WIN32 + // Load from Windows certificate store (ROOT and CA) + static const wchar_t *store_names[] = {L"ROOT", L"CA"}; + for (auto store_name : store_names) { + HCERTSTORE hStore = CertOpenSystemStoreW(0, store_name); + if (hStore) { + PCCERT_CONTEXT pContext = nullptr; + while ((pContext = CertEnumCertificatesInStore(hStore, pContext)) != + nullptr) { + int ret = mbedtls_x509_crt_parse_der( + &mctx->ca_chain, pContext->pbCertEncoded, pContext->cbCertEncoded); + if (ret == 0) { loaded = true; } + } + CertCloseStore(hStore, 0); + } + } +#elif defined(__APPLE__) && defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) + // Load from macOS Keychain + CFArrayRef certs = nullptr; + OSStatus status = SecTrustCopyAnchorCertificates(&certs); + if (status == errSecSuccess && certs) { + CFIndex count = CFArrayGetCount(certs); + for (CFIndex i = 0; i < count; i++) { + SecCertificateRef cert = + (SecCertificateRef)CFArrayGetValueAtIndex(certs, i); + CFDataRef data = SecCertificateCopyData(cert); + if (data) { + int ret = mbedtls_x509_crt_parse_der( + &mctx->ca_chain, CFDataGetBytePtr(data), + static_cast(CFDataGetLength(data))); + if (ret == 0) { loaded = true; } + CFRelease(data); + } + } + CFRelease(certs); + } +#else + // Try common CA certificate locations on Linux/Unix + static const char *ca_paths[] = { + "/etc/ssl/certs/ca-certificates.crt", // Debian/Ubuntu + "/etc/pki/tls/certs/ca-bundle.crt", // RHEL/CentOS + "/etc/ssl/ca-bundle.pem", // OpenSUSE + "/etc/pki/tls/cacert.pem", // OpenELEC + "/etc/ssl/cert.pem", // Alpine, FreeBSD + nullptr}; + + for (const char **path = ca_paths; *path; ++path) { + int ret = mbedtls_x509_crt_parse_file(&mctx->ca_chain, *path); + if (ret >= 0) { + loaded = true; + break; + } + } + + // Also try the CA directory + if (!loaded) { + static const char *ca_dirs[] = {"/etc/ssl/certs", // Debian/Ubuntu + "/etc/pki/tls/certs", // RHEL/CentOS + "/usr/share/ca-certificates", nullptr}; + + for (const char **dir = ca_dirs; *dir; ++dir) { + int ret = mbedtls_x509_crt_parse_path(&mctx->ca_chain, *dir); + if (ret >= 0) { + loaded = true; + break; + } + } + } +#endif + + if (loaded) { + mbedtls_ssl_conf_ca_chain(&mctx->conf, &mctx->ca_chain, nullptr); + } + return loaded; +} + +bool set_client_cert_pem(ctx_t ctx, const char *cert, const char *key, + const char *password) { + if (!ctx || !cert || !key) { return false; } + auto mctx = static_cast(ctx); + + // Parse certificate + std::string cert_str(cert); + int ret = mbedtls_x509_crt_parse( + &mctx->own_cert, + reinterpret_cast(cert_str.c_str()), + cert_str.size() + 1); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + // Parse private key + std::string key_str(key); + const unsigned char *pwd = + password ? reinterpret_cast(password) : nullptr; + size_t pwd_len = password ? strlen(password) : 0; + +#ifdef CPPHTTPLIB_MBEDTLS_V3 + ret = mbedtls_pk_parse_key( + &mctx->own_key, reinterpret_cast(key_str.c_str()), + key_str.size() + 1, pwd, pwd_len, mbedtls_ctr_drbg_random, + &mctx->ctr_drbg); +#else + ret = mbedtls_pk_parse_key( + &mctx->own_key, reinterpret_cast(key_str.c_str()), + key_str.size() + 1, pwd, pwd_len); +#endif + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + ret = mbedtls_ssl_conf_own_cert(&mctx->conf, &mctx->own_cert, &mctx->own_key); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + return true; +} + +bool set_client_cert_file(ctx_t ctx, const char *cert_path, + const char *key_path, const char *password) { + if (!ctx || !cert_path || !key_path) { return false; } + auto mctx = static_cast(ctx); + + // Parse certificate file + int ret = mbedtls_x509_crt_parse_file(&mctx->own_cert, cert_path); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + // Parse private key file +#ifdef CPPHTTPLIB_MBEDTLS_V3 + ret = mbedtls_pk_parse_keyfile(&mctx->own_key, key_path, password, + mbedtls_ctr_drbg_random, &mctx->ctr_drbg); +#else + ret = mbedtls_pk_parse_keyfile(&mctx->own_key, key_path, password); +#endif + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + ret = mbedtls_ssl_conf_own_cert(&mctx->conf, &mctx->own_cert, &mctx->own_key); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + return true; +} + +void set_verify_client(ctx_t ctx, bool require) { + if (!ctx) { return; } + auto mctx = static_cast(ctx); + mctx->verify_client = require; + if (require) { + mbedtls_ssl_conf_authmode(&mctx->conf, MBEDTLS_SSL_VERIFY_REQUIRED); + } else { + // If a verify callback is set, use OPTIONAL mode to ensure the callback + // is called (matching OpenSSL behavior). Otherwise use NONE. + mbedtls_ssl_conf_authmode(&mctx->conf, mctx->has_verify_callback + ? MBEDTLS_SSL_VERIFY_OPTIONAL + : MBEDTLS_SSL_VERIFY_NONE); + } +} + +session_t create_session(ctx_t ctx, socket_t sock) { + if (!ctx || sock == INVALID_SOCKET) { return nullptr; } + auto mctx = static_cast(ctx); + + auto session = new (std::nothrow) impl::MbedTlsSession(); + if (!session) { return nullptr; } + + session->sock = sock; + + int ret = mbedtls_ssl_setup(&session->ssl, &mctx->conf); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + delete session; + return nullptr; + } + + // Set BIO callbacks + mbedtls_ssl_set_bio(&session->ssl, &session->sock, impl::mbedtls_net_send_cb, + impl::mbedtls_net_recv_cb, nullptr); + + // Set per-session verify callback with session pointer if callback is + // registered + if (mctx->has_verify_callback) { + mbedtls_ssl_set_verify(&session->ssl, impl::mbedtls_verify_callback, + session); + } + + return static_cast(session); +} + +void free_session(session_t session) { + if (session) { delete static_cast(session); } +} + +bool set_sni(session_t session, const char *hostname) { + if (!session || !hostname) { return false; } + auto msession = static_cast(session); + + int ret = mbedtls_ssl_set_hostname(&msession->ssl, hostname); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + msession->hostname = hostname; + return true; +} + +bool set_hostname(session_t session, const char *hostname) { + // In Mbed TLS, set_hostname also sets up hostname verification + return set_sni(session, hostname); +} + +TlsError connect(session_t session) { + TlsError err; + if (!session) { + err.code = ErrorCode::Fatal; + return err; + } + + auto msession = static_cast(session); + int ret = mbedtls_ssl_handshake(&msession->ssl); + + if (ret == 0) { + err.code = ErrorCode::Success; + } else { + err.code = impl::map_mbedtls_error(ret, err.sys_errno); + err.backend_code = static_cast(-ret); + impl::mbedtls_last_error() = ret; + } + + return err; +} + +TlsError accept(session_t session) { + // Same as connect for Mbed TLS - handshake works for both client and server + auto result = connect(session); + + // After successful handshake, capture SNI from thread-local storage + if (result.code == ErrorCode::Success && session) { + auto msession = static_cast(session); + msession->sni_hostname = std::move(impl::mbedpending_sni()); + impl::mbedpending_sni().clear(); + } + + return result; +} + +bool connect_nonblocking(session_t session, socket_t sock, + time_t timeout_sec, time_t timeout_usec, + TlsError *err) { + if (!session) { + if (err) { err->code = ErrorCode::Fatal; } + return false; + } + + auto msession = static_cast(session); + + // Set socket to non-blocking mode + detail::set_nonblocking(sock, true); + auto cleanup = + detail::scope_exit([&]() { detail::set_nonblocking(sock, false); }); + + int ret; + while ((ret = mbedtls_ssl_handshake(&msession->ssl)) != 0) { + if (ret == MBEDTLS_ERR_SSL_WANT_READ) { + if (detail::select_read(sock, timeout_sec, timeout_usec) > 0) { + continue; + } + } else if (ret == MBEDTLS_ERR_SSL_WANT_WRITE) { + if (detail::select_write(sock, timeout_sec, timeout_usec) > 0) { + continue; + } + } + + // TlsError or timeout + if (err) { + err->code = impl::map_mbedtls_error(ret, err->sys_errno); + err->backend_code = static_cast(-ret); + } + impl::mbedtls_last_error() = ret; + return false; + } + + if (err) { err->code = ErrorCode::Success; } + return true; +} + +bool accept_nonblocking(session_t session, socket_t sock, + time_t timeout_sec, time_t timeout_usec, + TlsError *err) { + // Same implementation as connect for Mbed TLS + bool result = + connect_nonblocking(session, sock, timeout_sec, timeout_usec, err); + + // After successful handshake, capture SNI from thread-local storage + if (result && session) { + auto msession = static_cast(session); + msession->sni_hostname = std::move(impl::mbedpending_sni()); + impl::mbedpending_sni().clear(); + } + + return result; +} + +ssize_t read(session_t session, void *buf, size_t len, TlsError &err) { + if (!session || !buf) { + err.code = ErrorCode::Fatal; + return -1; + } + + auto msession = static_cast(session); + int ret = + mbedtls_ssl_read(&msession->ssl, static_cast(buf), len); + + if (ret > 0) { + err.code = ErrorCode::Success; + return static_cast(ret); + } + + if (ret == 0) { + err.code = ErrorCode::PeerClosed; + return 0; + } + + err.code = impl::map_mbedtls_error(ret, err.sys_errno); + err.backend_code = static_cast(-ret); + impl::mbedtls_last_error() = ret; + return -1; +} + +ssize_t write(session_t session, const void *buf, size_t len, + TlsError &err) { + if (!session || !buf) { + err.code = ErrorCode::Fatal; + return -1; + } + + auto msession = static_cast(session); + int ret = mbedtls_ssl_write(&msession->ssl, + static_cast(buf), len); + + if (ret > 0) { + err.code = ErrorCode::Success; + return static_cast(ret); + } + + if (ret == 0) { + err.code = ErrorCode::PeerClosed; + return 0; + } + + err.code = impl::map_mbedtls_error(ret, err.sys_errno); + err.backend_code = static_cast(-ret); + impl::mbedtls_last_error() = ret; + return -1; +} + +int pending(const_session_t session) { + if (!session) { return 0; } + auto msession = + static_cast(const_cast(session)); + return static_cast(mbedtls_ssl_get_bytes_avail(&msession->ssl)); +} + +void shutdown(session_t session, bool graceful) { + if (!session) { return; } + auto msession = static_cast(session); + + if (graceful) { + // Try to send close_notify, but don't block forever + int ret; + int attempts = 0; + while ((ret = mbedtls_ssl_close_notify(&msession->ssl)) != 0 && + attempts < 3) { + if (ret != MBEDTLS_ERR_SSL_WANT_READ && + ret != MBEDTLS_ERR_SSL_WANT_WRITE) { + break; + } + attempts++; + } + } +} + +bool is_peer_closed(session_t session, socket_t sock) { + if (!session || sock == INVALID_SOCKET) { return true; } + auto msession = static_cast(session); + + // Check if there's already decrypted data available in the TLS buffer + // If so, the connection is definitely alive + if (mbedtls_ssl_get_bytes_avail(&msession->ssl) > 0) { return false; } + + // Set socket to non-blocking to avoid blocking on read + detail::set_nonblocking(sock, true); + auto cleanup = + detail::scope_exit([&]() { detail::set_nonblocking(sock, false); }); + + // Try a 1-byte read to check connection status + // Note: This will consume the byte if data is available, but for the + // purpose of checking if peer is closed, this should be acceptable + // since we're only called when we expect the connection might be closing + unsigned char buf; + int ret = mbedtls_ssl_read(&msession->ssl, &buf, 1); + + // If we got data or WANT_READ (would block), connection is alive + if (ret > 0 || ret == MBEDTLS_ERR_SSL_WANT_READ) { return false; } + + // If we get a peer close notify or a connection reset, the peer is closed + return ret == MBEDTLS_ERR_SSL_PEER_CLOSE_NOTIFY || + ret == MBEDTLS_ERR_NET_CONN_RESET || ret == 0; +} + +cert_t get_peer_cert(const_session_t session) { + if (!session) { return nullptr; } + auto msession = + static_cast(const_cast(session)); + + // Mbed TLS returns a pointer to the internal peer cert chain. + // WARNING: This pointer is only valid while the session is active. + // Do not use the certificate after calling free_session(). + const mbedtls_x509_crt *cert = mbedtls_ssl_get_peer_cert(&msession->ssl); + return const_cast(cert); +} + +void free_cert(cert_t cert) { + // Mbed TLS: peer certificate is owned by the SSL context. + // No-op here, but callers should still call this for cross-backend + // portability. + (void)cert; +} + +bool verify_hostname(cert_t cert, const char *hostname) { + if (!cert || !hostname) { return false; } + auto mcert = static_cast(cert); + std::string host_str(hostname); + + // Check if hostname is an IP address + bool is_ip = impl::is_ipv4_address(host_str); + unsigned char ip_bytes[4]; + if (is_ip) { impl::parse_ipv4(host_str, ip_bytes); } + + // Check Subject Alternative Names (SAN) + // In Mbed TLS 3.x, subject_alt_names contains raw values without ASN.1 tags + // - DNS names: raw string bytes + // - IP addresses: raw IP bytes (4 for IPv4, 16 for IPv6) + const mbedtls_x509_sequence *san = &mcert->subject_alt_names; + while (san != nullptr && san->buf.p != nullptr && san->buf.len > 0) { + const unsigned char *p = san->buf.p; + size_t len = san->buf.len; + + if (is_ip) { + // Check if this SAN is an IPv4 address (4 bytes) + if (len == 4 && memcmp(p, ip_bytes, 4) == 0) { return true; } + // Check if this SAN is an IPv6 address (16 bytes) - skip for now + } else { + // Check if this SAN is a DNS name (printable ASCII string) + bool is_dns = len > 0; + for (size_t i = 0; i < len && is_dns; i++) { + if (p[i] < 32 || p[i] > 126) { is_dns = false; } + } + if (is_dns) { + std::string san_name(reinterpret_cast(p), len); + if (detail::match_hostname(san_name, host_str)) { return true; } + } + } + san = san->next; + } + + // Fallback: Check Common Name (CN) in subject + char cn[256]; + int ret = mbedtls_x509_dn_gets(cn, sizeof(cn), &mcert->subject); + if (ret > 0) { + std::string cn_str(cn); + + // Look for "CN=" in the DN string + size_t cn_pos = cn_str.find("CN="); + if (cn_pos != std::string::npos) { + size_t start = cn_pos + 3; + size_t end = cn_str.find(',', start); + std::string cn_value = + cn_str.substr(start, end == std::string::npos ? end : end - start); + + if (detail::match_hostname(cn_value, host_str)) { return true; } + } + } + + return false; +} + +uint64_t hostname_mismatch_code() { + return static_cast(MBEDTLS_X509_BADCERT_CN_MISMATCH); +} + +long get_verify_result(const_session_t session) { + if (!session) { return -1; } + auto msession = + static_cast(const_cast(session)); + uint32_t flags = mbedtls_ssl_get_verify_result(&msession->ssl); + // Return 0 (X509_V_OK equivalent) if verification passed + return flags == 0 ? 0 : static_cast(flags); +} + +std::string get_cert_subject_cn(cert_t cert) { + if (!cert) return ""; + auto x509 = static_cast(cert); + + // Find the CN in the subject + const mbedtls_x509_name *name = &x509->subject; + while (name != nullptr) { + if (MBEDTLS_OID_CMP(MBEDTLS_OID_AT_CN, &name->oid) == 0) { + return std::string(reinterpret_cast(name->val.p), + name->val.len); + } + name = name->next; + } + return ""; +} + +std::string get_cert_issuer_name(cert_t cert) { + if (!cert) return ""; + auto x509 = static_cast(cert); + + // Build a human-readable issuer name string + char buf[512]; + int ret = mbedtls_x509_dn_gets(buf, sizeof(buf), &x509->issuer); + if (ret < 0) return ""; + return std::string(buf); +} + +bool get_cert_sans(cert_t cert, std::vector &sans) { + sans.clear(); + if (!cert) return false; + auto x509 = static_cast(cert); + + // Parse the Subject Alternative Name extension + const mbedtls_x509_sequence *cur = &x509->subject_alt_names; + while (cur != nullptr) { + if (cur->buf.len > 0) { + // Mbed TLS stores SAN as ASN.1 sequences + // The tag byte indicates the type + const unsigned char *p = cur->buf.p; + size_t len = cur->buf.len; + + // First byte is the tag + unsigned char tag = *p; + p++; + len--; + + // Parse length (simple single-byte length assumed) + if (len > 0 && *p < 0x80) { + size_t value_len = *p; + p++; + len--; + + if (value_len <= len) { + SanEntry entry; + // ASN.1 context tags for GeneralName + switch (tag & 0x1F) { + case 2: // dNSName + entry.type = SanType::DNS; + entry.value = + std::string(reinterpret_cast(p), value_len); + break; + case 7: // iPAddress + entry.type = SanType::IP; + if (value_len == 4) { + // IPv4 + char buf[16]; + snprintf(buf, sizeof(buf), "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); + entry.value = buf; + } else if (value_len == 16) { + // IPv6 + char buf[64]; + snprintf(buf, sizeof(buf), + "%02x%02x:%02x%02x:%02x%02x:%02x%02x:" + "%02x%02x:%02x%02x:%02x%02x:%02x%02x", + p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], + p[9], p[10], p[11], p[12], p[13], p[14], p[15]); + entry.value = buf; + } + break; + case 1: // rfc822Name (email) + entry.type = SanType::EMAIL; + entry.value = + std::string(reinterpret_cast(p), value_len); + break; + case 6: // uniformResourceIdentifier + entry.type = SanType::URI; + entry.value = + std::string(reinterpret_cast(p), value_len); + break; + default: entry.type = SanType::OTHER; break; + } + + if (!entry.value.empty()) { sans.push_back(std::move(entry)); } + } + } + } + cur = cur->next; + } + return true; +} + +bool get_cert_validity(cert_t cert, time_t ¬_before, + time_t ¬_after) { + if (!cert) return false; + auto x509 = static_cast(cert); + + // Convert mbedtls_x509_time to time_t + auto to_time_t = [](const mbedtls_x509_time &t) -> time_t { + struct tm tm_time = {}; + tm_time.tm_year = t.year - 1900; + tm_time.tm_mon = t.mon - 1; + tm_time.tm_mday = t.day; + tm_time.tm_hour = t.hour; + tm_time.tm_min = t.min; + tm_time.tm_sec = t.sec; +#ifdef _WIN32 + return _mkgmtime(&tm_time); +#else + return timegm(&tm_time); +#endif + }; + + not_before = to_time_t(x509->valid_from); + not_after = to_time_t(x509->valid_to); + return true; +} + +std::string get_cert_serial(cert_t cert) { + if (!cert) return ""; + auto x509 = static_cast(cert); + + // Convert serial number to hex string + std::string result; + result.reserve(x509->serial.len * 2); + for (size_t i = 0; i < x509->serial.len; i++) { + char hex[3]; + snprintf(hex, sizeof(hex), "%02X", x509->serial.p[i]); + result += hex; + } + return result; +} + +bool get_cert_der(cert_t cert, std::vector &der) { + if (!cert) return false; + auto crt = static_cast(cert); + if (!crt->raw.p || crt->raw.len == 0) return false; + der.assign(crt->raw.p, crt->raw.p + crt->raw.len); + return true; +} + +const char *get_sni(const_session_t session) { + if (!session) return nullptr; + auto msession = static_cast(session); + + // For server: return SNI received from client during handshake + if (!msession->sni_hostname.empty()) { + return msession->sni_hostname.c_str(); + } + + // For client: return the hostname set via set_sni + if (!msession->hostname.empty()) { return msession->hostname.c_str(); } + + return nullptr; +} + +uint64_t peek_error() { + // Mbed TLS doesn't have an error queue, return the last error + return static_cast(-impl::mbedtls_last_error()); +} + +uint64_t get_error() { + // Mbed TLS doesn't have an error queue, return and clear the last error + uint64_t err = static_cast(-impl::mbedtls_last_error()); + impl::mbedtls_last_error() = 0; + return err; +} + +std::string error_string(uint64_t code) { + char buf[256]; + mbedtls_strerror(-static_cast(code), buf, sizeof(buf)); + return std::string(buf); +} + +ca_store_t create_ca_store(const char *pem, size_t len) { + auto *ca_chain = new (std::nothrow) mbedtls_x509_crt; + if (!ca_chain) { return nullptr; } + + mbedtls_x509_crt_init(ca_chain); + + // mbedtls_x509_crt_parse expects null-terminated PEM + int ret = mbedtls_x509_crt_parse(ca_chain, + reinterpret_cast(pem), + len + 1); // +1 for null terminator + if (ret != 0) { + // Try without +1 in case PEM is already null-terminated + ret = mbedtls_x509_crt_parse( + ca_chain, reinterpret_cast(pem), len); + if (ret != 0) { + mbedtls_x509_crt_free(ca_chain); + delete ca_chain; + return nullptr; + } + } + + return static_cast(ca_chain); +} + +void free_ca_store(ca_store_t store) { + if (store) { + auto *ca_chain = static_cast(store); + mbedtls_x509_crt_free(ca_chain); + delete ca_chain; + } +} + +bool set_ca_store(ctx_t ctx, ca_store_t store) { + if (!ctx || !store) { return false; } + auto *mbed_ctx = static_cast(ctx); + auto *ca_chain = static_cast(store); + + // Free existing CA chain + mbedtls_x509_crt_free(&mbed_ctx->ca_chain); + mbedtls_x509_crt_init(&mbed_ctx->ca_chain); + + // Copy the CA chain (deep copy) + // Parse from the raw data of the source cert + mbedtls_x509_crt *src = ca_chain; + while (src != nullptr) { + int ret = mbedtls_x509_crt_parse_der(&mbed_ctx->ca_chain, src->raw.p, + src->raw.len); + if (ret != 0) { return false; } + src = src->next; + } + + // Update the SSL config to use the new CA chain + mbedtls_ssl_conf_ca_chain(&mbed_ctx->conf, &mbed_ctx->ca_chain, nullptr); + return true; +} + +size_t get_ca_certs(ctx_t ctx, std::vector &certs) { + certs.clear(); + if (!ctx) { return 0; } + auto *mbed_ctx = static_cast(ctx); + + // Iterate through the CA chain + mbedtls_x509_crt *cert = &mbed_ctx->ca_chain; + while (cert != nullptr && cert->raw.len > 0) { + // Create a copy of the certificate for the caller + auto *copy = new mbedtls_x509_crt; + mbedtls_x509_crt_init(copy); + int ret = mbedtls_x509_crt_parse_der(copy, cert->raw.p, cert->raw.len); + if (ret == 0) { + certs.push_back(static_cast(copy)); + } else { + mbedtls_x509_crt_free(copy); + delete copy; + } + cert = cert->next; + } + return certs.size(); +} + +std::vector get_ca_names(ctx_t ctx) { + std::vector names; + if (!ctx) { return names; } + auto *mbed_ctx = static_cast(ctx); + + // Iterate through the CA chain + mbedtls_x509_crt *cert = &mbed_ctx->ca_chain; + while (cert != nullptr && cert->raw.len > 0) { + char buf[512]; + int ret = mbedtls_x509_dn_gets(buf, sizeof(buf), &cert->subject); + if (ret > 0) { names.push_back(buf); } + cert = cert->next; + } + return names; +} + +bool update_server_cert(ctx_t ctx, const char *cert_pem, + const char *key_pem, const char *password) { + if (!ctx || !cert_pem || !key_pem) { return false; } + auto *mbed_ctx = static_cast(ctx); + + // Free existing certificate and key + mbedtls_x509_crt_free(&mbed_ctx->own_cert); + mbedtls_pk_free(&mbed_ctx->own_key); + mbedtls_x509_crt_init(&mbed_ctx->own_cert); + mbedtls_pk_init(&mbed_ctx->own_key); + + // Parse certificate PEM + int ret = mbedtls_x509_crt_parse( + &mbed_ctx->own_cert, reinterpret_cast(cert_pem), + strlen(cert_pem) + 1); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + // Parse private key PEM +#ifdef CPPHTTPLIB_MBEDTLS_V3 + ret = mbedtls_pk_parse_key( + &mbed_ctx->own_key, reinterpret_cast(key_pem), + strlen(key_pem) + 1, + password ? reinterpret_cast(password) : nullptr, + password ? strlen(password) : 0, mbedtls_ctr_drbg_random, + &mbed_ctx->ctr_drbg); +#else + ret = mbedtls_pk_parse_key( + &mbed_ctx->own_key, reinterpret_cast(key_pem), + strlen(key_pem) + 1, + password ? reinterpret_cast(password) : nullptr, + password ? strlen(password) : 0); +#endif + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + // Configure SSL to use the new certificate and key + ret = mbedtls_ssl_conf_own_cert(&mbed_ctx->conf, &mbed_ctx->own_cert, + &mbed_ctx->own_key); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + return true; +} + +bool update_server_client_ca(ctx_t ctx, const char *ca_pem) { + if (!ctx || !ca_pem) { return false; } + auto *mbed_ctx = static_cast(ctx); + + // Free existing CA chain + mbedtls_x509_crt_free(&mbed_ctx->ca_chain); + mbedtls_x509_crt_init(&mbed_ctx->ca_chain); + + // Parse CA PEM + int ret = mbedtls_x509_crt_parse( + &mbed_ctx->ca_chain, reinterpret_cast(ca_pem), + strlen(ca_pem) + 1); + if (ret != 0) { + impl::mbedtls_last_error() = ret; + return false; + } + + // Update SSL config to use new CA chain + mbedtls_ssl_conf_ca_chain(&mbed_ctx->conf, &mbed_ctx->ca_chain, nullptr); + return true; +} + +bool set_verify_callback(ctx_t ctx, VerifyCallback callback) { + if (!ctx) { return false; } + auto *mbed_ctx = static_cast(ctx); + + impl::get_verify_callback() = std::move(callback); + mbed_ctx->has_verify_callback = + static_cast(impl::get_verify_callback()); + + if (mbed_ctx->has_verify_callback) { + // Set OPTIONAL mode to ensure callback is called even when verification + // is disabled (matching OpenSSL behavior where SSL_VERIFY_PEER is set) + mbedtls_ssl_conf_authmode(&mbed_ctx->conf, MBEDTLS_SSL_VERIFY_OPTIONAL); + mbedtls_ssl_conf_verify(&mbed_ctx->conf, impl::mbedtls_verify_callback, + nullptr); + } else { + mbedtls_ssl_conf_verify(&mbed_ctx->conf, nullptr, nullptr); + } + return true; +} + +long get_verify_error(const_session_t session) { + if (!session) { return -1; } + auto *msession = + static_cast(const_cast(session)); + return static_cast(mbedtls_ssl_get_verify_result(&msession->ssl)); +} + +std::string verify_error_string(long error_code) { + if (error_code == 0) { return ""; } + char buf[256]; + mbedtls_x509_crt_verify_info(buf, sizeof(buf), "", + static_cast(error_code)); + // Remove trailing newline if present + std::string result(buf); + while (!result.empty() && (result.back() == '\n' || result.back() == ' ')) { + result.pop_back(); + } + return result; +} + +} // namespace tls + +#endif // CPPHTTPLIB_MBEDTLS_SUPPORT + } // namespace httplib diff --git a/vendor/cpp-httplib/httplib.h b/vendor/cpp-httplib/httplib.h index 7c7790f41f..1fd8b1d1e8 100644 --- a/vendor/cpp-httplib/httplib.h +++ b/vendor/cpp-httplib/httplib.h @@ -8,8 +8,8 @@ #ifndef CPPHTTPLIB_HTTPLIB_H #define CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_VERSION "0.30.2" -#define CPPHTTPLIB_VERSION_NUM "0x001E02" +#define CPPHTTPLIB_VERSION "0.31.0" +#define CPPHTTPLIB_VERSION_NUM "0x001F00" /* * Platform compatibility check @@ -147,7 +147,7 @@ #endif #ifndef CPPHTTPLIB_PAYLOAD_MAX_LENGTH -#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH ((std::numeric_limits::max)()) +#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH (100 * 1024 * 1024) // 100MB #endif #ifndef CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH @@ -383,6 +383,45 @@ using socket_t = int; #endif // CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_MBEDTLS_SUPPORT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#ifdef _MSC_VER +#pragma comment(lib, "crypt32.lib") +#endif +#endif // _WIN32 +#if defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) +#if TARGET_OS_MAC +#include +#endif +#endif // CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN + +// Mbed TLS 3.x API compatibility +#if MBEDTLS_VERSION_MAJOR >= 3 +#define CPPHTTPLIB_MBEDTLS_V3 +#endif + +#endif // CPPHTTPLIB_MBEDTLS_SUPPORT + +// Define CPPHTTPLIB_SSL_ENABLED if any SSL backend is available +// This simplifies conditional compilation when adding new backends (e.g., +// wolfSSL) +#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) || defined(CPPHTTPLIB_MBEDTLS_SUPPORT) +#define CPPHTTPLIB_SSL_ENABLED +#endif + #ifdef CPPHTTPLIB_ZLIB_SUPPORT #include #endif @@ -799,6 +838,105 @@ public: using Range = std::pair; using Ranges = std::vector; +#ifdef CPPHTTPLIB_SSL_ENABLED +// TLS abstraction layer - public type definitions and API +namespace tls { + +// Opaque handles (defined as void* for abstraction) +using ctx_t = void *; +using session_t = void *; +using const_session_t = const void *; // For read-only session access +using cert_t = void *; +using ca_store_t = void *; + +// TLS versions +enum class Version { + TLS1_2 = 0x0303, + TLS1_3 = 0x0304, +}; + +// Subject Alternative Names (SAN) entry types +enum class SanType { DNS, IP, EMAIL, URI, OTHER }; + +// SAN entry structure +struct SanEntry { + SanType type; + std::string value; +}; + +// Verification context for certificate verification callback +struct VerifyContext { + session_t session; // TLS session handle + cert_t cert; // Current certificate being verified + int depth; // Certificate chain depth (0 = leaf) + bool preverify_ok; // OpenSSL/Mbed TLS pre-verification result + long error_code; // Backend-specific error code (0 = no error) + const char *error_string; // Human-readable error description + + // Certificate introspection methods + std::string subject_cn() const; + std::string issuer_name() const; + bool check_hostname(const char *hostname) const; + std::vector sans() const; + bool validity(time_t ¬_before, time_t ¬_after) const; + std::string serial() const; +}; + +using VerifyCallback = std::function; + +// TlsError codes for TLS operations (backend-independent) +enum class ErrorCode : int { + Success = 0, + WantRead, // Non-blocking: need to wait for read + WantWrite, // Non-blocking: need to wait for write + PeerClosed, // Peer closed the connection + Fatal, // Unrecoverable error + SyscallError, // System call error (check sys_errno) + CertVerifyFailed, // Certificate verification failed + HostnameMismatch, // Hostname verification failed +}; + +// TLS error information +struct TlsError { + ErrorCode code = ErrorCode::Fatal; + uint64_t backend_code = 0; // OpenSSL: ERR_get_error(), mbedTLS: return value + int sys_errno = 0; // errno when SyscallError + + // Convert verification error code to human-readable string + static std::string verify_error_to_string(long error_code); +}; + +// RAII wrapper for peer certificate +class PeerCert { +public: + PeerCert(); + PeerCert(PeerCert &&other) noexcept; + PeerCert &operator=(PeerCert &&other) noexcept; + ~PeerCert(); + + PeerCert(const PeerCert &) = delete; + PeerCert &operator=(const PeerCert &) = delete; + + explicit operator bool() const; + std::string subject_cn() const; + std::string issuer_name() const; + bool check_hostname(const char *hostname) const; + std::vector sans() const; + bool validity(time_t ¬_before, time_t ¬_after) const; + std::string serial() const; + +private: + explicit PeerCert(cert_t cert); + cert_t cert_ = nullptr; + friend PeerCert get_peer_cert_from_session(const_session_t session); +}; + +// Callback for TLS context setup (used by SSLServer constructor) +using ContextSetupCallback = std::function; + +} // namespace tls +#endif + struct Request { std::string method; std::string path; @@ -828,9 +966,6 @@ struct Request { ContentReceiverWithProgress content_receiver; DownloadProgress download_progress; UploadProgress upload_progress; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - const SSL *ssl = nullptr; -#endif bool has_header(const std::string &key) const; std::string get_header_value(const std::string &key, const char *def = "", @@ -858,6 +993,12 @@ struct Request { size_t authorization_count_ = 0; std::chrono::time_point start_time_ = (std::chrono::steady_clock::time_point::min)(); + +#ifdef CPPHTTPLIB_SSL_ENABLED + tls::const_session_t ssl = nullptr; + tls::PeerCert peer_cert() const; + std::string sni() const; +#endif }; struct Response { @@ -1005,74 +1146,18 @@ public: class ThreadPool final : public TaskQueue { public: - explicit ThreadPool(size_t n, size_t mqr = 0) - : shutdown_(false), max_queued_requests_(mqr) { - threads_.reserve(n); - while (n) { - threads_.emplace_back(worker(*this)); - n--; - } - } - + explicit ThreadPool(size_t n, size_t mqr = 0); ThreadPool(const ThreadPool &) = delete; ~ThreadPool() override = default; - bool enqueue(std::function fn) override { - { - std::unique_lock lock(mutex_); - if (max_queued_requests_ > 0 && jobs_.size() >= max_queued_requests_) { - return false; - } - jobs_.push_back(std::move(fn)); - } - - cond_.notify_one(); - return true; - } - - void shutdown() override { - // Stop all worker threads... - { - std::unique_lock lock(mutex_); - shutdown_ = true; - } - - cond_.notify_all(); - - // Join... - for (auto &t : threads_) { - t.join(); - } - } + bool enqueue(std::function fn) override; + void shutdown() override; private: struct worker { - explicit worker(ThreadPool &pool) : pool_(pool) {} + explicit worker(ThreadPool &pool); - void operator()() { - for (;;) { - std::function fn; - { - std::unique_lock lock(pool_.mutex_); - - pool_.cond_.wait( - lock, [&] { return !pool_.jobs_.empty() || pool_.shutdown_; }); - - if (pool_.shutdown_ && pool_.jobs_.empty()) { break; } - - fn = pool_.jobs_.front(); - pool_.jobs_.pop_front(); - } - - assert(true == static_cast(fn)); - fn(); - } - -#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) && !defined(OPENSSL_IS_BORINGSSL) && \ - !defined(LIBRESSL_VERSION_NUMBER) - OPENSSL_thread_stop(); -#endif - } + void operator()(); ThreadPool &pool_; }; @@ -1184,6 +1269,9 @@ int close_socket(socket_t sock); ssize_t write_headers(Stream &strm, const Headers &headers); +bool set_socket_opt_time(socket_t sock, int level, int optname, time_t sec, + time_t usec); + } // namespace detail class Server { @@ -1429,17 +1517,6 @@ public: Headers &&request_headers = Headers{}) : res_(std::move(res)), err_(err), request_headers_(std::move(request_headers)) {} -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - Result(std::unique_ptr &&res, Error err, Headers &&request_headers, - int ssl_error) - : res_(std::move(res)), err_(err), - request_headers_(std::move(request_headers)), ssl_error_(ssl_error) {} - Result(std::unique_ptr &&res, Error err, Headers &&request_headers, - int ssl_error, unsigned long ssl_openssl_error) - : res_(std::move(res)), err_(err), - request_headers_(std::move(request_headers)), ssl_error_(ssl_error), - ssl_openssl_error_(ssl_openssl_error) {} -#endif // Response operator bool() const { return res_ != nullptr; } bool operator==(std::nullptr_t) const { return res_ == nullptr; } @@ -1454,13 +1531,6 @@ public: // Error Error error() const { return err_; } -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - // SSL Error - int ssl_error() const { return ssl_error_; } - // OpenSSL Error - unsigned long ssl_openssl_error() const { return ssl_openssl_error_; } -#endif - // Request Headers bool has_request_header(const std::string &key) const; std::string get_request_header_value(const std::string &key, @@ -1474,64 +1544,76 @@ private: std::unique_ptr res_; Error err_ = Error::Unknown; Headers request_headers_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + +#ifdef CPPHTTPLIB_SSL_ENABLED +public: + Result(std::unique_ptr &&res, Error err, Headers &&request_headers, + int ssl_error) + : res_(std::move(res)), err_(err), + request_headers_(std::move(request_headers)), ssl_error_(ssl_error) {} + Result(std::unique_ptr &&res, Error err, Headers &&request_headers, + int ssl_error, unsigned long ssl_backend_error) + : res_(std::move(res)), err_(err), + request_headers_(std::move(request_headers)), ssl_error_(ssl_error), + ssl_backend_error_(ssl_backend_error) {} + + int ssl_error() const { return ssl_error_; } + unsigned long ssl_backend_error() const { return ssl_backend_error_; } + +private: int ssl_error_ = 0; - unsigned long ssl_openssl_error_ = 0; + unsigned long ssl_backend_error_ = 0; +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +public: + [[deprecated("Use ssl_backend_error() instead")]] + unsigned long ssl_openssl_error() const { + return ssl_backend_error_; + } #endif }; struct ClientConnection { socket_t sock = INVALID_SOCKET; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - SSL *ssl = nullptr; -#endif bool is_open() const { return sock != INVALID_SOCKET; } ClientConnection() = default; - ~ClientConnection() { -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - if (ssl) { - SSL_free(ssl); - ssl = nullptr; - } -#endif - if (sock != INVALID_SOCKET) { - detail::close_socket(sock); - sock = INVALID_SOCKET; - } - } + ~ClientConnection(); ClientConnection(const ClientConnection &) = delete; ClientConnection &operator=(const ClientConnection &) = delete; ClientConnection(ClientConnection &&other) noexcept : sock(other.sock) -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED , - ssl(other.ssl) + session(other.session) #endif { other.sock = INVALID_SOCKET; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - other.ssl = nullptr; +#ifdef CPPHTTPLIB_SSL_ENABLED + other.session = nullptr; #endif } ClientConnection &operator=(ClientConnection &&other) noexcept { if (this != &other) { sock = other.sock; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - ssl = other.ssl; -#endif other.sock = INVALID_SOCKET; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - other.ssl = nullptr; +#ifdef CPPHTTPLIB_SSL_ENABLED + session = other.session; + other.session = nullptr; #endif } return *this; } + +#ifdef CPPHTTPLIB_SSL_ENABLED + tls::session_t session = nullptr; +#endif }; namespace detail { @@ -1540,7 +1622,9 @@ struct ChunkedDecoder; struct BodyReader { Stream *stream = nullptr; + bool has_content_length = false; size_t content_length = 0; + size_t payload_max_length = CPPHTTPLIB_PAYLOAD_MAX_LENGTH; size_t bytes_read = 0; bool chunked = false; bool eof = false; @@ -1610,6 +1694,7 @@ public: std::unique_ptr decompressor_; std::string decompress_buffer_; size_t decompress_offset_ = 0; + size_t decompressed_bytes_read_ = 0; }; // clang-format off @@ -1756,10 +1841,6 @@ public: void set_basic_auth(const std::string &username, const std::string &password); void set_bearer_token_auth(const std::string &token); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void set_digest_auth(const std::string &username, - const std::string &password); -#endif void set_keep_alive(bool on); void set_follow_location(bool on); @@ -1770,30 +1851,14 @@ public: void set_decompress(bool on); + void set_payload_max_length(size_t length); + void set_interface(const std::string &intf); void set_proxy(const std::string &host, int port); void set_proxy_basic_auth(const std::string &username, const std::string &password); void set_proxy_bearer_token_auth(const std::string &token); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void set_proxy_digest_auth(const std::string &username, - const std::string &password); -#endif - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void set_ca_cert_path(const std::string &ca_cert_file_path, - const std::string &ca_cert_dir_path = std::string()); - void set_ca_cert_store(X509_STORE *ca_cert_store); - X509_STORE *create_ca_cert_store(const char *ca_cert, std::size_t size) const; -#endif - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void enable_server_certificate_verification(bool enabled); - void enable_server_hostname_verification(bool enabled); - void set_server_certificate_verifier( - std::function verifier); -#endif void set_logger(Logger logger); void set_error_logger(ErrorLogger error_logger); @@ -1801,11 +1866,15 @@ public: protected: struct Socket { socket_t sock = INVALID_SOCKET; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - SSL *ssl = nullptr; -#endif + + // For Mbed TLS compatibility: start_time for request timeout tracking + std::chrono::time_point start_time_; bool is_open() const { return sock != INVALID_SOCKET; } + +#ifdef CPPHTTPLIB_SSL_ENABLED + tls::session_t ssl = nullptr; +#endif }; virtual bool create_and_connect_socket(Socket &socket, Error &error); @@ -1872,10 +1941,6 @@ protected: std::string basic_auth_username_; std::string basic_auth_password_; std::string bearer_token_auth_token_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - std::string digest_auth_username_; - std::string digest_auth_password_; -#endif bool keep_alive_ = false; bool follow_location_ = false; @@ -1890,6 +1955,9 @@ protected: bool compress_ = false; bool decompress_ = true; + size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH; + bool has_payload_max_length_ = false; + std::string interface_; std::string proxy_host_; @@ -1898,33 +1966,11 @@ protected: std::string proxy_basic_auth_username_; std::string proxy_basic_auth_password_; std::string proxy_bearer_token_auth_token_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - std::string proxy_digest_auth_username_; - std::string proxy_digest_auth_password_; -#endif - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - std::string ca_cert_file_path_; - std::string ca_cert_dir_path_; - - X509_STORE *ca_cert_store_ = nullptr; -#endif - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - bool server_certificate_verification_ = true; - bool server_hostname_verification_ = true; - std::function server_certificate_verifier_; -#endif mutable std::mutex logger_mutex_; Logger logger_; ErrorLogger error_logger_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - int last_ssl_error_ = 0; - unsigned long last_openssl_error_ = 0; -#endif - private: bool send_(Request &req, Response &res, Error &error); Result send_(Request &&req); @@ -1969,6 +2015,44 @@ private: virtual bool is_ssl() const; void transfer_socket_ownership_to_handle(StreamHandle &handle); + +#ifdef CPPHTTPLIB_SSL_ENABLED +public: + void set_digest_auth(const std::string &username, + const std::string &password); + void set_proxy_digest_auth(const std::string &username, + const std::string &password); + void set_ca_cert_path(const std::string &ca_cert_file_path, + const std::string &ca_cert_dir_path = std::string()); + void enable_server_certificate_verification(bool enabled); + void enable_server_hostname_verification(bool enabled); + +protected: + std::string digest_auth_username_; + std::string digest_auth_password_; + std::string proxy_digest_auth_username_; + std::string proxy_digest_auth_password_; + std::string ca_cert_file_path_; + std::string ca_cert_dir_path_; + bool server_certificate_verification_ = true; + bool server_hostname_verification_ = true; + std::string ca_cert_pem_; // Store CA cert PEM for redirect transfer + int last_ssl_error_ = 0; + unsigned long last_backend_error_ = 0; +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +public: + [[deprecated("Use load_ca_cert_store() instead")]] + void set_ca_cert_store(X509_STORE *ca_cert_store); + + [[deprecated("Use tls::create_ca_store() instead")]] + X509_STORE *create_ca_cert_store(const char *ca_cert, std::size_t size) const; + + [[deprecated("Use set_server_certificate_verifier(VerifyCallback) instead")]] + virtual void set_server_certificate_verifier( + std::function verifier); +#endif }; class Client { @@ -2138,10 +2222,6 @@ public: void set_basic_auth(const std::string &username, const std::string &password); void set_bearer_token_auth(const std::string &token); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void set_digest_auth(const std::string &username, - const std::string &password); -#endif void set_keep_alive(bool on); void set_follow_location(bool on); @@ -2153,49 +2233,65 @@ public: void set_decompress(bool on); + void set_payload_max_length(size_t length); + void set_interface(const std::string &intf); void set_proxy(const std::string &host, int port); void set_proxy_basic_auth(const std::string &username, const std::string &password); void set_proxy_bearer_token_auth(const std::string &token); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void set_proxy_digest_auth(const std::string &username, - const std::string &password); -#endif - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void enable_server_certificate_verification(bool enabled); - void enable_server_hostname_verification(bool enabled); - void set_server_certificate_verifier( - std::function verifier); -#endif - void set_logger(Logger logger); void set_error_logger(ErrorLogger error_logger); - // SSL -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - void set_ca_cert_path(const std::string &ca_cert_file_path, - const std::string &ca_cert_dir_path = std::string()); - - void set_ca_cert_store(X509_STORE *ca_cert_store); - void load_ca_cert_store(const char *ca_cert, std::size_t size); - - long get_openssl_verify_result() const; - - SSL_CTX *ssl_context() const; -#endif - private: std::unique_ptr cli_; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED +public: + void set_digest_auth(const std::string &username, + const std::string &password); + void set_proxy_digest_auth(const std::string &username, + const std::string &password); + void enable_server_certificate_verification(bool enabled); + void enable_server_hostname_verification(bool enabled); + void set_ca_cert_path(const std::string &ca_cert_file_path, + const std::string &ca_cert_dir_path = std::string()); + + void set_ca_cert_store(tls::ca_store_t ca_cert_store); + void load_ca_cert_store(const char *ca_cert, std::size_t size); + + void set_server_certificate_verifier(tls::VerifyCallback verifier); + + void set_session_verifier( + std::function verifier); + + tls::ctx_t tls_context() const; + +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) + void enable_windows_certificate_verification(bool enabled); +#endif + +private: bool is_ssl_ = false; #endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +public: + [[deprecated("Use tls_context() instead")]] + SSL_CTX *ssl_context() const; + + [[deprecated("Use set_session_verifier(session_t) instead")]] + void set_server_certificate_verifier( + std::function verifier); + + [[deprecated("Use Result::ssl_backend_error() instead")]] + long get_verify_result() const; +#endif }; -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef CPPHTTPLIB_SSL_ENABLED class SSLServer : public Server { public: SSLServer(const char *cert_path, const char *private_key_path, @@ -2203,32 +2299,60 @@ public: const char *client_ca_cert_dir_path = nullptr, const char *private_key_password = nullptr); - SSLServer(X509 *cert, EVP_PKEY *private_key, - X509_STORE *client_ca_cert_store = nullptr); + struct PemMemory { + const char *cert_pem; + size_t cert_pem_len; + const char *key_pem; + size_t key_pem_len; + const char *client_ca_pem; + size_t client_ca_pem_len; + const char *private_key_password; + }; + explicit SSLServer(const PemMemory &pem); - SSLServer( - const std::function &setup_ssl_ctx_callback); + // The callback receives the ctx_t handle which can be cast to the + // appropriate backend type (SSL_CTX* for OpenSSL, + // tls::impl::MbedTlsContext* for Mbed TLS) + explicit SSLServer(const tls::ContextSetupCallback &setup_callback); ~SSLServer() override; bool is_valid() const override; - SSL_CTX *ssl_context() const; + bool update_certs_pem(const char *cert_pem, const char *key_pem, + const char *client_ca_pem = nullptr, + const char *password = nullptr); - void update_certs(X509 *cert, EVP_PKEY *private_key, - X509_STORE *client_ca_cert_store = nullptr); + tls::ctx_t tls_context() const { return ctx_; } int ssl_last_error() const { return last_ssl_error_; } private: bool process_and_close_socket(socket_t sock) override; - STACK_OF(X509_NAME) * extract_ca_names_from_x509_store(X509_STORE *store); - - SSL_CTX *ctx_; + tls::ctx_t ctx_ = nullptr; std::mutex ctx_mutex_; int last_ssl_error_ = 0; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +public: + [[deprecated("Use SSLServer(PemMemory) or " + "SSLServer(ContextSetupCallback) instead")]] + SSLServer(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store = nullptr); + + [[deprecated("Use SSLServer(ContextSetupCallback) instead")]] + SSLServer( + const std::function &setup_ssl_ctx_callback); + + [[deprecated("Use tls_context() instead")]] + SSL_CTX *ssl_context() const; + + [[deprecated("Use update_certs_pem() instead")]] + void update_certs(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store = nullptr); +#endif }; class SSLClient final : public ClientImpl { @@ -2242,20 +2366,34 @@ public: const std::string &client_key_path, const std::string &private_key_password = std::string()); - explicit SSLClient(const std::string &host, int port, X509 *client_cert, - EVP_PKEY *client_key, - const std::string &private_key_password = std::string()); + struct PemMemory { + const char *cert_pem; + size_t cert_pem_len; + const char *key_pem; + size_t key_pem_len; + const char *private_key_password; + }; + explicit SSLClient(const std::string &host, int port, const PemMemory &pem); ~SSLClient() override; bool is_valid() const override; - void set_ca_cert_store(X509_STORE *ca_cert_store); + void set_ca_cert_store(tls::ca_store_t ca_cert_store); void load_ca_cert_store(const char *ca_cert, std::size_t size); - long get_openssl_verify_result() const; + void set_server_certificate_verifier(tls::VerifyCallback verifier); - SSL_CTX *ssl_context() const; + // Post-handshake session verifier (backend-independent) + void set_session_verifier( + std::function verifier); + + tls::ctx_t tls_context() const { return ctx_; } + +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) + void enable_windows_certificate_verification(bool enabled); +#endif private: bool create_and_connect_socket(Socket &socket, Error &error) override; @@ -2277,26 +2415,45 @@ private: bool load_certs(); - bool verify_host(X509 *server_cert) const; - bool verify_host_with_subject_alt_name(X509 *server_cert) const; - bool verify_host_with_common_name(X509 *server_cert) const; - bool check_host_name(const char *pattern, size_t pattern_len) const; - - SSL_CTX *ctx_; + tls::ctx_t ctx_ = nullptr; std::mutex ctx_mutex_; std::once_flag initialize_cert_; - std::vector host_components_; - long verify_result_ = 0; - friend class ClientImpl; -}; + std::function session_verifier_; + +#if defined(_WIN32) && \ + !defined(CPPHTTPLIB_DISABLE_WINDOWS_AUTOMATIC_ROOT_CERTIFICATES_UPDATE) + bool enable_windows_cert_verification_ = true; #endif -/* - * Implementation of template methods. - */ + friend class ClientImpl; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +public: + [[deprecated("Use SSLClient(host, port, PemMemory) instead")]] + explicit SSLClient(const std::string &host, int port, X509 *client_cert, + EVP_PKEY *client_key, + const std::string &private_key_password = std::string()); + + [[deprecated("Use Result::ssl_backend_error() instead")]] + long get_verify_result() const; + + [[deprecated("Use tls_context() instead")]] + SSL_CTX *ssl_context() const; + + [[deprecated("Use set_session_verifier(session_t) instead")]] + void set_server_certificate_verifier( + std::function verifier) override; + +private: + bool verify_host(X509 *server_cert) const; + bool verify_host_with_subject_alt_name(X509 *server_cert) const; + bool verify_host_with_common_name(X509 *server_cert) const; +#endif +}; +#endif // CPPHTTPLIB_SSL_ENABLED namespace detail { @@ -2345,66 +2502,6 @@ inline size_t get_header_value_u64(const Headers &headers, } // namespace detail -inline size_t Request::get_header_value_u64(const std::string &key, size_t def, - size_t id) const { - return detail::get_header_value_u64(headers, key, def, id); -} - -inline size_t Response::get_header_value_u64(const std::string &key, size_t def, - size_t id) const { - return detail::get_header_value_u64(headers, key, def, id); -} - -namespace detail { - -inline bool set_socket_opt_impl(socket_t sock, int level, int optname, - const void *optval, socklen_t optlen) { - return setsockopt(sock, level, optname, -#ifdef _WIN32 - reinterpret_cast(optval), -#else - optval, -#endif - optlen) == 0; -} - -inline bool set_socket_opt(socket_t sock, int level, int optname, int optval) { - return set_socket_opt_impl(sock, level, optname, &optval, sizeof(optval)); -} - -inline bool set_socket_opt_time(socket_t sock, int level, int optname, - time_t sec, time_t usec) { -#ifdef _WIN32 - auto timeout = static_cast(sec * 1000 + usec / 1000); -#else - timeval timeout; - timeout.tv_sec = static_cast(sec); - timeout.tv_usec = static_cast(usec); -#endif - return set_socket_opt_impl(sock, level, optname, &timeout, sizeof(timeout)); -} - -} // namespace detail - -inline void default_socket_options(socket_t sock) { - detail::set_socket_opt(sock, SOL_SOCKET, -#ifdef SO_REUSEPORT - SO_REUSEPORT, -#else - SO_REUSEADDR, -#endif - 1); -} - -inline std::string get_bearer_token_auth(const Request &req) { - if (req.has_header("Authorization")) { - constexpr auto bearer_header_prefix_len = detail::str_len("Bearer "); - return req.get_header_value("Authorization") - .substr(bearer_header_prefix_len); - } - return ""; -} - template inline Server & Server::set_read_timeout(const std::chrono::duration &duration) { @@ -2429,12 +2526,6 @@ Server::set_idle_interval(const std::chrono::duration &duration) { return *this; } -inline size_t Result::get_request_header_value_u64(const std::string &key, - size_t def, - size_t id) const { - return detail::get_header_value_u64(request_headers_, key, def, id); -} - template inline void ClientImpl::set_connection_timeout( const std::chrono::duration &duration) { @@ -2842,105 +2933,73 @@ bool is_field_content(const std::string &s); bool is_field_value(const std::string &s); } // namespace fields - } // namespace detail +/* + * TLS Abstraction Layer Declarations + */ + +#ifdef CPPHTTPLIB_SSL_ENABLED +// TLS abstraction layer - backend-specific type declarations +#ifdef CPPHTTPLIB_MBEDTLS_SUPPORT +namespace tls { +namespace impl { + +// Mbed TLS context wrapper (holds config, entropy, DRBG, CA chain, own +// cert/key). This struct is accessible via tls::impl for use in SSL context +// setup callbacks (cast ctx_t to tls::impl::MbedTlsContext*). +struct MbedTlsContext { + mbedtls_ssl_config conf; + mbedtls_entropy_context entropy; + mbedtls_ctr_drbg_context ctr_drbg; + mbedtls_x509_crt ca_chain; + mbedtls_x509_crt own_cert; + mbedtls_pk_context own_key; + bool is_server = false; + bool verify_client = false; + bool has_verify_callback = false; + + MbedTlsContext(); + ~MbedTlsContext(); + + MbedTlsContext(const MbedTlsContext &) = delete; + MbedTlsContext &operator=(const MbedTlsContext &) = delete; +}; + +} // namespace impl +} // namespace tls +#endif + +#endif // CPPHTTPLIB_SSL_ENABLED + namespace stream { class Result { public: - Result() : chunk_size_(8192) {} - - explicit Result(ClientImpl::StreamHandle &&handle, size_t chunk_size = 8192) - : handle_(std::move(handle)), chunk_size_(chunk_size) {} - - Result(Result &&other) noexcept - : handle_(std::move(other.handle_)), buffer_(std::move(other.buffer_)), - current_size_(other.current_size_), chunk_size_(other.chunk_size_), - finished_(other.finished_) { - other.current_size_ = 0; - other.finished_ = true; - } - - Result &operator=(Result &&other) noexcept { - if (this != &other) { - handle_ = std::move(other.handle_); - buffer_ = std::move(other.buffer_); - current_size_ = other.current_size_; - chunk_size_ = other.chunk_size_; - finished_ = other.finished_; - other.current_size_ = 0; - other.finished_ = true; - } - return *this; - } - + Result(); + explicit Result(ClientImpl::StreamHandle &&handle, size_t chunk_size = 8192); + Result(Result &&other) noexcept; + Result &operator=(Result &&other) noexcept; Result(const Result &) = delete; Result &operator=(const Result &) = delete; - // Check if the result is valid (connection succeeded and response received) - bool is_valid() const { return handle_.is_valid(); } - explicit operator bool() const { return is_valid(); } - - // Response status code - int status() const { - return handle_.response ? handle_.response->status : -1; - } - - // Response headers - const Headers &headers() const { - static const Headers empty_headers; - return handle_.response ? handle_.response->headers : empty_headers; - } - + // Response info + bool is_valid() const; + explicit operator bool() const; + int status() const; + const Headers &headers() const; std::string get_header_value(const std::string &key, - const char *def = "") const { - return handle_.response ? handle_.response->get_header_value(key, def) - : def; - } + const char *def = "") const; + bool has_header(const std::string &key) const; + Error error() const; + Error read_error() const; + bool has_read_error() const; - bool has_header(const std::string &key) const { - return handle_.response ? handle_.response->has_header(key) : false; - } - - // Error information - Error error() const { return handle_.error; } - Error read_error() const { return handle_.get_read_error(); } - bool has_read_error() const { return handle_.has_read_error(); } - - // Streaming iteration API - // Call next() to read the next chunk, then access data via data()/size() - // Returns true if data was read, false when stream is exhausted - bool next() { - if (!handle_.is_valid() || finished_) { return false; } - - if (buffer_.size() < chunk_size_) { buffer_.resize(chunk_size_); } - - ssize_t n = handle_.read(&buffer_[0], chunk_size_); - if (n > 0) { - current_size_ = static_cast(n); - return true; - } - - current_size_ = 0; - finished_ = true; - return false; - } - - // Pointer to current chunk data (valid after next() returns true) - const char *data() const { return buffer_.data(); } - - // Size of current chunk (valid after next() returns true) - size_t size() const { return current_size_; } - - // Convenience method: read all remaining data into a string - std::string read_all() { - std::string result; - while (next()) { - result.append(data(), size()); - } - return result; - } + // Stream reading + bool next(); + const char *data() const; + size_t size() const; + std::string read_all(); private: ClientImpl::StreamHandle handle_; @@ -3205,13 +3264,8 @@ struct SSEMessage { std::string data; // Event payload std::string id; // Event ID for Last-Event-ID header - SSEMessage() : event("message") {} - - void clear() { - event = "message"; - data.clear(); - id.clear(); - } + SSEMessage(); + void clear(); }; class SSEClient { @@ -3220,255 +3274,40 @@ public: using ErrorHandler = std::function; using OpenHandler = std::function; - SSEClient(Client &client, const std::string &path) - : client_(client), path_(path) {} - - SSEClient(Client &client, const std::string &path, const Headers &headers) - : client_(client), path_(path), headers_(headers) {} - - ~SSEClient() { stop(); } + SSEClient(Client &client, const std::string &path); + SSEClient(Client &client, const std::string &path, const Headers &headers); + ~SSEClient(); SSEClient(const SSEClient &) = delete; SSEClient &operator=(const SSEClient &) = delete; // Event handlers - SSEClient &on_message(MessageHandler handler) { - on_message_ = std::move(handler); - return *this; - } - - SSEClient &on_event(const std::string &type, MessageHandler handler) { - event_handlers_[type] = std::move(handler); - return *this; - } - - SSEClient &on_open(OpenHandler handler) { - on_open_ = std::move(handler); - return *this; - } - - SSEClient &on_error(ErrorHandler handler) { - on_error_ = std::move(handler); - return *this; - } - - SSEClient &set_reconnect_interval(int ms) { - reconnect_interval_ms_ = ms; - return *this; - } - - SSEClient &set_max_reconnect_attempts(int n) { - max_reconnect_attempts_ = n; - return *this; - } + SSEClient &on_message(MessageHandler handler); + SSEClient &on_event(const std::string &type, MessageHandler handler); + SSEClient &on_open(OpenHandler handler); + SSEClient &on_error(ErrorHandler handler); + SSEClient &set_reconnect_interval(int ms); + SSEClient &set_max_reconnect_attempts(int n); // State accessors - bool is_connected() const { return connected_.load(); } - const std::string &last_event_id() const { return last_event_id_; } + bool is_connected() const; + const std::string &last_event_id() const; // Blocking start - runs event loop with auto-reconnect - void start() { - running_.store(true); - run_event_loop(); - } + void start(); // Non-blocking start - runs in background thread - void start_async() { - running_.store(true); - async_thread_ = std::thread([this]() { run_event_loop(); }); - } + void start_async(); // Stop the client (thread-safe) - void stop() { - running_.store(false); - client_.stop(); // Cancel any pending operations - if (async_thread_.joinable()) { async_thread_.join(); } - } + void stop(); private: - // Parse a single SSE field line - // Returns true if this line ends an event (blank line) - bool parse_sse_line(const std::string &line, SSEMessage &msg, int &retry_ms) { - // Blank line signals end of event - if (line.empty() || line == "\r") { return true; } - - // Lines starting with ':' are comments (ignored) - if (!line.empty() && line[0] == ':') { return false; } - - // Find the colon separator - auto colon_pos = line.find(':'); - if (colon_pos == std::string::npos) { - // Line with no colon is treated as field name with empty value - return false; - } - - auto field = line.substr(0, colon_pos); - std::string value; - - // Value starts after colon, skip optional single space - if (colon_pos + 1 < line.size()) { - auto value_start = colon_pos + 1; - if (line[value_start] == ' ') { value_start++; } - value = line.substr(value_start); - // Remove trailing \r if present - if (!value.empty() && value.back() == '\r') { value.pop_back(); } - } - - // Handle known fields - if (field == "event") { - msg.event = value; - } else if (field == "data") { - // Multiple data lines are concatenated with newlines - if (!msg.data.empty()) { msg.data += "\n"; } - msg.data += value; - } else if (field == "id") { - // Empty id is valid (clears the last event ID) - msg.id = value; - } else if (field == "retry") { - // Parse retry interval in milliseconds - { - int v = 0; - auto res = - detail::from_chars(value.data(), value.data() + value.size(), v); - if (res.ec == std::errc{}) { retry_ms = v; } - } - } - // Unknown fields are ignored per SSE spec - - return false; - } - - // Main event loop with auto-reconnect - void run_event_loop() { - auto reconnect_count = 0; - - while (running_.load()) { - // Build headers, including Last-Event-ID if we have one - auto request_headers = headers_; - if (!last_event_id_.empty()) { - request_headers.emplace("Last-Event-ID", last_event_id_); - } - - // Open streaming connection - auto result = stream::Get(client_, path_, request_headers); - - // Connection error handling - if (!result) { - connected_.store(false); - if (on_error_) { on_error_(result.error()); } - - if (!should_reconnect(reconnect_count)) { break; } - wait_for_reconnect(); - reconnect_count++; - continue; - } - - if (result.status() != 200) { - connected_.store(false); - // For certain errors, don't reconnect - if (result.status() == 204 || // No Content - server wants us to stop - result.status() == 404 || // Not Found - result.status() == 401 || // Unauthorized - result.status() == 403) { // Forbidden - if (on_error_) { on_error_(Error::Connection); } - break; - } - - if (on_error_) { on_error_(Error::Connection); } - - if (!should_reconnect(reconnect_count)) { break; } - wait_for_reconnect(); - reconnect_count++; - continue; - } - - // Connection successful - connected_.store(true); - reconnect_count = 0; - if (on_open_) { on_open_(); } - - // Event receiving loop - std::string buffer; - SSEMessage current_msg; - - while (running_.load() && result.next()) { - buffer.append(result.data(), result.size()); - - // Process complete lines in the buffer - size_t line_start = 0; - size_t newline_pos; - - while ((newline_pos = buffer.find('\n', line_start)) != - std::string::npos) { - auto line = buffer.substr(line_start, newline_pos - line_start); - line_start = newline_pos + 1; - - // Parse the line and check if event is complete - auto event_complete = - parse_sse_line(line, current_msg, reconnect_interval_ms_); - - if (event_complete && !current_msg.data.empty()) { - // Update last_event_id for reconnection - if (!current_msg.id.empty()) { last_event_id_ = current_msg.id; } - - // Dispatch event to appropriate handler - dispatch_event(current_msg); - - current_msg.clear(); - } - } - - // Keep unprocessed data in buffer - buffer.erase(0, line_start); - } - - // Connection ended - connected_.store(false); - - if (!running_.load()) { break; } - - // Check for read errors - if (result.has_read_error()) { - if (on_error_) { on_error_(result.read_error()); } - } - - if (!should_reconnect(reconnect_count)) { break; } - wait_for_reconnect(); - reconnect_count++; - } - - connected_.store(false); - } - - // Dispatch event to appropriate handler - void dispatch_event(const SSEMessage &msg) { - // Check for specific event type handler first - auto it = event_handlers_.find(msg.event); - if (it != event_handlers_.end()) { - it->second(msg); - return; - } - - // Fall back to generic message handler - if (on_message_) { on_message_(msg); } - } - - // Check if we should attempt to reconnect - bool should_reconnect(int count) const { - if (!running_.load()) { return false; } - if (max_reconnect_attempts_ == 0) { return true; } // unlimited - return count < max_reconnect_attempts_; - } - - // Wait for reconnect interval - void wait_for_reconnect() { - // Use small increments to check running_ flag frequently - auto waited = 0; - while (running_.load() && waited < reconnect_interval_ms_) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - waited += 100; - } - } + bool parse_sse_line(const std::string &line, SSEMessage &msg, int &retry_ms); + void run_event_loop(); + void dispatch_event(const SSEMessage &msg); + bool should_reconnect(int count) const; + void wait_for_reconnect(); // Client and path Client &client_;