Revert obsolete server-context change

This commit is contained in:
Piotr Wilkin 2026-02-08 16:05:21 +01:00
parent 29ce31b1a3
commit e590f31f67
1 changed file with 1 addition and 10 deletions

View File

@@ -15,7 +15,6 @@
#include <cstddef>
#include <cinttypes>
#include <memory>
#include <stdexcept>
#include <filesystem>
// fix problem with std::min and std::max
@@ -2748,15 +2747,7 @@ private:
slot.i_batch = -1;
try {
common_sampler_accept(slot.smpl.get(), id, true);
} catch (std::runtime_error & e) {
SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what());
send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER);
slot.release();
slot.i_batch = -1;
continue; // continue loop of slots
}
common_sampler_accept(slot.smpl.get(), id, true);
// here we have synchronized the llama_context (due to the sampling above), so we can do time measurement
const int64_t t_current = ggml_time_us();