Revert obsolete server-context change
parent 29ce31b1a3
commit e590f31f67
@@ -15,7 +15,6 @@
 #include <cstddef>
 #include <cinttypes>
 #include <memory>
-#include <stdexcept>
 #include <filesystem>
 
 // fix problem with std::min and std::max
@@ -2748,15 +2747,7 @@ private:
 
     slot.i_batch = -1;
 
-    try {
-        common_sampler_accept(slot.smpl.get(), id, true);
-    } catch (std::runtime_error & e) {
-        SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what());
-        send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER);
-        slot.release();
-        slot.i_batch = -1;
-        continue; // continue loop of slots
-    }
+    common_sampler_accept(slot.smpl.get(), id, true);
 
     // here we have synchronized the llama_context (due to the sampling above), so we can do time measurement
     const int64_t t_current = ggml_time_us();
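For context, after the revert the token-accept path has no intermediate try/catch: the sampled token is passed straight to common_sampler_accept and the timestamp used for speed reporting is taken right after. Below is a minimal, self-contained sketch of that control flow; the stub types and functions (Slot, the common_sampler_accept and time_us stand-ins, and the token id) are illustrative assumptions, not the actual server implementation.

// Minimal sketch of the post-revert control flow (illustrative stubs only).
#include <cstdint>
#include <cstdio>
#include <chrono>
#include <memory>

struct common_sampler {};                       // opaque sampler state (stub)

struct Slot {
    std::unique_ptr<common_sampler> smpl = std::make_unique<common_sampler>();
    int i_batch = -1;
};

// In the real server this updates the sampler's internal state with the accepted token.
static void common_sampler_accept(common_sampler * /*smpl*/, int32_t token, bool /*accept_grammar*/) {
    std::printf("accepted token %d\n", token);
}

// Stand-in for ggml_time_us(): microseconds from a monotonic clock.
static int64_t time_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    Slot slot;
    const int32_t id = 42;                      // hypothetical sampled token id

    slot.i_batch = -1;

    // Post-revert: accept the token directly, with no surrounding try/catch.
    common_sampler_accept(slot.smpl.get(), id, true);

    // The sampling above has synchronized the context, so a timestamp taken here is meaningful.
    const int64_t t_current = time_us();
    std::printf("t_current = %lld us\n", (long long) t_current);
    return 0;
}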