diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index f82a6cce56..ceafcac179 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -15,7 +15,6 @@
 #include
 #include
 #include
-#include
 #include
 
 // fix problem with std::min and std::max
@@ -2748,15 +2747,7 @@ private:
 
                 slot.i_batch = -1;
 
-                try {
-                    common_sampler_accept(slot.smpl.get(), id, true);
-                } catch (std::runtime_error & e) {
-                    SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what());
-                    send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER);
-                    slot.release();
-                    slot.i_batch = -1;
-                    continue; // continue loop of slots
-                }
+                common_sampler_accept(slot.smpl.get(), id, true);
 
                 // here we have synchronized the llama_context (due to the sampling above), so we can do time measurement
                 const int64_t t_current = ggml_time_us();
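
For context, the removed code followed a common pattern: catch a per-item exception inside a processing loop, report the error, release that item's resources, and continue with the remaining items. After this change, an exception thrown by common_sampler_accept (if any) would instead propagate out of the slot loop to the caller. Below is a minimal, self-contained sketch of the two patterns; the names Slot, accept_token, process_slots_catching and process_slots_propagating are hypothetical stand-ins, not the server's actual types or functions.

#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for the server's per-request slot and its sampler state.
struct Slot {
    int  id      = 0;
    int  i_batch = -1;
    bool active  = true;

    void release() { active = false; } // free per-slot resources
};

// Hypothetical stand-in for common_sampler_accept(); may throw on failure.
void accept_token(Slot & slot, int token) {
    if (token < 0) {
        throw std::runtime_error("invalid token for slot " + std::to_string(slot.id));
    }
    // ... update the sampler state with the accepted token ...
}

// Pattern of the removed code: handle the error per slot and keep iterating.
void process_slots_catching(std::vector<Slot> & slots, int token) {
    for (auto & slot : slots) {
        try {
            accept_token(slot, token);
        } catch (const std::runtime_error & e) {
            std::fprintf(stderr, "slot %d: %s\n", slot.id, e.what());
            slot.release();
            slot.i_batch = -1;
            continue; // move on to the remaining slots
        }
    }
}

// Pattern after the change: an exception propagates out of the loop to the caller.
void process_slots_propagating(std::vector<Slot> & slots, int token) {
    for (auto & slot : slots) {
        accept_token(slot, token); // a throw here abandons the rest of the loop
    }
}

int main() {
    std::vector<Slot> slots(2);
    slots[0].id = 1;
    slots[1].id = 2;

    process_slots_catching(slots, /*token=*/-1); // logs the error and releases each slot
    // process_slots_propagating(slots, -1);     // would throw std::runtime_error instead
    return 0;
}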