From ca993bad5176571595c7e28da1a9fc4dc4153c57 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 18 Nov 2025 15:01:17 +0100 Subject: [PATCH] rm redundant includes --- tools/server/server-common.h | 40 ----------------------------------- tools/server/server-queue.cpp | 33 +++++++++++++++++------------ tools/server/server-queue.h | 1 - tools/server/server-task.cpp | 10 +++------ tools/server/server-task.h | 23 ++++++++++++++++++++ tools/server/server.cpp | 14 ++++++++++++ 6 files changed, 59 insertions(+), 62 deletions(-) diff --git a/tools/server/server-common.h b/tools/server/server-common.h index 447ad13038..32a8c85132 100644 --- a/tools/server/server-common.h +++ b/tools/server/server-common.h @@ -42,11 +42,6 @@ using json = nlohmann::ordered_json; #define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__) #define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define QUE_ERR(fmt, ...) LOG_ERR("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) -#define QUE_DBG(fmt, ...) LOG_DBG("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) - using raw_buffer = std::vector; template @@ -71,41 +66,6 @@ enum stop_type { STOP_TYPE_LIMIT, }; -// state diagram: https://github.com/ggml-org/llama.cpp/pull/9283 -enum slot_state { - SLOT_STATE_IDLE, - SLOT_STATE_STARTED, // TODO: this state is only used for setting up the initial prompt processing; maybe merge it with launch_slot_with_task in the future - SLOT_STATE_PROCESSING_PROMPT, - SLOT_STATE_DONE_PROMPT, - SLOT_STATE_GENERATING, -}; - -enum server_state { - SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet - SERVER_STATE_READY, // Server is ready and model is loaded -}; - -enum server_task_type { - SERVER_TASK_TYPE_COMPLETION, - SERVER_TASK_TYPE_EMBEDDING, - SERVER_TASK_TYPE_RERANK, - SERVER_TASK_TYPE_INFILL, - SERVER_TASK_TYPE_CANCEL, - SERVER_TASK_TYPE_NEXT_RESPONSE, - SERVER_TASK_TYPE_METRICS, - SERVER_TASK_TYPE_SLOT_SAVE, - SERVER_TASK_TYPE_SLOT_RESTORE, - SERVER_TASK_TYPE_SLOT_ERASE, - SERVER_TASK_TYPE_SET_LORA, -}; - -enum oaicompat_type { - OAICOMPAT_TYPE_NONE, - OAICOMPAT_TYPE_CHAT, - OAICOMPAT_TYPE_COMPLETION, - OAICOMPAT_TYPE_EMBEDDING, -}; - // https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11 enum error_type { ERROR_TYPE_INVALID_REQUEST, diff --git a/tools/server/server-queue.cpp b/tools/server/server-queue.cpp index e78e72cd60..afbf33ca59 100644 --- a/tools/server/server-queue.cpp +++ b/tools/server/server-queue.cpp @@ -1,12 +1,17 @@ -#include "server-common.h" #include "server-task.h" #include "server-queue.h" -#include -#include -#include -#include -#include +#include "log.h" + +#define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define QUE_ERR(fmt, ...) LOG_ERR("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define QUE_DBG(fmt, ...) LOG_DBG("que %12.*s: " fmt, 12, __func__, __VA_ARGS__) + +#define RES_INF(fmt, ...) LOG_INF("res %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define RES_WRN(fmt, ...) LOG_WRN("res %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define RES_ERR(fmt, ...) LOG_ERR("res %12.*s: " fmt, 12, __func__, __VA_ARGS__) +#define RES_DBG(fmt, ...) LOG_DBG("res %12.*s: " fmt, 12, __func__, __VA_ARGS__) // // server_queue @@ -150,7 +155,7 @@ void server_queue::cleanup_pending_task(int id_target) { // void server_response::add_waiting_task_id(int id_task) { - SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", id_task, (int) waiting_task_ids.size()); + RES_DBG("add task %d to waiting list. current waiting = %d (before add)\n", id_task, (int) waiting_task_ids.size()); std::unique_lock lock(mutex_results); waiting_task_ids.insert(id_task); @@ -160,13 +165,13 @@ void server_response::add_waiting_tasks(const std::vector & tasks) std::unique_lock lock(mutex_results); for (const auto & task : tasks) { - SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", task.id, (int) waiting_task_ids.size()); + RES_DBG("add task %d to waiting list. current waiting = %d (before add)\n", task.id, (int) waiting_task_ids.size()); waiting_task_ids.insert(task.id); } } void server_response::remove_waiting_task_id(int id_task) { - SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size()); + RES_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size()); std::unique_lock lock(mutex_results); waiting_task_ids.erase(id_task); @@ -182,7 +187,7 @@ void server_response::remove_waiting_task_ids(const std::unordered_set & id std::unique_lock lock(mutex_results); for (const auto & id_task : id_tasks) { - SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size()); + RES_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size()); waiting_task_ids.erase(id_task); } } @@ -192,7 +197,7 @@ server_task_result_ptr server_response::recv(const std::unordered_set & id_ std::unique_lock lock(mutex_results); condition_results.wait(lock, [&]{ if (!running) { - SRV_DBG("%s : queue result stop\n", __func__); + RES_DBG("%s : queue result stop\n", __func__); std::terminate(); // we cannot return here since the caller is HTTP code } return !queue_results.empty(); @@ -224,7 +229,7 @@ server_task_result_ptr server_response::recv_with_timeout(const std::unordered_s std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout)); if (!running) { - SRV_DBG("%s : queue result stop\n", __func__); + RES_DBG("%s : queue result stop\n", __func__); std::terminate(); // we cannot return here since the caller is HTTP code } if (cr_res == std::cv_status::timeout) { @@ -241,12 +246,12 @@ server_task_result_ptr server_response::recv(int id_task) { } void server_response::send(server_task_result_ptr && result) { - SRV_DBG("sending result for task id = %d\n", result->id); + RES_DBG("sending result for task id = %d\n", result->id); std::unique_lock lock(mutex_results); for (const auto & id_task : waiting_task_ids) { if (result->id == id_task) { - SRV_DBG("task id = %d pushed to result queue\n", result->id); + RES_DBG("task id = %d pushed to result queue\n", result->id); queue_results.emplace_back(std::move(result)); condition_results.notify_all(); diff --git a/tools/server/server-queue.h b/tools/server/server-queue.h index 7491e07081..529be817d2 100644 --- a/tools/server/server-queue.h +++ b/tools/server/server-queue.h @@ -1,6 +1,5 @@ #pragma once -#include "server-common.h" #include "server-task.h" #include diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 0b888107b8..bc4436ba65 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1,16 +1,12 @@ +#include "server-common.h" +#include "server-task.h" + #include "common.h" #include "llama.h" #include "chat.h" #include "sampling.h" #include "json-schema-to-grammar.h" -#include -#include -#include - -#include "server-common.h" -#include "server-task.h" - using json = nlohmann::ordered_json; // diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 9539085102..b062e33632 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -10,10 +10,33 @@ #include #include +// TODO: prevent including the whole server-common.h as we only use server_tokens #include "server-common.h" using json = nlohmann::ordered_json; +enum server_task_type { + SERVER_TASK_TYPE_COMPLETION, + SERVER_TASK_TYPE_EMBEDDING, + SERVER_TASK_TYPE_RERANK, + SERVER_TASK_TYPE_INFILL, + SERVER_TASK_TYPE_CANCEL, + SERVER_TASK_TYPE_NEXT_RESPONSE, + SERVER_TASK_TYPE_METRICS, + SERVER_TASK_TYPE_SLOT_SAVE, + SERVER_TASK_TYPE_SLOT_RESTORE, + SERVER_TASK_TYPE_SLOT_ERASE, + SERVER_TASK_TYPE_SET_LORA, +}; + +// TODO: change this to more generic "response_format" to replace the "format_response_*" in server-common +enum oaicompat_type { + OAICOMPAT_TYPE_NONE, + OAICOMPAT_TYPE_CHAT, + OAICOMPAT_TYPE_COMPLETION, + OAICOMPAT_TYPE_EMBEDDING, +}; + struct task_params { bool stream = true; bool include_usage = false; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index f2be9968b9..7d0cbc8091 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -23,6 +23,20 @@ using json = nlohmann::ordered_json; constexpr int HTTP_POLLING_SECONDS = 1; +// state diagram: https://github.com/ggml-org/llama.cpp/pull/9283 +enum slot_state { + SLOT_STATE_IDLE, + SLOT_STATE_STARTED, // TODO: this state is only used for setting up the initial prompt processing; maybe merge it with launch_slot_with_task in the future + SLOT_STATE_PROCESSING_PROMPT, + SLOT_STATE_DONE_PROMPT, + SLOT_STATE_GENERATING, +}; + +enum server_state { + SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet + SERVER_STATE_READY, // Server is ready and model is loaded +}; + static bool server_task_type_need_embd(server_task_type task_type) { switch (task_type) { case SERVER_TASK_TYPE_EMBEDDING: