diff --git a/tools/server/README.md b/tools/server/README.md index 3e311a657c..f22b57fee2 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1455,19 +1455,23 @@ The `status` object can be: ```json "status": { - "value": "loading" + "value": "loading", + "args": ["llama-server", "-ctx", "4096"] } ``` ```json "status": { - "value": "failed" + "value": "failed", + "args": ["llama-server", "-ctx", "4096"], + "exit_code": 1 } ``` ```json "status": { - "value": "loaded" + "value": "loaded", + "args": ["llama-server", "-ctx", "4096"] } ``` diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 071b5522ea..525f2bb347 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -368,11 +368,11 @@ void server_models::load(const std::string & name) { std::vector child_env = base_env; // copy child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port)); - // TODO: add logging SRV_INF("%s", "spawning server instance with args:\n"); for (const auto & arg : child_args) { SRV_INF(" %s\n", arg.c_str()); } + inst.meta.args = child_args; // save for debugging std::vector argv = to_char_ptr_array(child_args); std::vector envp = to_char_ptr_array(child_env); @@ -405,9 +405,11 @@ void server_models::load(const std::string & name) { std::lock_guard lk(mutex); auto it = mapping.find(name); if (it != mapping.end()) { - it->second.meta.status = exit_code == 0 - ? SERVER_MODEL_STATUS_UNLOADED - : SERVER_MODEL_STATUS_FAILED; + auto & meta = it->second.meta; + meta.exit_code = exit_code; + meta.status = exit_code == 0 + ? SERVER_MODEL_STATUS_UNLOADED + : SERVER_MODEL_STATUS_FAILED; } cv.notify_all(); } diff --git a/tools/server/server-models.h b/tools/server/server-models.h index 3cb3b39fe7..222f31645e 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -58,7 +58,9 @@ struct server_model_meta { bool in_cache = false; // if true, use -hf; use -m otherwise int port = 0; server_model_status status = SERVER_MODEL_STATUS_UNLOADED; - int64_t last_used = 0; + int64_t last_used = 0; // for LRU unloading + std::vector args; // additional args passed to the model instance (used for debugging) + int exit_code = 0; // exit code of the model instance process (only valid if status == FAILED) bool is_active() const { return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 43d145fb67..c7dbd74e81 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -5160,7 +5160,7 @@ public: std::string name = json_value(body, "model", std::string()); auto model = models->get_meta(name); if (!model.has_value()) { - res->error(format_error_response("model is not found", ERROR_TYPE_INVALID_REQUEST)); + res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND)); return res; } if (model->status == SERVER_MODEL_STATUS_LOADED) { @@ -5188,15 +5188,20 @@ public: json models_json = json::array(); auto all_models = models->get_all_meta(); for (const auto & model : all_models) { + json status { + {"value", server_model_status_to_string(model.status)}, + {"args", model.args}, + }; + if (model.status == SERVER_MODEL_STATUS_FAILED) { + status["exit_code"] = model.exit_code; + } models_json.push_back(json { {"name", model.name}, {"id", model.name}, {"in_cache", model.in_cache}, {"path", model.path}, + {"status", status}, // TODO: other fields... - {"status", { - {"value", server_model_status_to_string(model.status)} - }}, }); } res->ok({{"data", models_json}});