expose args and exit_code in API

2025-11-23 14:59:04 +01:00 · 2025-11-23 14:59:04 +01:00 · f25bfaba4d
parent 4af1b6cbac
commit f25bfaba4d
4 changed files with 25 additions and 12 deletions
--- a/tools/server/README.md
+++ b/tools/server/README.md
@ -1455,19 +1455,23 @@ The `status` object can be:

 ```json
 "status": {
-  "value": "loading"
+  "value": "loading",
+  "args": ["llama-server", "-ctx", "4096"]
 }
 ```

 ```json
 "status": {
-  "value": "failed"
+  "value": "failed",
+  "args": ["llama-server", "-ctx", "4096"],
+  "exit_code": 1
 }
 ```

 ```json
 "status": {
-  "value": "loaded"
+  "value": "loaded",
+  "args": ["llama-server", "-ctx", "4096"]
 }
 ```

--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@ -368,11 +368,11 @@ void server_models::load(const std::string & name) {
        std::vector<std::string> child_env = base_env; // copy
        child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));

-        // TODO: add logging
        SRV_INF("%s", "spawning server instance with args:\n");
        for (const auto & arg : child_args) {
            SRV_INF("  %s\n", arg.c_str());
        }
+        inst.meta.args = child_args; // save for debugging

        std::vector<char *> argv = to_char_ptr_array(child_args);
        std::vector<char *> envp = to_char_ptr_array(child_env);
@ -405,7 +405,9 @@ void server_models::load(const std::string & name) {
            std::lock_guard<std::mutex> lk(mutex);
            auto it = mapping.find(name);
            if (it != mapping.end()) {
-                it->second.meta.status = exit_code == 0
+                auto & meta = it->second.meta;
+                meta.exit_code = exit_code;
+                meta.status    = exit_code == 0
                                    ? SERVER_MODEL_STATUS_UNLOADED
                                    : SERVER_MODEL_STATUS_FAILED;
            }
--- a/tools/server/server-models.h
+++ b/tools/server/server-models.h
@ -58,7 +58,9 @@ struct server_model_meta {
    bool in_cache = false; // if true, use -hf; use -m otherwise
    int port = 0;
    server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
-    int64_t last_used = 0;
+    int64_t last_used = 0; // for LRU unloading
+    std::vector<std::string> args; // full argument list (argv, program name included) used to spawn the model instance; kept for debugging
+    int exit_code = 0; // exit code of the model instance process (only valid if status == FAILED)

    bool is_active() const {
        return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING;
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@ -5160,7 +5160,7 @@ public:
        std::string name = json_value(body, "model", std::string());
        auto model = models->get_meta(name);
        if (!model.has_value()) {
-            res->error(format_error_response("model is not found", ERROR_TYPE_INVALID_REQUEST));
+            res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
            return res;
        }
        if (model->status == SERVER_MODEL_STATUS_LOADED) {
@ -5188,15 +5188,20 @@ public:
        json models_json = json::array();
        auto all_models = models->get_all_meta();
        for (const auto & model : all_models) {
+            json status {
+                {"value", server_model_status_to_string(model.status)},
+                {"args",  model.args},
+            };
+            if (model.status == SERVER_MODEL_STATUS_FAILED) {
+                status["exit_code"] = model.exit_code;
+            }
            models_json.push_back(json {
                {"name",     model.name},
                {"id",       model.name},
                {"in_cache", model.in_cache},
                {"path",     model.path},
+                {"status",   status},
                // TODO: other fields...
-                {"status", {
-                    {"value", server_model_status_to_string(model.status)}
-                }},
            });
        }
        res->ok({{"data", models_json}});