expose args and exit_code in API
This commit is contained in:
parent
4af1b6cbac
commit
f25bfaba4d
|
|
@ -1455,19 +1455,23 @@ The `status` object can be:
|
|||
|
||||
```json
|
||||
"status": {
|
||||
"value": "loading"
|
||||
"value": "loading",
|
||||
"args": ["llama-server", "-ctx", "4096"]
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
"status": {
|
||||
"value": "failed"
|
||||
"value": "failed",
|
||||
"args": ["llama-server", "-ctx", "4096"],
|
||||
"exit_code": 1
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
"status": {
|
||||
"value": "loaded"
|
||||
"value": "loaded",
|
||||
"args": ["llama-server", "-ctx", "4096"]
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -368,11 +368,11 @@ void server_models::load(const std::string & name) {
|
|||
std::vector<std::string> child_env = base_env; // copy
|
||||
child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));
|
||||
|
||||
// TODO: add logging
|
||||
SRV_INF("%s", "spawning server instance with args:\n");
|
||||
for (const auto & arg : child_args) {
|
||||
SRV_INF(" %s\n", arg.c_str());
|
||||
}
|
||||
inst.meta.args = child_args; // save for debugging
|
||||
|
||||
std::vector<char *> argv = to_char_ptr_array(child_args);
|
||||
std::vector<char *> envp = to_char_ptr_array(child_env);
|
||||
|
|
@ -405,7 +405,9 @@ void server_models::load(const std::string & name) {
|
|||
std::lock_guard<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
it->second.meta.status = exit_code == 0
|
||||
auto & meta = it->second.meta;
|
||||
meta.exit_code = exit_code;
|
||||
meta.status = exit_code == 0
|
||||
? SERVER_MODEL_STATUS_UNLOADED
|
||||
: SERVER_MODEL_STATUS_FAILED;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,9 @@ struct server_model_meta {
|
|||
bool in_cache = false; // if true, use -hf; use -m otherwise
|
||||
int port = 0;
|
||||
server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
|
||||
int64_t last_used = 0;
|
||||
int64_t last_used = 0; // for LRU unloading
|
||||
std::vector<std::string> args; // args used to spawn the model instance (saved for debugging)
|
||||
int exit_code = 0; // exit code of the model instance process (only valid if status == FAILED)
|
||||
|
||||
bool is_active() const {
|
||||
return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING;
|
||||
|
|
|
|||
|
|
@ -5160,7 +5160,7 @@ public:
|
|||
std::string name = json_value(body, "model", std::string());
|
||||
auto model = models->get_meta(name);
|
||||
if (!model.has_value()) {
|
||||
res->error(format_error_response("model is not found", ERROR_TYPE_INVALID_REQUEST));
|
||||
res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
|
||||
return res;
|
||||
}
|
||||
if (model->status == SERVER_MODEL_STATUS_LOADED) {
|
||||
|
|
@ -5188,15 +5188,20 @@ public:
|
|||
json models_json = json::array();
|
||||
auto all_models = models->get_all_meta();
|
||||
for (const auto & model : all_models) {
|
||||
json status {
|
||||
{"value", server_model_status_to_string(model.status)},
|
||||
{"args", model.args},
|
||||
};
|
||||
if (model.status == SERVER_MODEL_STATUS_FAILED) {
|
||||
status["exit_code"] = model.exit_code;
|
||||
}
|
||||
models_json.push_back(json {
|
||||
{"name", model.name},
|
||||
{"id", model.name},
|
||||
{"in_cache", model.in_cache},
|
||||
{"path", model.path},
|
||||
{"status", status},
|
||||
// TODO: other fields...
|
||||
{"status", {
|
||||
{"value", server_model_status_to_string(model.status)}
|
||||
}},
|
||||
});
|
||||
}
|
||||
res->ok({{"data", models_json}});
|
||||
|
|
|
|||
Loading…
Reference in New Issue