server: fix streaming event bugs and tighten test assertions

Code fixes:
- build_oai_resp_metadata accepts a status param; completed_at is null
  whenever status is not "completed" (was always set to the current
  timestamp regardless of status)
- response.created/in_progress events use zeroed usage (was passing
  actual prompt tokens before response was logically started)
- Function call item IDs are now generated once per tool call in
  update() and reused consistently across output_item.added,
  function_call_arguments.delta, and output_item.done events
  (was generating independent random IDs in each path)
- Clean up commented-out status checks in server-common.cpp

Test fixes:
- Assert sequence_number on every event unconditionally (was using
  weak "if present" guard)
- Check actual values, not just key presence, in the streaming
  response.created event test (completed_at is None, usage tokens are 0, etc.)

Refs: ggml-org/llama.cpp#21174 (patrick review)
This commit is contained in:
Christopher Albert 2026-03-30 18:24:39 +02:00
parent 5d51bbef1c
commit 35f62f9eb3
4 changed files with 36 additions and 29 deletions

View File

@ -1280,12 +1280,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
} else if (exists_and_is_array(item, "content") &&
exists_and_is_string(item, "role") &&
item.at("role") == "assistant" &&
// exists_and_is_string(item, "status") &&
// (item.at("status") == "in_progress" ||
// item.at("status") == "completed" ||
// item.at("status") == "incomplete") &&
// item["status"] not sent by codex-cli
// item["type"] == "message" for OutputMessage, absent for EasyInputMessage
// status not checked (not always present, e.g. codex-cli omits it)
// type == "message" for OutputMessage, absent for EasyInputMessage
(!item.contains("type") || item.at("type") == "message")
) {
// #responses_create-input-input_item_list-item-output_message

View File

@ -937,17 +937,18 @@ static json build_oai_resp_metadata(const std::string & oai_resp_id,
const std::string & output_text,
int n_prompt_tokens,
int n_decoded,
int n_prompt_tokens_cache) {
int n_prompt_tokens_cache,
const std::string & status = "completed") {
std::time_t t = std::time(0);
return json {
{"completed_at", t},
{"completed_at", status == "completed" ? json(t) : json(nullptr)},
{"created_at", t},
{"id", oai_resp_id},
{"model", oaicompat_model},
{"object", "response"},
{"output", output},
{"output_text", output_text},
{"status", "completed"},
{"status", status},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
@ -1122,10 +1123,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
output_idx++;
}
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
? oai_resp_fc_item_ids[tc_idx]
: "fc_" + random_string(); // fallback for non-streaming path
const json output_item = {
{"type", "function_call"},
{"id", "fc_" + random_string()},
{"id", fc_id},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
@ -1429,7 +1434,7 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_id = state.oai_resp_fc_id;
// seq_num/output_idx: read from state (may have been advanced by previous to_json call)
oai_resp_fc_item_id = state.oai_resp_fc_item_id;
oai_resp_seq_num = state.oai_resp_seq_num;
oai_resp_output_idx = state.oai_resp_output_idx;
@ -1460,6 +1465,8 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
}
if (!diff.tool_call_delta.name.empty()) {
state.oai_resp_fc_id = diff.tool_call_delta.id;
state.oai_resp_fc_item_id = "fc_" + random_string();
state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
state.oai_resp_seq_num++; // output_item.added
state.oai_resp_output_idx++;
}
@ -1610,12 +1617,10 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
int & output_idx = oai_resp_output_idx;
if (n_decoded == 1) {
// Build initial response object with all required fields but empty output
// Build initial response object with all required fields but empty output and zeroed usage
json initial_resp = build_oai_resp_metadata(
oai_resp_id, oaicompat_model, {}, "",
n_prompt_tokens, 0, n_prompt_tokens_cache);
initial_resp["status"] = "in_progress";
initial_resp["completed_at"] = nullptr;
0, 0, 0, "in_progress");
events.push_back(json {
{"event", "response.created"},
@ -1723,7 +1728,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"id", "fc_" + random_string()},
{"id", oai_resp_fc_item_id},
{"arguments", ""},
{"call_id", diff.tool_call_delta.id},
{"name", diff.tool_call_delta.name},
@ -1732,7 +1737,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
}},
}},
});
oai_resp_fc_id = diff.tool_call_delta.id;
}
if (!diff.tool_call_delta.arguments.empty()) {
@ -1743,7 +1747,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"delta", diff.tool_call_delta.arguments},
{"item_id", "fc_" + oai_resp_fc_id},
{"item_id", oai_resp_fc_item_id},
}},
});
}

View File

@ -109,9 +109,11 @@ struct task_result_state {
const std::string oai_resp_id;
const std::string oai_resp_reasoning_id;
const std::string oai_resp_message_id;
std::string oai_resp_fc_id; // function call ID for current args delta
int oai_resp_seq_num = 0; // monotonically increasing per-stream
int oai_resp_output_idx = 0; // tracks current output item index
std::string oai_resp_fc_id; // model's tool_call ID for current function call
std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
int oai_resp_seq_num = 0; // monotonically increasing per-stream
int oai_resp_output_idx = 0; // tracks current output item index
task_result_state(const common_chat_parser_params & chat_parser_params)
: chat_parser_params(chat_parser_params)
@ -372,6 +374,7 @@ struct server_task_result_cmpl_final : server_task_result {
std::string oai_resp_id;
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::vector<std::string> oai_resp_fc_item_ids;
int oai_resp_seq_num = 0;
virtual bool is_stop() override {
@ -387,6 +390,7 @@ struct server_task_result_cmpl_final : server_task_result {
oai_resp_id = state.oai_resp_id;
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
oai_resp_seq_num = state.oai_resp_seq_num;
}
@ -440,6 +444,7 @@ struct server_task_result_cmpl_partial : server_task_result {
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::string oai_resp_fc_id;
std::string oai_resp_fc_item_id;
int oai_resp_seq_num = 0;
int oai_resp_output_idx = 0;

View File

@ -136,8 +136,8 @@ def test_responses_stream_schema_fields():
saw_output_item_done = False
completed_response = None
for data in res:
if "sequence_number" in data:
seen_seq_nums.append(data["sequence_number"])
assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}"
seen_seq_nums.append(data["sequence_number"])
if data.get("type") == "response.output_text.done":
saw_output_text_done = True
assert "content_index" in data
@ -435,11 +435,13 @@ def test_responses_stream_created_event_has_full_response():
assert resp["id"].startswith("resp_")
assert resp["object"] == "response"
assert resp["model"] is not None
assert "metadata" in resp
assert "store" in resp
assert "truncation" in resp
assert "tools" in resp
assert "usage" in resp
assert resp["completed_at"] is None
assert resp["metadata"] == {}
assert resp["store"] == False
assert resp["truncation"] == "disabled"
assert resp["tools"] == []
assert resp["usage"]["input_tokens"] == 0
assert resp["usage"]["output_tokens"] == 0
assert resp["output"] == []
assert resp["output_text"] == ""