server: fix streaming event bugs and tighten test assertions
Code fixes:
- build_oai_resp_metadata accepts a status param; completed_at is null when status is in_progress (was always set to a timestamp)
- response.created/in_progress events use zeroed usage (was passing actual prompt tokens before the response was logically started)
- Function call item IDs are now generated once per tool call in update() and reused consistently across output_item.added, function_call_arguments.delta, and output_item.done events (was generating independent random IDs in each path)
- Clean up commented-out status checks in server-common.cpp

Test fixes:
- Assert sequence_number on every event unconditionally (was using a weak "if present" guard)
- Check actual values, not just key presence, in the streaming created event test (completed_at is None, usage tokens are 0, etc.)

Refs: ggml-org/llama.cpp#21174 (patrick review)
parent 5d51bbef1c
commit 35f62f9eb3
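The heart of the streaming fix, as a minimal standalone sketch (assuming nlohmann::json; stream_state, on_tool_call_start, and the *_event helpers are illustrative names, random_string() is a stand-in for the server's helper, and event shapes are abridged):

    #include <nlohmann/json.hpp>
    #include <iostream>
    #include <string>
    #include <vector>
    using json = nlohmann::json;

    // Stand-in for the server's random_string() helper (illustration only).
    static std::string random_string() { static int n = 0; return std::to_string(n++); }

    struct stream_state {
        std::string fc_item_id;               // "fc_..." ID of the function call item being streamed
        std::vector<std::string> fc_item_ids; // one entry per tool call, in order of appearance
    };

    // Generate the item ID exactly once, when a tool call's name first appears...
    static void on_tool_call_start(stream_state & st) {
        st.fc_item_id = "fc_" + random_string();
        st.fc_item_ids.push_back(st.fc_item_id);
    }

    // ...then reuse it in every event for that call, so clients can correlate them.
    static json added_event(const stream_state & st) {
        return {{"type", "response.output_item.added"}, {"item", {{"id", st.fc_item_id}}}};
    }
    static json args_delta_event(const stream_state & st, const std::string & delta) {
        return {{"type", "response.function_call_arguments.delta"}, {"item_id", st.fc_item_id}, {"delta", delta}};
    }
    static json done_event(const stream_state & st, size_t tc_idx) {
        return {{"type", "response.output_item.done"}, {"item", {{"id", st.fc_item_ids.at(tc_idx)}}}};
    }

    int main() {
        stream_state st;
        on_tool_call_start(st);
        // All three events for the call now carry the same "fc_..." ID:
        std::cout << added_event(st)["item"]["id"] << " "
                  << args_delta_event(st, "{}")["item_id"] << " "
                  << done_event(st, 0)["item"]["id"] << "\n";
    }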
@@ -1280,12 +1280,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
         } else if (exists_and_is_array(item, "content") &&
                    exists_and_is_string(item, "role") &&
                    item.at("role") == "assistant" &&
-                   // exists_and_is_string(item, "status") &&
-                   // (item.at("status") == "in_progress" ||
-                   //  item.at("status") == "completed" ||
-                   //  item.at("status") == "incomplete") &&
-                   // item["status"] not sent by codex-cli
-                   // item["type"] == "message" for OutputMessage, absent for EasyInputMessage
+                   // status not checked (not always present, e.g. codex-cli omits it)
+                   // type == "message" for OutputMessage, absent for EasyInputMessage
                    (!item.contains("type") || item.at("type") == "message")
         ) {
             // #responses_create-input-input_item_list-item-output_message

@@ -937,17 +937,18 @@ static json build_oai_resp_metadata(const std::string & oai_resp_id,
                                     const std::string & output_text,
                                     int n_prompt_tokens,
                                     int n_decoded,
-                                    int n_prompt_tokens_cache) {
+                                    int n_prompt_tokens_cache,
+                                    const std::string & status = "completed") {
     std::time_t t = std::time(0);
     return json {
-        {"completed_at", t},
+        {"completed_at", status == "completed" ? json(t) : json(nullptr)},
         {"created_at", t},
         {"id", oai_resp_id},
         {"model", oaicompat_model},
         {"object", "response"},
         {"output", output},
         {"output_text", output_text},
-        {"status", "completed"},
+        {"status", status},
         {"usage", json {
             {"input_tokens", n_prompt_tokens},
             {"output_tokens", n_decoded},

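For illustration, a compile-and-run sketch of the new completed_at/status behavior (make_metadata is a hypothetical, stripped-down stand-in for build_oai_resp_metadata; the real helper also fills id, model, output, and usage):

    #include <ctime>
    #include <iostream>
    #include <string>
    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    // Stripped-down stand-in for build_oai_resp_metadata (illustration only).
    static json make_metadata(const std::string & status = "completed") {
        std::time_t t = std::time(0);
        return json {
            {"status",       status},
            {"completed_at", status == "completed" ? json(t) : json(nullptr)},
        };
    }

    int main() {
        std::cout << make_metadata().dump() << "\n";              // {"completed_at":<now>,"status":"completed"}
        std::cout << make_metadata("in_progress").dump() << "\n"; // {"completed_at":null,"status":"in_progress"}
    }

The streaming path further down calls the real helper as build_oai_resp_metadata(oai_resp_id, oaicompat_model, {}, "", 0, 0, 0, "in_progress"), zeroing usage as well.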
@@ -1122,10 +1123,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
         output_idx++;
     }

-    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+    for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
+        const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
+        const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
+            ? oai_resp_fc_item_ids[tc_idx]
+            : "fc_" + random_string(); // fallback for non-streaming path
         const json output_item = {
             {"type", "function_call"},
-            {"id", "fc_" + random_string()},
+            {"id", fc_id},
             {"call_id", tool_call.id},
             {"name", tool_call.name},
             {"arguments", tool_call.arguments},

@@ -1429,7 +1434,7 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
     oai_resp_reasoning_id = state.oai_resp_reasoning_id;
     oai_resp_message_id = state.oai_resp_message_id;
     oai_resp_fc_id = state.oai_resp_fc_id;
-    // seq_num/output_idx: read from state (may have been advanced by previous to_json call)
+    oai_resp_fc_item_id = state.oai_resp_fc_item_id;
     oai_resp_seq_num = state.oai_resp_seq_num;
     oai_resp_output_idx = state.oai_resp_output_idx;

@@ -1460,6 +1465,8 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
     }
     if (!diff.tool_call_delta.name.empty()) {
         state.oai_resp_fc_id = diff.tool_call_delta.id;
+        state.oai_resp_fc_item_id = "fc_" + random_string();
+        state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
         state.oai_resp_seq_num++; // output_item.added
         state.oai_resp_output_idx++;
     }

@@ -1610,12 +1617,10 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
     int & output_idx = oai_resp_output_idx;

     if (n_decoded == 1) {
-        // Build initial response object with all required fields but empty output
+        // Build initial response object with all required fields but empty output and zeroed usage
         json initial_resp = build_oai_resp_metadata(
             oai_resp_id, oaicompat_model, {}, "",
-            n_prompt_tokens, 0, n_prompt_tokens_cache);
-        initial_resp["status"] = "in_progress";
-        initial_resp["completed_at"] = nullptr;
+            0, 0, 0, "in_progress");

         events.push_back(json {
             {"event", "response.created"},

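With the change above, the first streamed event carries a logically empty response. Roughly (abridged sketch; the "response" wrapper key follows the OpenAI Responses streaming format rather than being read off this diff):

    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    // Abridged shape of the corrected response.created payload (illustration only).
    static const json created_event = {
        {"event", "response.created"},
        {"response", {
            {"status",       "in_progress"},
            {"completed_at", nullptr},
            {"output",       json::array()},
            {"output_text",  ""},
            {"usage",        {{"input_tokens", 0}, {"output_tokens", 0}}},
        }},
    };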
@@ -1723,7 +1728,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
             {"sequence_number", seq_num++},
             {"output_index", output_idx++},
             {"item", json {
-                {"id", "fc_" + random_string()},
+                {"id", oai_resp_fc_item_id},
                 {"arguments", ""},
                 {"call_id", diff.tool_call_delta.id},
                 {"name", diff.tool_call_delta.name},

@@ -1732,7 +1737,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                 }},
             }},
         });
-        oai_resp_fc_id = diff.tool_call_delta.id;
     }

     if (!diff.tool_call_delta.arguments.empty()) {

@@ -1743,7 +1747,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
             {"sequence_number", seq_num++},
             {"output_index", output_idx - 1},
             {"delta", diff.tool_call_delta.arguments},
-            {"item_id", "fc_" + oai_resp_fc_id},
+            {"item_id", oai_resp_fc_item_id},
         }},
         });
     }

@@ -109,9 +109,11 @@ struct task_result_state {
     const std::string oai_resp_id;
     const std::string oai_resp_reasoning_id;
     const std::string oai_resp_message_id;
-    std::string oai_resp_fc_id;  // function call ID for current args delta
-    int oai_resp_seq_num = 0;    // monotonically increasing per-stream
-    int oai_resp_output_idx = 0; // tracks current output item index
+    std::string oai_resp_fc_id;      // model's tool_call ID for current function call
+    std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
+    std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
+    int oai_resp_seq_num = 0;    // monotonically increasing per-stream
+    int oai_resp_output_idx = 0; // tracks current output item index

     task_result_state(const common_chat_parser_params & chat_parser_params)
         : chat_parser_params(chat_parser_params)

@@ -372,6 +374,7 @@ struct server_task_result_cmpl_final : server_task_result {
     std::string oai_resp_id;
     std::string oai_resp_reasoning_id;
     std::string oai_resp_message_id;
+    std::vector<std::string> oai_resp_fc_item_ids;
     int oai_resp_seq_num = 0;

     virtual bool is_stop() override {

@@ -387,6 +390,7 @@
         oai_resp_id = state.oai_resp_id;
         oai_resp_reasoning_id = state.oai_resp_reasoning_id;
         oai_resp_message_id = state.oai_resp_message_id;
+        oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
         oai_resp_seq_num = state.oai_resp_seq_num;
     }

@@ -440,6 +444,7 @@ struct server_task_result_cmpl_partial : server_task_result {
     std::string oai_resp_reasoning_id;
     std::string oai_resp_message_id;
     std::string oai_resp_fc_id;
+    std::string oai_resp_fc_item_id;
     int oai_resp_seq_num = 0;
     int oai_resp_output_idx = 0;

@@ -136,8 +136,8 @@ def test_responses_stream_schema_fields():
     saw_output_item_done = False
     completed_response = None
     for data in res:
-        if "sequence_number" in data:
-            seen_seq_nums.append(data["sequence_number"])
+        assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}"
+        seen_seq_nums.append(data["sequence_number"])
         if data.get("type") == "response.output_text.done":
             saw_output_text_done = True
             assert "content_index" in data

@@ -435,11 +435,13 @@ def test_responses_stream_created_event_has_full_response():
     assert resp["id"].startswith("resp_")
     assert resp["object"] == "response"
     assert resp["model"] is not None
-    assert "metadata" in resp
-    assert "store" in resp
-    assert "truncation" in resp
-    assert "tools" in resp
-    assert "usage" in resp
+    assert resp["completed_at"] is None
+    assert resp["metadata"] == {}
+    assert resp["store"] == False
+    assert resp["truncation"] == "disabled"
+    assert resp["tools"] == []
+    assert resp["usage"]["input_tokens"] == 0
+    assert resp["usage"]["output_tokens"] == 0
     assert resp["output"] == []
     assert resp["output_text"] == ""