server: add full streaming compliance for Responses API events
- Add sequence_number to ALL streaming events (created, in_progress,
output_item.added, content_part.added, all delta events)
- Add output_index to all events referencing output items
- Add content_index to content-related events
- Populate full response object in response.created and
response.in_progress events (was only {id, object, status})
- Add id field to function_call output_item.added events
- Add status: completed to reasoning output_item.done events
- Persist counter state (sequence number, output index) across streaming
chunks via task_result_state
Fixes: spec-compliant client libraries (e.g. async-openai) that require
these fields can now parse all streaming events without error.
Refs: ggml-org/llama.cpp#21174 (fumlig review comment)
This commit is contained in:
parent
467266ba4c
commit
987340767c
|
|
@ -1053,6 +1053,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
{"type", "reasoning_text"},
|
{"type", "reasoning_text"},
|
||||||
}})},
|
}})},
|
||||||
{"encrypted_content", ""},
|
{"encrypted_content", ""},
|
||||||
|
{"status", "completed"},
|
||||||
};
|
};
|
||||||
|
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
|
|
@ -1428,20 +1429,42 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
|
||||||
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
|
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
|
||||||
oai_resp_message_id = state.oai_resp_message_id;
|
oai_resp_message_id = state.oai_resp_message_id;
|
||||||
oai_resp_fc_id = state.oai_resp_fc_id;
|
oai_resp_fc_id = state.oai_resp_fc_id;
|
||||||
|
// seq_num/output_idx: read from state (already advanced by update() for the events of all previous chunks)
|
||||||
|
oai_resp_seq_num = state.oai_resp_seq_num;
|
||||||
|
oai_resp_output_idx = state.oai_resp_output_idx;
|
||||||
|
|
||||||
// track if the accumulated message has any reasoning content
|
// track if the accumulated message has any reasoning content
|
||||||
anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
|
anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
|
||||||
|
|
||||||
// Pre-compute state updates based on diffs (for next chunk)
|
// Pre-compute state updates based on diffs (for next chunk)
|
||||||
|
// Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit
|
||||||
|
if (n_decoded == 1) {
|
||||||
|
state.oai_resp_seq_num += 2; // response.created + response.in_progress
|
||||||
|
}
|
||||||
for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
|
for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
|
||||||
if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
|
if (!diff.reasoning_content_delta.empty()) {
|
||||||
state.thinking_block_started = true;
|
if (!state.thinking_block_started) {
|
||||||
|
state.thinking_block_started = true;
|
||||||
|
state.oai_resp_seq_num++; // output_item.added
|
||||||
|
state.oai_resp_output_idx++;
|
||||||
|
}
|
||||||
|
state.oai_resp_seq_num++; // reasoning_text.delta
|
||||||
}
|
}
|
||||||
if (!diff.content_delta.empty() && !state.text_block_started) {
|
if (!diff.content_delta.empty()) {
|
||||||
state.text_block_started = true;
|
if (!state.text_block_started) {
|
||||||
|
state.text_block_started = true;
|
||||||
|
state.oai_resp_seq_num += 2; // output_item.added + content_part.added
|
||||||
|
state.oai_resp_output_idx++;
|
||||||
|
}
|
||||||
|
state.oai_resp_seq_num++; // output_text.delta
|
||||||
}
|
}
|
||||||
if (!diff.tool_call_delta.name.empty()) {
|
if (!diff.tool_call_delta.name.empty()) {
|
||||||
state.oai_resp_fc_id = diff.tool_call_delta.id;
|
state.oai_resp_fc_id = diff.tool_call_delta.id;
|
||||||
|
state.oai_resp_seq_num++; // output_item.added
|
||||||
|
state.oai_resp_output_idx++;
|
||||||
|
}
|
||||||
|
if (!diff.tool_call_delta.arguments.empty()) {
|
||||||
|
state.oai_resp_seq_num++; // function_call_arguments.delta
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1583,28 +1606,31 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
|
||||||
|
|
||||||
json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
std::vector<json> events;
|
std::vector<json> events;
|
||||||
|
int & seq_num = oai_resp_seq_num;
|
||||||
|
int & output_idx = oai_resp_output_idx;
|
||||||
|
|
||||||
if (n_decoded == 1) {
|
if (n_decoded == 1) {
|
||||||
|
// Build initial response object with all required fields but empty output
|
||||||
|
json initial_resp = build_oai_resp_metadata(
|
||||||
|
oai_resp_id, oaicompat_model, {}, "",
|
||||||
|
n_prompt_tokens, 0, n_prompt_tokens_cache);
|
||||||
|
initial_resp["status"] = "in_progress";
|
||||||
|
initial_resp["completed_at"] = nullptr;
|
||||||
|
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.created"},
|
{"event", "response.created"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.created"},
|
{"type", "response.created"},
|
||||||
{"response", json {
|
{"sequence_number", seq_num++},
|
||||||
{"id", oai_resp_id},
|
{"response", initial_resp},
|
||||||
{"object", "response"},
|
|
||||||
{"status", "in_progress"},
|
|
||||||
}},
|
|
||||||
}},
|
}},
|
||||||
});
|
});
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.in_progress"},
|
{"event", "response.in_progress"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.in_progress"},
|
{"type", "response.in_progress"},
|
||||||
{"response", json {
|
{"sequence_number", seq_num++},
|
||||||
{"id", oai_resp_id},
|
{"response", initial_resp},
|
||||||
{"object", "response"},
|
|
||||||
{"status", "in_progress"},
|
|
||||||
}},
|
|
||||||
}},
|
}},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -1615,7 +1641,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.output_item.added"},
|
{"event", "response.output_item.added"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.added"},
|
{"type", "response.output_item.added"},
|
||||||
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", output_idx++},
|
||||||
{"item", json {
|
{"item", json {
|
||||||
{"id", oai_resp_reasoning_id},
|
{"id", oai_resp_reasoning_id},
|
||||||
{"summary", json::array()},
|
{"summary", json::array()},
|
||||||
|
|
@ -1631,9 +1659,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.reasoning_text.delta"},
|
{"event", "response.reasoning_text.delta"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.reasoning_text.delta"},
|
{"type", "response.reasoning_text.delta"},
|
||||||
{"delta", diff.reasoning_content_delta},
|
{"sequence_number", seq_num++},
|
||||||
{"item_id", oai_resp_reasoning_id},
|
{"output_index", output_idx - 1},
|
||||||
|
{"content_index", 0},
|
||||||
|
{"delta", diff.reasoning_content_delta},
|
||||||
|
{"item_id", oai_resp_reasoning_id},
|
||||||
}},
|
}},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -1643,7 +1674,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.output_item.added"},
|
{"event", "response.output_item.added"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.added"},
|
{"type", "response.output_item.added"},
|
||||||
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", output_idx++},
|
||||||
{"item", json {
|
{"item", json {
|
||||||
{"content", json::array()},
|
{"content", json::array()},
|
||||||
{"id", oai_resp_message_id},
|
{"id", oai_resp_message_id},
|
||||||
|
|
@ -1656,8 +1689,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.content_part.added"},
|
{"event", "response.content_part.added"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.content_part.added"},
|
{"type", "response.content_part.added"},
|
||||||
{"item_id", oai_resp_message_id},
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", output_idx - 1},
|
||||||
|
{"content_index", 0},
|
||||||
|
{"item_id", oai_resp_message_id},
|
||||||
{"part", json {
|
{"part", json {
|
||||||
{"type", "output_text"},
|
{"type", "output_text"},
|
||||||
{"text", ""},
|
{"text", ""},
|
||||||
|
|
@ -1669,9 +1705,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.output_text.delta"},
|
{"event", "response.output_text.delta"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_text.delta"},
|
{"type", "response.output_text.delta"},
|
||||||
{"item_id", oai_resp_message_id},
|
{"sequence_number", seq_num++},
|
||||||
{"delta", diff.content_delta},
|
{"output_index", output_idx - 1},
|
||||||
|
{"content_index", 0},
|
||||||
|
{"item_id", oai_resp_message_id},
|
||||||
|
{"delta", diff.content_delta},
|
||||||
}},
|
}},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -1680,10 +1719,13 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.output_item.added"},
|
{"event", "response.output_item.added"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.added"},
|
{"type", "response.output_item.added"},
|
||||||
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", output_idx++},
|
||||||
{"item", json {
|
{"item", json {
|
||||||
|
{"id", "fc_" + random_string()},
|
||||||
{"arguments", ""},
|
{"arguments", ""},
|
||||||
{"call_id", "fc_" + diff.tool_call_delta.id},
|
{"call_id", diff.tool_call_delta.id},
|
||||||
{"name", diff.tool_call_delta.name},
|
{"name", diff.tool_call_delta.name},
|
||||||
{"type", "function_call"},
|
{"type", "function_call"},
|
||||||
{"status", "in_progress"},
|
{"status", "in_progress"},
|
||||||
|
|
@ -1697,9 +1739,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
events.push_back(json {
|
events.push_back(json {
|
||||||
{"event", "response.function_call_arguments.delta"},
|
{"event", "response.function_call_arguments.delta"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.function_call_arguments.delta"},
|
{"type", "response.function_call_arguments.delta"},
|
||||||
{"delta", diff.tool_call_delta.arguments},
|
{"sequence_number", seq_num++},
|
||||||
{"item_id", "fc_" + oai_resp_fc_id},
|
{"output_index", output_idx - 1},
|
||||||
|
{"delta", diff.tool_call_delta.arguments},
|
||||||
|
{"item_id", "fc_" + oai_resp_fc_id},
|
||||||
}},
|
}},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,8 @@ struct task_result_state {
|
||||||
const std::string oai_resp_reasoning_id;
|
const std::string oai_resp_reasoning_id;
|
||||||
const std::string oai_resp_message_id;
|
const std::string oai_resp_message_id;
|
||||||
std::string oai_resp_fc_id; // function call ID for current args delta
|
std::string oai_resp_fc_id; // function call ID for current args delta
|
||||||
|
int oai_resp_seq_num = 0; // monotonically increasing per-stream
|
||||||
|
int oai_resp_output_idx = 0; // tracks current output item index
|
||||||
|
|
||||||
task_result_state(const common_chat_parser_params & chat_parser_params)
|
task_result_state(const common_chat_parser_params & chat_parser_params)
|
||||||
: chat_parser_params(chat_parser_params)
|
: chat_parser_params(chat_parser_params)
|
||||||
|
|
@ -385,6 +387,7 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
oai_resp_id = state.oai_resp_id;
|
oai_resp_id = state.oai_resp_id;
|
||||||
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
|
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
|
||||||
oai_resp_message_id = state.oai_resp_message_id;
|
oai_resp_message_id = state.oai_resp_message_id;
|
||||||
|
oai_resp_seq_num = state.oai_resp_seq_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
json to_json_non_oaicompat();
|
json to_json_non_oaicompat();
|
||||||
|
|
@ -437,6 +440,8 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||||
std::string oai_resp_reasoning_id;
|
std::string oai_resp_reasoning_id;
|
||||||
std::string oai_resp_message_id;
|
std::string oai_resp_message_id;
|
||||||
std::string oai_resp_fc_id;
|
std::string oai_resp_fc_id;
|
||||||
|
int oai_resp_seq_num = 0;
|
||||||
|
int oai_resp_output_idx = 0;
|
||||||
|
|
||||||
// for Anthropic API: track if any reasoning content has been generated
|
// for Anthropic API: track if any reasoning content has been generated
|
||||||
bool anthropic_has_reasoning = false;
|
bool anthropic_has_reasoning = false;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue