diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index f9dc319a03..5d63e6b697 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1053,6 +1053,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"type", "reasoning_text"}, }})}, {"encrypted_content", ""}, + {"status", "completed"}, }; server_sent_events.push_back(json { @@ -1428,20 +1429,42 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; oai_resp_fc_id = state.oai_resp_fc_id; + // seq_num/output_idx: read from state (may have been advanced by previous to_json call) + oai_resp_seq_num = state.oai_resp_seq_num; + oai_resp_output_idx = state.oai_resp_output_idx; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); // Pre-compute state updates based on diffs (for next chunk) + // Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit + if (n_decoded == 1) { + state.oai_resp_seq_num += 2; // response.created + response.in_progress + } for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) { - state.thinking_block_started = true; + if (!diff.reasoning_content_delta.empty()) { + if (!state.thinking_block_started) { + state.thinking_block_started = true; + state.oai_resp_seq_num++; // output_item.added + state.oai_resp_output_idx++; + } + state.oai_resp_seq_num++; // reasoning_text.delta } - if (!diff.content_delta.empty() && !state.text_block_started) { - state.text_block_started = true; + if (!diff.content_delta.empty()) { + if (!state.text_block_started) { + state.text_block_started = true; + state.oai_resp_seq_num += 2; // output_item.added + content_part.added + state.oai_resp_output_idx++; + } + state.oai_resp_seq_num++; // output_text.delta } if (!diff.tool_call_delta.name.empty()) { state.oai_resp_fc_id = diff.tool_call_delta.id; + state.oai_resp_seq_num++; // output_item.added + state.oai_resp_output_idx++; + } + if (!diff.tool_call_delta.arguments.empty()) { + state.oai_resp_seq_num++; // function_call_arguments.delta } } } @@ -1583,28 +1606,31 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { json server_task_result_cmpl_partial::to_json_oaicompat_resp() { std::vector events; + int & seq_num = oai_resp_seq_num; + int & output_idx = oai_resp_output_idx; if (n_decoded == 1) { + // Build initial response object with all required fields but empty output + json initial_resp = build_oai_resp_metadata( + oai_resp_id, oaicompat_model, {}, "", + n_prompt_tokens, 0, n_prompt_tokens_cache); + initial_resp["status"] = "in_progress"; + initial_resp["completed_at"] = nullptr; + events.push_back(json { {"event", "response.created"}, {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, + {"type", "response.created"}, + {"sequence_number", seq_num++}, + {"response", initial_resp}, }}, }); events.push_back(json { {"event", "response.in_progress"}, {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, + {"type", "response.in_progress"}, + {"sequence_number", seq_num++}, + {"response", initial_resp}, }}, }); } @@ -1615,7 +1641,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_item.added"}, {"data", json { - {"type", "response.output_item.added"}, + {"type", "response.output_item.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx++}, {"item", json { {"id", oai_resp_reasoning_id}, {"summary", json::array()}, @@ -1631,9 +1659,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.reasoning_text.delta"}, {"data", json { - {"type", "response.reasoning_text.delta"}, - {"delta", diff.reasoning_content_delta}, - {"item_id", oai_resp_reasoning_id}, + {"type", "response.reasoning_text.delta"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"content_index", 0}, + {"delta", diff.reasoning_content_delta}, + {"item_id", oai_resp_reasoning_id}, }}, }); } @@ -1643,7 +1674,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_item.added"}, {"data", json { - {"type", "response.output_item.added"}, + {"type", "response.output_item.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx++}, {"item", json { {"content", json::array()}, {"id", oai_resp_message_id}, @@ -1656,8 +1689,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.content_part.added"}, {"data", json { - {"type", "response.content_part.added"}, - {"item_id", oai_resp_message_id}, + {"type", "response.content_part.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, {"part", json { {"type", "output_text"}, {"text", ""}, @@ -1669,9 +1705,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_text.delta"}, {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", oai_resp_message_id}, - {"delta", diff.content_delta}, + {"type", "response.output_text.delta"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, + {"delta", diff.content_delta}, }}, }); } @@ -1680,10 +1719,13 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_item.added"}, {"data", json { - {"type", "response.output_item.added"}, + {"type", "response.output_item.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx++}, {"item", json { + {"id", "fc_" + random_string()}, {"arguments", ""}, - {"call_id", "fc_" + diff.tool_call_delta.id}, + {"call_id", diff.tool_call_delta.id}, {"name", diff.tool_call_delta.name}, {"type", "function_call"}, {"status", "in_progress"}, @@ -1697,9 +1739,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.function_call_arguments.delta"}, {"data", json { - {"type", "response.function_call_arguments.delta"}, - {"delta", diff.tool_call_delta.arguments}, - {"item_id", "fc_" + oai_resp_fc_id}, + {"type", "response.function_call_arguments.delta"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"delta", diff.tool_call_delta.arguments}, + {"item_id", "fc_" + oai_resp_fc_id}, }}, }); } diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 28ec7b8f6b..a4ce0449a3 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -110,6 +110,8 @@ struct task_result_state { const std::string oai_resp_reasoning_id; const std::string oai_resp_message_id; std::string oai_resp_fc_id; // function call ID for current args delta + int oai_resp_seq_num = 0; // monotonically increasing per-stream + int oai_resp_output_idx = 0; // tracks current output item index task_result_state(const common_chat_parser_params & chat_parser_params) : chat_parser_params(chat_parser_params) @@ -385,6 +387,7 @@ struct server_task_result_cmpl_final : server_task_result { oai_resp_id = state.oai_resp_id; oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; + oai_resp_seq_num = state.oai_resp_seq_num; } json to_json_non_oaicompat(); @@ -437,6 +440,8 @@ struct server_task_result_cmpl_partial : server_task_result { std::string oai_resp_reasoning_id; std::string oai_resp_message_id; std::string oai_resp_fc_id; + int oai_resp_seq_num = 0; + int oai_resp_output_idx = 0; // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false;