diff --git a/common/chat.cpp b/common/chat.cpp index 47a34d5822..f8f3a0a797 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -65,15 +65,18 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const { } else if (!content_parts.empty()) { if (concat_typed_text) { std::string text; + bool last_was_media_marker = false; + // join parts with newline, do not add newline before or after media markers for (const auto & part : content_parts) { - if (part.type != "text") { + if (part.type != "text" && part.type != "media_marker") { LOG_WRN("Ignoring content part type: %s\n", part.type.c_str()); continue; } - if (!text.empty()) { + if (part.type != "media_marker" && !last_was_media_marker && !text.empty()) { text += '\n'; } - text += part.text; + last_was_media_marker = (part.type == "media_marker"); + text += part.text; } jmsg["content"] = text; } else { @@ -319,7 +322,7 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa throw std::invalid_argument("Missing content part type: " + part.dump()); } const auto & type = part.at("type"); - if (type != "text") { + if (type != "text" && type != "media_marker") { throw std::invalid_argument("Unsupported content part type: " + type.dump()); } common_chat_msg_content_part msg_part; diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index a853f65c8d..d717fb6698 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -916,8 +916,7 @@ json oaicompat_chat_params_parse( json image_url = json_value(p, "image_url", json::object()); handle_media(out_files, image_url, opt.media_path); - // replace this chunk with a marker - p["type"] = "text"; + p["type"] = "media_marker"; p["text"] = mtmd_default_marker(); p.erase("image_url"); @@ -938,8 +937,7 @@ json oaicompat_chat_params_parse( // TODO: add audio_url support by reusing handle_media() - // replace this chunk with a marker - p["type"] = "text"; + p["type"] = "media_marker"; p["text"] = mtmd_default_marker(); p.erase("input_audio");