diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py
index 73bd8add07..5f5de415cf 100644
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -13,16 +13,16 @@ def create_server():
 @pytest.mark.parametrize(
     "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason,jinja,chat_template",
     [
-        (None, "Book", "Hey", 8, "But she couldn't|Some of her", 69, 8, "length", False, None),
-        (None, "Book", "Hey", 8, "But she couldn't|Some of her", 69, 8, "length", True, None),
-        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.|Timmy", 77, 8, "length", False, None),
-        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.|Timmy", 77, 8, "length", True, None),
-        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.|Timmy", 77, 8, "length", True, 'chatml'),
-        (None, "Book", "What is the best book", 8, "^ blue|very teaful|very busy", 23, 8, "length", True, "This is not a chat template, it is"),
-        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger|shake)+", 104, 128, "length", False, None),
-        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger|shake)+", 104, 128, "length", True, None),
-        (None, "Book", [{"type": "text", "text": "What is"}, {"type": "text", "text": "the best book"}], 8, "Whillicter|Some", 79, 8, "length", False, None),
-        (None, "Book", [{"type": "text", "text": "What is"}, {"type": "text", "text": "the best book"}], 8, "Whillicter|Some", 79, 8, "length", True, None),
+        (None, "Book", "Hey", 8, "But she couldn't", 69, 8, "length", False, None),
+        (None, "Book", "Hey", 8, "But she couldn't", 69, 8, "length", True, None),
+        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None),
+        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None),
+        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, 'chatml'),
+        (None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"),
+        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 128, "length", False, None),
+        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 128, "length", True, None),
+        (None, "Book", [{"type": "text", "text": "What is"}, {"type": "text", "text": "the best book"}], 8, "Whillicter", 79, 8, "length", False, None),
+        (None, "Book", [{"type": "text", "text": "What is"}, {"type": "text", "text": "the best book"}], 8, "Whillicter", 79, 8, "length", True, None),
     ]
 )
 def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason, jinja, chat_template):
@@ -54,8 +54,8 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
 @pytest.mark.parametrize(
     "system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason",
     [
-        ("Book", "What is the best book", 8, "(Suddenly)+|Timmy", 77, 8, "length"),
-        ("You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger|shake)+", 104, 128, "length"),
+        ("Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length"),
+        ("You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 128, "length"),
     ]
 )
 def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason):
@@ -115,7 +115,7 @@ def test_chat_completion_with_openai_library():
     assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
     assert res.choices[0].finish_reason == "length"
     assert res.choices[0].message.content is not None
-    assert match_regex("(Suddenly)+|Timmy", res.choices[0].message.content)
+    assert match_regex("(Suddenly)+", res.choices[0].message.content)
 
 
 def test_chat_template():
@@ -503,5 +503,5 @@ def test_chat_completions_multiple_choices():
     assert len(res.body["choices"]) == 2
     for choice in res.body["choices"]:
         assert "assistant" == choice["message"]["role"]
-        assert match_regex("Suddenly|Timmy", choice["message"]["content"])
+        assert match_regex("Suddenly", choice["message"]["content"])
         assert choice["finish_reason"] == "length"
diff --git a/tools/server/tests/unit/test_completion.py b/tools/server/tests/unit/test_completion.py
index daaa6e5a90..ef1757db21 100644
--- a/tools/server/tests/unit/test_completion.py
+++ b/tools/server/tests/unit/test_completion.py
@@ -17,7 +17,7 @@ def create_server():
     server = ServerPreset.tinyllama2()
 
 @pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated,return_tokens", [
-    ("I believe the meaning of life is", 8, "(going|bed)+|froze and every|froze and bri", 18, 8, False, False),
+    ("I believe the meaning of life is", 8, "(going|bed)+", 18, 8, False, False),
     ("Write a joke about AI from a very long prompt which will not be truncated", 64, "(princesses|everyone|kids|Anna|forest)+", 46, 64, False, True),
 ])
 def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int, n_predicted: int, truncated: bool, return_tokens: bool):
@@ -42,7 +42,7 @@
 
 
 @pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated", [
-    ("I believe the meaning of life is", 8, "(going|bed)+|froze and every|froze and bri", 18, 8, False),
+    ("I believe the meaning of life is", 8, "(going|bed)+", 18, 8, False),
     ("Write a joke about AI from a very long prompt which will not be truncated", 64, "(princesses|everyone|kids|Anna|forest)+", 46, 64, False),
 ])
 def test_completion_stream(prompt: str, n_predict: int, re_content: str, n_prompt: int, n_predicted: int, truncated: bool):
@@ -103,7 +103,7 @@ def test_completion_with_openai_library():
     assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
     assert res.choices[0].finish_reason == "length"
     assert res.choices[0].text is not None
-    assert match_regex("(going|bed)+|froze and every|froze and bri", res.choices[0].text)
+    assert match_regex("(going|bed)+", res.choices[0].text)
 
 
 def test_completion_stream_with_openai_library():
@@ -122,7 +122,7 @@ def test_completion_stream_with_openai_library():
         if choice.finish_reason is None:
             assert choice.text is not None
             output_text += choice.text
-    assert match_regex("(going|bed)+|froze and every|froze and bri", output_text)
+    assert match_regex("(going|bed)+", output_text)
 
 
 # Test case from https://github.com/ggml-org/llama.cpp/issues/13780
@@ -146,7 +146,7 @@ def test_completion_stream_with_openai_library_stops():
         if choice.finish_reason is None:
             assert choice.text is not None
             output_text += choice.text
-    assert match_regex("Sure, here's one for[\\s\\S]*|Sure thing..Why don't|Sure! Here's one for you:", output_text), f'Unexpected output: {output_text}'
+    assert match_regex("Sure, here's one for[\\s\\S]*", output_text), f'Unexpected output: {output_text}'
 
 
 @pytest.mark.parametrize("n_slots", [1, 2])
@@ -511,8 +511,8 @@ def test_n_probs_post_sampling():
             assert "token" in prob and type(prob["token"]) == str
             assert "prob" in prob and 0.0 <= prob["prob"] <= 1.0
             assert "bytes" in prob and type(prob["bytes"]) == list
-        # at low temperature, one of the token has a very high probability
-        assert any(prob["prob"] >= 0.99 for prob in tok["top_probs"])
+        # because the test model usually outputs tokens with either 100% or 0% probability, we need to check all the top_probs
+        assert any(prob["prob"] == 1.0 for prob in tok["top_probs"])
 
 
 @pytest.mark.parametrize("tokenize,openai_style", [(False, False), (False, True), (True, False), (True, True)])