Fix gemma_test GeographyBatched for 2b-it and add entropy expectations for gemma2-2b-it.

PiperOrigin-RevId: 662072395
This commit is contained in:
Daniel Keysers 2024-08-12 07:12:08 -07:00 committed by Copybara-Service
parent b831fa8482
commit 7316ee8f96
1 changed file with 11 additions and 2 deletions

View File

@@ -29,7 +29,7 @@
// To run the test, pass the following flags: // To run the test, pass the following flags:
// --model <model> --tokenizer <tokenizer_path> --weights <weights_path> // --model <model> --tokenizer <tokenizer_path> --weights <weights_path>
// It should pass for the following models: // It should pass for the following models:
// 2b-it (v1 and v1.1), 7b-it (v1 and v1.1), 9b-it, 27b-it // 2b-it (v1 and v1.1), 7b-it (v1 and v1.1), gemma2-2b-it, 9b-it, 27b-it
namespace gcpp { namespace gcpp {
namespace { namespace {
@@ -122,7 +122,7 @@ TEST_F(GemmaTest, GeographyBatched) {
{"What is the capital of Australia?", "Canberra"}, {"What is the capital of Australia?", "Canberra"},
{"What is the capital of Denmark?", "Copenhagen"}, {"What is the capital of Denmark?", "Copenhagen"},
{"Ljubljana is the capital of which country?", "Slovenia"}, {"Ljubljana is the capital of which country?", "Slovenia"},
{"Is Chicago a country?", "not"}, {"Is Chicago a country?", "city"},
{"How many states does the US have?", "50"}, {"How many states does the US have?", "50"},
{"What is the Pacific?", "ocean"}, {"What is the Pacific?", "ocean"},
}; };
@@ -199,6 +199,9 @@ TEST_F(GemmaTest, CrossEntropySmall) {
// 7B v.1 and v.1.1 produce slightly different results. // 7B v.1 and v.1.1 produce slightly different results.
EXPECT_NEAR(entropy, 2.8f, 0.2f); EXPECT_NEAR(entropy, 2.8f, 0.2f);
break; break;
case gcpp::Model::GEMMA2_2B:
EXPECT_NEAR(entropy, 1.14f, 0.02f);
break;
case gcpp::Model::GEMMA2_9B: case gcpp::Model::GEMMA2_9B:
EXPECT_NEAR(entropy, 1.28f, 0.02f); EXPECT_NEAR(entropy, 1.28f, 0.02f);
break; break;
@@ -224,6 +227,9 @@ TEST_F(GemmaTest, CrossEntropyJingleBells) {
// 7B v.1 and v.1.1 produce slightly different results. // 7B v.1 and v.1.1 produce slightly different results.
EXPECT_NEAR(entropy, 1.07f, 0.05f); EXPECT_NEAR(entropy, 1.07f, 0.05f);
break; break;
case gcpp::Model::GEMMA2_2B:
EXPECT_NEAR(entropy, 0.49f, 0.02f);
break;
case gcpp::Model::GEMMA2_9B: case gcpp::Model::GEMMA2_9B:
EXPECT_NEAR(entropy, 0.37f, 0.02f); EXPECT_NEAR(entropy, 0.37f, 0.02f);
break; break;
@@ -249,6 +255,9 @@ TEST_F(GemmaTest, CrossEntropyGettysburg) {
// 7B v.1 and v.1.1 produce slightly different results. // 7B v.1 and v.1.1 produce slightly different results.
EXPECT_NEAR(entropy, 0.75f, 0.1f); EXPECT_NEAR(entropy, 0.75f, 0.1f);
break; break;
case gcpp::Model::GEMMA2_2B:
EXPECT_NEAR(entropy, 0.20f, 0.02f);
break;
case gcpp::Model::GEMMA2_9B: case gcpp::Model::GEMMA2_9B:
EXPECT_NEAR(entropy, 0.15f, 0.02f); EXPECT_NEAR(entropy, 0.15f, 0.02f);
break; break;