// Mirror of https://github.com/google/gemma.cpp.git
// 156 lines, 5.7 KiB, C++
// Copyright 2024 Google LLC
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <algorithm>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "evals/benchmark_helper.h"
|
|
#include "gemma/gemma.h"
|
|
#include "hwy/base.h"
|
|
#include "hwy/nanobenchmark.h"
|
|
#include "hwy/profiler.h"
|
|
#include "hwy/tests/hwy_gtest.h"
|
|
|
|
namespace gcpp {
|
|
namespace {
|
|
|
|
// Shared state. Requires argc/argv, so construct in main and use the same raw
|
|
// pointer approach as in benchmarks.cc. Note that the style guide forbids
|
|
// non-local static variables with dtors.
|
|
GemmaEnv* s_env = nullptr;
|
|
|
|
// Test fixture whose helper sends a whole batch of prompts through the shared
// `s_env` in one call.
class GemmaBatchBench : public ::testing::Test {
 protected:
  // Passes all `inputs` to `s_env->BatchQueryModel` and returns the `response`
  // string of every result, in the order the results are returned.
  // Side effect: overwrites `temperature` and `verbosity` in the shared config.
  std::vector<std::string> BatchGemmaReply(
      const std::vector<std::string>& inputs) {
    auto& config = s_env->MutableConfig();
    config.temperature = 0.0f;  // deterministic
    config.verbosity = 2;

    const auto results = s_env->BatchQueryModel(inputs);

    std::vector<std::string> responses;
    responses.reserve(results.size());
    for (const QueryResult& query_result : results) {
      responses.emplace_back(query_result.response);
    }
    return responses;
  }
};
|
|
|
|
// Builds the batch of prompts used by the benchmark.
//
// A pool of prompts is assembled from a fixed list of hand-written questions
// plus every "<start> <concept> <end>" combination of the three word lists
// below. The pool is shuffled and then copied, wrapping around if necessary,
// until the result holds exactly `decode_qbatch_size` entries (read from the
// shared `s_env` config).
//
// Requires `s_env` to be non-null.
std::vector<std::string> GenerateInputs() {
  std::vector<std::string> prompts = {
      "Describe dynamic programming.",
      "Explain how electric cars work.",
      "Explain to me how to use Google Maps.",
      "How does AI work?",
      "How would you describe a unicorn?",
      "Please share some good cooking tips.",
      "Teach me about GPU programming.",
      "Tell me a fact about World War 2.",
      "Tell me about Google.",
      "Tell me more about olympic sports.",
      "Tell me something about space travel.",
      "What is a horse?",
      "What is Michigan State?",
      "What's the history of Denmark?",
      "Write a poem about planet earth.",
      "Write a story about Jupiter.",
      "Write about the moon.",
      "Write me a comedy story about Florida.",
      "Write me a poem about France.",
  };
  const std::vector<std::string> start = {"What is", "When did", "Where did",
                                          "How did", "Why did"};
  const std::vector<std::string> concepts = {
      "Socrates",     "Einstein",     "Leonardo",        "Cleopatra",
      "Adele",        "Mars",         "Turing",          "Mozart",
      "democracy",    "gravity",      "AI",              "evolution",
      "physics",      "the internet", "steam engine",    "inflation",
      "electricity",  "the Sahara",   "NASA",            "Rome",
      "the UN",       "Google",       "the Renaissance", "Hamlet",
      "poetry",       "Stoicism",     "geometry",        "DNA",
      "Star Wars",    "1984"};
  const std::vector<std::string> end = {"exist?",   "work?",    "happen?",
                                        "lead to?", "believe?", "result in?"};

  // One allocation for all generated prompts. The nesting order below fixes
  // the pre-shuffle order, which the seeded shuffle depends on.
  prompts.reserve(prompts.size() + start.size() * concepts.size() * end.size());
  for (const std::string& s : start) {
    for (const std::string& c : concepts) {
      for (const std::string& e : end) {
        prompts.push_back(s + " " + c + " " + e);
      }
    }
  }

  // NOTE(review): the `true` argument and the constant stream id 123 presumably
  // make the shuffle order reproducible across runs - confirm against the
  // AesCtrEngine/RngStream declarations.
  AesCtrEngine engine(true);
  std::shuffle(prompts.begin(), prompts.end(), RngStream(engine, 123));

  // Fills `inputs` by repeating from `prompts` until the desired batch size.
  // The loop is bounded by the requested size, not `inputs.capacity()`:
  // `reserve` only guarantees capacity >= the request, so using capacity as
  // the bound could produce a larger batch than configured.
  const size_t batch_size = s_env->MutableConfig().decode_qbatch_size;
  std::vector<std::string> inputs;
  inputs.reserve(batch_size);
  for (size_t i = 0; i < batch_size; ++i) {
    inputs.push_back(prompts[i % prompts.size()]);
  }
  return inputs;
}
|
|
|
|
// Runs one generated batch of prompts through the model several times, and
// after each repetition prints the first few responses to stderr followed by
// the profiler results.
TEST_F(GemmaBatchBench, RandomQuestionsBatched) {
  s_env->SetMaxGeneratedTokens(12);
  const std::vector<std::string> inputs = GenerateInputs();

  // Run multiple times so that auto-tuning is closer to complete.
  for (size_t rep = 0; rep != 4; ++rep) {
    const std::vector<std::string> responses = BatchGemmaReply(inputs);

    // Print at most `hwy::Unpredictable1() * 3` responses (fewer if the batch
    // is smaller); the bound is re-evaluated before every iteration.
    size_t idx = 0;
    while (idx < HWY_MIN(hwy::Unpredictable1() * 3, responses.size())) {
      const char* answer = responses[idx].c_str();
      fprintf(stderr, "Rep %zu batch answer %zu '%s'\n\n", rep, idx, answer);
      ++idx;
    }

    PROFILER_PRINT_RESULTS();
  }
}
|
|
|
|
} // namespace
|
|
} // namespace gcpp
|
|
|
|
int main(int argc, char** argv) {
|
|
fprintf(stderr, "GemmaEnv setup..\n");
|
|
gcpp::GemmaEnv env(argc, argv);
|
|
gcpp::s_env = &env;
|
|
|
|
testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
}
|