From 5862d1f9952ad5ad9eb400af075b5e90cacbf5ef Mon Sep 17 00:00:00 2001 From: Luca Versari Date: Thu, 4 Apr 2024 14:37:40 +0200 Subject: [PATCH] Add a benchmark and additional tests. Also add a script to help running sanitizer builds, and do some cleanup. Co-authored-by: Andrey Mikhaylov Co-authored-by: Eugene Kliuchnikov Co-authored-by: Sami Boukortt Co-authored-by: Zoltan Szabadka --- .clang-tidy | 1 + .github/workflows/build.yml | 6 +- .gitignore | 4 + BUILD.bazel | 32 ++++ CMakeLists.txt | 23 +-- MODULE.bazel | 7 + benchmark.cc | 293 ++++++++++++++++++++++++++++++++++ cmake.sh | 299 +++++++++++++++++++++++++++++++++++ examples/hello_world/run.cc | 15 +- gemma.cc | 168 +++++++++++++++++++- gemma.h | 17 +- gemma_test.cc | 304 ++++++++++++++++++++++++++++++++++++ goldens/2b-it.txt | 40 +++++ goldens/7b-it.txt | 12 ++ run.cc | 32 ++-- util/app.h | 10 +- 16 files changed, 1217 insertions(+), 46 deletions(-) create mode 100644 .gitignore create mode 100644 benchmark.cc create mode 100755 cmake.sh create mode 100644 gemma_test.cc create mode 100644 goldens/2b-it.txt create mode 100644 goldens/7b-it.txt diff --git a/.clang-tidy b/.clang-tidy index 497c2e3..658b941 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -90,6 +90,7 @@ Checks: "-*,\ -concurrency-mt-unsafe,\ -cppcoreguidelines-avoid-c-arrays,\ -cppcoreguidelines-avoid-const-or-ref-data-members,\ + -cppcoreguidelines-avoid-do-while,\ -cppcoreguidelines-avoid-goto,\ -cppcoreguidelines-avoid-magic-numbers,\ -cppcoreguidelines-avoid-non-const-global-variables,\ diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6fa432c..b028c47 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,11 @@ name: build # Trigger on push, pull request, or via manual dispatch. -on: [push, pull_request, workflow_dispatch] +on: + push: + pull_request: + types: [opened, reopened, labeled, unlabeled, synchronize] + workflow_dispatch: jobs: build: diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d4264cb --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.cache/ +bazel-*/ +build-*/ +python/*/__pycache__ diff --git a/BUILD.bazel b/BUILD.bazel index 27c6cd7..757bea2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -86,6 +86,19 @@ cc_library( ], ) +cc_test( + name = "gemma_test", + srcs = ["gemma_test.cc"], + deps = [ + ":args", + ":gemma_lib", + ":ops", + "@googletest//:gtest_main", + "@hwy//:hwy_test_util", + "@hwy//:thread_pool", + ], +) + cc_library( name = "app", hdrs = [ @@ -132,3 +145,22 @@ cc_binary( "@hwy//:thread_pool", ], ) + +cc_binary( + name = "benchmark", + srcs = [ + "benchmark.cc", + ], + deps = [ + ":app", + ":args", + ":gemma_lib", + # "//base", + "//compression:compress", + "@hwy//:hwy", + "@hwy//:nanobenchmark", + "@hwy//:profiler", + "@hwy//:thread_pool", + "@nlohmann_json//:json", + ], +) diff --git a/CMakeLists.txt b/CMakeLists.txt index c0c749b..00c47f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,9 @@ FetchContent_MakeAvailable(highway) FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c EXCLUDE_FROM_ALL) FetchContent_MakeAvailable(sentencepiece) +FetchContent_Declare(json GIT_REPOSITORY https://github.com/nlohmann/json.git GIT_TAG 9cca280a4d0ccf0c08f47a99aa71d1b0e52f8d03 EXCLUDE_FROM_ALL) +FetchContent_MakeAvailable(json) + set(SOURCES gemma.cc compression/blob_store.cc @@ -60,17 +63,7 @@ if (WEIGHT_TYPE) add_definitions(-DGEMMA_WEIGHT_T=${WEIGHT_TYPE}) endif() -# Executable Target - -add_executable(gemma run.cc) -target_sources(gemma PRIVATE ${SOURCES}) -set_property(TARGET gemma PROPERTY CXX_STANDARD 17) -target_link_libraries(gemma hwy hwy_contrib sentencepiece) -target_include_directories(gemma PRIVATE ./) FetchContent_GetProperties(sentencepiece) -target_include_directories(gemma PRIVATE ${sentencepiece_SOURCE_DIR}) -target_compile_definitions(gemma PRIVATE $<$:_CRT_SECURE_NO_WARNINGS NOMINMAX>) -target_compile_options(gemma PRIVATE $<$:-Wno-deprecated-declarations>) ## Library Target @@ -84,11 +77,21 @@ target_include_directories(libgemma PUBLIC ${sentencepiece_SOURCE_DIR}) target_compile_definitions(libgemma PRIVATE $<$:_CRT_SECURE_NO_WARNINGS NOMINMAX>) target_compile_options(libgemma PRIVATE $<$:-Wno-deprecated-declarations>) +# Executable Target + +add_executable(gemma run.cc) +target_link_libraries(gemma libgemma hwy hwy_contrib) + +add_executable(benchmark benchmark.cc) +target_link_libraries(benchmark libgemma hwy hwy_contrib nlohmann_json::nlohmann_json) + +## Tests set(GEMMA_ENABLE_TESTS OFF CACHE BOOL "Enable Gemma tests") if (GEMMA_ENABLE_TESTS) set(GEMMA_TEST_FILES ops_test.cc + gemma_test.cc ) foreach (TESTFILE IN LISTS GEMMA_TEST_FILES) diff --git a/MODULE.bazel b/MODULE.bazel index 63b3c89..d183e89 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -20,6 +20,13 @@ http_archive( strip_prefix = "highway-1.1.0", ) +http_archive( + name = "nlohmann_json", + urls = ["https://github.com/nlohmann/json/archive/refs/tags/v3.11.3.zip"], + integrity = "sha256-BAIrBdgG61/3MCPCgLaGl9Erk+G3JnoLIqGjnsdXgGk=", + strip_prefix = "json-3.11.3", +) + http_archive( name = "com_google_sentencepiece", sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754", diff --git a/benchmark.cc b/benchmark.cc new file mode 100644 index 0000000..a5a77d7 --- /dev/null +++ b/benchmark.cc @@ -0,0 +1,293 @@ +#include +#include +#include +#include +#include +#include + +#include "hwy/base.h" +#include "hwy/contrib/thread_pool/thread_pool.h" +#include "hwy/highway.h" +#include "hwy/per_target.h" +#include "hwy/profiler.h" +#include "hwy/timer.h" +#include "nlohmann/json.hpp" +// copybara:import_next_line:gemma_cpp +#include "util/app.h" +// copybara:import_next_line:gemma_cpp +#include "util/args.h" + +using json = nlohmann::json; + +class BenchmarkArgs : public gcpp::ArgsBase { + public: + BenchmarkArgs(int argc, char* argv[]) { InitAndParse(argc, argv); } + + gcpp::Path goldens; + gcpp::Path summarize_text; + gcpp::Path cross_entropy; + gcpp::Path trivia_qa; + size_t max_questions; + size_t batch_tokens; + + template + void ForEach(const Visitor& visitor) { + visitor(goldens.path, "goldens_dir", std::string(""), + "Directory containing golden files", 2); + visitor(summarize_text.path, "summarize_text", std::string(""), + "Path to text file to summarize", 2); + visitor(cross_entropy.path, "cross_entropy", std::string(""), + "Path to text file to compute cross entropy on", 2); + visitor(trivia_qa.path, "trivia_qa", std::string(""), + "Path to json file containing TriviaQA entries", 2); + visitor(max_questions, "max_questions", (size_t)20, + "Maximum number of questions to ask from --trivial_qa input", 2); + visitor(batch_tokens, "batch_tokens", (size_t)0, + "If not zero, break prompt into batches of this size and compute " + "cross entropy on them independently.", + 2); + } +}; + +void LogSpeedStats(const double time_start, size_t total_tokens) { + const double time_end = hwy::platform::Now(); + const double time_elapsed = time_end - time_start; + const double tok_sec = total_tokens / time_elapsed; + std::cout << total_tokens << " tokens in " << time_elapsed << " seconds" + << " [" << tok_sec << " tokens / sec" << "]\n"; +} + +std::pair QueryModel( + gcpp::Gemma& model, gcpp::InferenceArgs& args, gcpp::AppArgs& app, + gcpp::KVCache& kv_cache, hwy::ThreadPool& inner_pool, hwy::ThreadPool& pool, + const std::string& input) { + std::vector prompt; + HWY_ASSERT(model.Tokenizer()->Encode(input, &prompt)); + + // For both pre-trained and instruction-tuned models: prepend "" token + // if needed. + prompt.insert(prompt.begin(), 2); + std::string res; + size_t total_tokens = 0; + auto accept_token = [](int) { return true; }; + std::mt19937 gen; + gen.seed(42); + + const double time_start = hwy::platform::Now(); + auto stream_token = [&res, &total_tokens, &time_start, &app, + tokenizer = model.Tokenizer()](int token, float) { + ++total_tokens; + std::string token_text; + HWY_ASSERT(tokenizer->Decode(std::vector{token}, &token_text)); + res += token_text; + if (app.verbosity >= 1 && total_tokens % 100 == 0) { + LogSpeedStats(time_start, total_tokens); + } + return true; + }; + if (app.verbosity >= 2) { + std::cout << args.max_tokens << " " << args.max_generated_tokens << " " + << args.temperature; + } + GenerateGemma(model, args.max_tokens, args.max_generated_tokens, + args.temperature, prompt, /*abs_pos=*/0, kv_cache, pool, + inner_pool, stream_token, accept_token, gen, app.verbosity); + if (app.verbosity >= 1) { + LogSpeedStats(time_start, total_tokens); + } + return {res, total_tokens}; +} + +std::vector> load_goldens( + const std::string& path) { + std::ifstream goldens_file(path); + if (!goldens_file) { + std::cout << "Could not load goldens file: " << path << "\n" << std::flush; + return {}; + } + std::vector> res; + std::string query_separator; + std::string query; + std::string answer_separator; + std::string answer; + while (std::getline(goldens_file, query_separator) && + std::getline(goldens_file, query) && + std::getline(goldens_file, answer_separator) && + std::getline(goldens_file, answer)) { + res.push_back({query, answer}); + } + return res; +} + +std::string ReadFile(const gcpp::Path& path) { + std::ifstream text_file(path.path); + if (!text_file) { + std::cout << "Could not open file: " << path.path << "\n" << std::flush; + return {}; + } + std::stringstream buffer; + buffer << text_file.rdbuf(); + return buffer.str(); +} + +int BenchmarkGoldens(gcpp::Gemma& model, gcpp::InferenceArgs& args, + gcpp::AppArgs& app, gcpp::KVCache& kv_cache, + hwy::ThreadPool& inner_pool, hwy::ThreadPool& pool, + const std::string& golden_path) { + const std::vector> queries_answers = + load_goldens(golden_path); + int correct_answers = 0; + int total_tokens = 0; + const double time_start = hwy::platform::Now(); + for (const auto& [question, expected_answer] : queries_answers) { + const auto [answer, token_count] = + QueryModel(model, args, app, kv_cache, inner_pool, pool, question); + total_tokens += token_count; + if (answer.find(expected_answer) != std::string::npos) { + correct_answers++; + } else { + std::cout << "Wrong!\n"; + std::cout << "Input: " << question << "\n"; + std::cout << "Expected: " << expected_answer << "\n"; + std::cout << "Output: " << answer << "\n\n" << std::flush; + } + } + LogSpeedStats(time_start, total_tokens); + + std::cout << "Correct: " << correct_answers << " out of " + << queries_answers.size() << "\n" + << std::flush; + if (correct_answers != queries_answers.size()) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +int BenchmarkSummary(gcpp::Gemma& model, gcpp::InferenceArgs& args, + gcpp::AppArgs& app, gcpp::KVCache& kv_cache, + hwy::ThreadPool& inner_pool, hwy::ThreadPool& pool, + const gcpp::Path& text) { + std::string prompt("Here is some text to summarize:\n"); + prompt.append(ReadFile(text)); + prompt.append("\nSummarize this text.\n"); + const double time_start = hwy::platform::Now(); + const auto [answer, token_count] = + QueryModel(model, args, app, kv_cache, inner_pool, pool, prompt); + std::cout << answer.substr(prompt.size()) << "\n" << std::flush; + LogSpeedStats(time_start, token_count); + return EXIT_SUCCESS; +} + +int BenchmarkCrossEntropy(gcpp::Gemma& model, gcpp::Model model_type, + gcpp::InferenceArgs& args, gcpp::AppArgs& app, + hwy::ThreadPool& inner_pool, hwy::ThreadPool& pool, + const gcpp::Path& text, size_t batch_tokens) { + std::string input = ReadFile(text); + std::vector prompt; + HWY_ASSERT(model.Tokenizer()->Encode(input, &prompt)); + prompt.resize(std::min(args.max_tokens, prompt.size())); + std::cout << "Number of input tokens: " << prompt.size() << "\n"; + const double time_start = hwy::platform::Now(); + float total_entropy = 0.0f; + size_t total_input_len = 0; + if (batch_tokens == 0) batch_tokens = prompt.size(); + for (size_t pos = 0; pos < prompt.size(); pos += batch_tokens) { + size_t num_tokens = std::min(prompt.size() - pos, batch_tokens); + std::vector prompt_slice(prompt.begin() + pos, + prompt.begin() + pos + num_tokens); + auto kv_cache = CreateKVCache(model_type); + float entropy = + ComputeCrossEntropy(model, num_tokens, prompt_slice, kv_cache, pool, + inner_pool, app.verbosity); + total_entropy += entropy; + LogSpeedStats(time_start, pos + num_tokens); + std::string text_slice; + HWY_ASSERT(model.Tokenizer()->Decode(prompt_slice, &text_slice)); + total_input_len += text_slice.size(); + printf("Cross entropy per byte: %f [cumulative: %f]\n", + entropy / text_slice.size(), total_entropy / total_input_len); + } + return EXIT_SUCCESS; +} + +int BenchmarkTriviaQA(gcpp::Gemma& model, gcpp::InferenceArgs& args, + gcpp::AppArgs& app, gcpp::KVCache& kv_cache, + hwy::ThreadPool& inner_pool, hwy::ThreadPool& pool, + const gcpp::Path& json_file, size_t max_questions) { + std::ifstream trivia_file(json_file.path); + if (!trivia_file) { + std::cout << "Could not load file: " << json_file.path << "\n" + << std::flush; + return EXIT_FAILURE; + } + std::string line; + size_t correct_answers = 0; + size_t i = 0; + while (std::getline(trivia_file, line)) { + json data = json::parse(line); + const auto [answer, token_count] = QueryModel( + model, args, app, kv_cache, inner_pool, pool, data["question"]); + std::cout << answer << "\n"; + bool correct = false; + for (const std::string expected : data["answer"]["aliases"]) { + if (answer.find(expected) != std::string::npos) { + correct = true; + break; + } + } + if (correct) { + ++correct_answers; + std::cout << "CORRECT\n\n"; + } else { + std::cout << "WRONG\n\n"; + } + if (++i >= max_questions) break; + } + printf("Correct answers: %zu / %zu\n", correct_answers, i); + return EXIT_SUCCESS; +} + +/* Run this in the same way as gemma, p.ex.: + ./benchmark --tokenizer tokenizer.spm --model 2b-it --weights \ + 2b-it-sfp.sbs --goldens_dir "../goldens" +*/ +int main(int argc, char** argv) { + gcpp::LoaderArgs loader(argc, argv); + gcpp::InferenceArgs args(argc, argv); // inference + gcpp::AppArgs app(argc, argv); + BenchmarkArgs benchmark_args(argc, argv); + + hwy::ThreadPool inner_pool(0); + hwy::ThreadPool pool(app.num_threads); + // For many-core, pinning threads to cores helps. + if (app.num_threads > 10) { + gcpp::PinThreadToCore(app.num_threads - 1); // Main thread + + pool.Run(0, pool.NumThreads(), [](uint64_t /*task*/, size_t thread) { + gcpp::PinThreadToCore(thread); + }); + } + + gcpp::Gemma model(loader.tokenizer, loader.weights, loader.ModelType(), pool); + auto kv_cache = CreateKVCache(loader.ModelType()); + + if (!benchmark_args.goldens.path.empty()) { + const std::string golden_path = + benchmark_args.goldens.path + "/" + loader.model_type + ".txt"; + return BenchmarkGoldens(model, args, app, kv_cache, inner_pool, pool, + golden_path); + } else if (!benchmark_args.summarize_text.path.empty()) { + return BenchmarkSummary(model, args, app, kv_cache, inner_pool, pool, + benchmark_args.summarize_text); + } else if (!benchmark_args.cross_entropy.path.empty()) { + return BenchmarkCrossEntropy(model, loader.ModelType(), args, app, + inner_pool, pool, benchmark_args.cross_entropy, + benchmark_args.batch_tokens); + } else if (!benchmark_args.trivia_qa.path.empty()) { + return BenchmarkTriviaQA(model, args, app, kv_cache, inner_pool, pool, + benchmark_args.trivia_qa, + benchmark_args.max_questions); + } + std::cout << "No benchmark command given." << "\n" << std::flush; + return EXIT_FAILURE; +} diff --git a/cmake.sh b/cmake.sh new file mode 100755 index 0000000..408f84f --- /dev/null +++ b/cmake.sh @@ -0,0 +1,299 @@ +#!/usr/bin/env bash +# Copyright 2024 Google LLC +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +MYDIR=$(dirname $(realpath "$0")) +BUILD_DIR="${BUILD_DIR:-${MYDIR}/build}" + +CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-RelWithDebInfo}" +CMAKE_C_COMPILER="${CMAKE_C_COMPILER:-clang-14}" +CMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER:-clang++-14}" +# Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS +CMAKE_FLAGS="${CMAKE_FLAGS:-}" +CMAKE_C_FLAGS="${CMAKE_C_FLAGS:-} ${CMAKE_FLAGS}" +CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS:-} ${CMAKE_FLAGS}" +CMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS:-}" +CMAKE_MODULE_LINKER_FLAGS="${CMAKE_MODULE_LINKER_FLAGS:-}" +CMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS:-}" + +# Local flags passed to sanitizers. +UBSAN_FLAGS=( + -fsanitize=alignment + -fsanitize=bool + -fsanitize=bounds + -fsanitize=builtin + -fsanitize=enum + -fsanitize=float-cast-overflow + -fsanitize=float-divide-by-zero + -fsanitize=integer-divide-by-zero + -fsanitize=null + -fsanitize=object-size + -fsanitize=pointer-overflow + -fsanitize=return + -fsanitize=returns-nonnull-attribute + -fsanitize=shift-base + -fsanitize=shift-exponent + -fsanitize=unreachable + -fsanitize=vla-bound + + -fno-sanitize-recover=undefined + -fsanitize-recover=alignment +) + +CLANG_VERSION="${CLANG_VERSION:-}" +# Detect the clang version suffix and store it in CLANG_VERSION. For example, +# "6.0" for clang 6 or "7" for clang 7. +detect_clang_version() { + if [[ -n "${CLANG_VERSION}" ]]; then + return 0 + fi + local clang_version=$("${CMAKE_C_COMPILER:-clang}" --version | head -n1) + clang_version=${clang_version#"Debian "} + clang_version=${clang_version#"Ubuntu "} + local llvm_tag + case "${clang_version}" in + "clang version 6."*) + CLANG_VERSION="6.0" + ;; + "clang version "*) + # Any other clang version uses just the major version number. + local suffix="${clang_version#clang version }" + CLANG_VERSION="${suffix%%.*}" + ;; + "emcc"*) + # We can't use asan or msan in the emcc case. + ;; + *) + echo "Unknown clang version: ${clang_version}" >&2 + return 1 + esac +} + +# Temporary files cleanup hooks. +CLEANUP_FILES=() +cleanup() { + if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then + rm -fr "${CLEANUP_FILES[@]}" + fi +} + +# Executed on exit. +on_exit() { + local retcode="$1" + # Always cleanup the CLEANUP_FILES. + cleanup +} + +trap 'retcode=$?; { set +x; } 2>/dev/null; on_exit ${retcode}' INT TERM EXIT + + +# Install libc++ libraries compiled with msan in the msan_prefix for the current +# compiler version. +cmd_msan_install() { + local tmpdir=$(mktemp -d) + CLEANUP_FILES+=("${tmpdir}") + # Detect the llvm to install: + detect_clang_version + # Allow overriding the LLVM checkout. + local llvm_root="${LLVM_ROOT:-}" + if [ -z "${llvm_root}" ]; then + local llvm_tag="llvmorg-${CLANG_VERSION}.0.0" + case "${CLANG_VERSION}" in + "6.0") + llvm_tag="llvmorg-6.0.1" + ;; + "7") + llvm_tag="llvmorg-7.0.1" + ;; + esac + local llvm_targz="${tmpdir}/${llvm_tag}.tar.gz" + curl -L --show-error -o "${llvm_targz}" \ + "https://github.com/llvm/llvm-project/archive/${llvm_tag}.tar.gz" + tar -C "${tmpdir}" -zxf "${llvm_targz}" + llvm_root="${tmpdir}/llvm-project-${llvm_tag}" + fi + + local msan_prefix="${HOME}/.msan/${CLANG_VERSION}" + rm -rf "${msan_prefix}" + + declare -A CMAKE_EXTRAS + CMAKE_EXTRAS[libcxx]="\ + -DLIBCXX_CXX_ABI=libstdc++ \ + -DLIBCXX_INSTALL_EXPERIMENTAL_LIBRARY=ON \ + -DLIBCXX_INCLUDE_BENCHMARKS=OFF" + + for project in libcxx; do + local proj_build="${tmpdir}/build-${project}" + local proj_dir="${llvm_root}/${project}" + mkdir -p "${proj_build}" + cmake -B"${proj_build}" -H"${proj_dir}" \ + -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_USE_SANITIZER=Memory \ + -DLLVM_PATH="${llvm_root}/llvm" \ + -DLLVM_CONFIG_PATH="$(which llvm-config llvm-config-7 llvm-config-6.0 | \ + head -n1)" \ + -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}" \ + -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" \ + -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}" \ + -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}" \ + -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" \ + -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}" \ + -DCMAKE_INSTALL_PREFIX="${msan_prefix}" \ + ${CMAKE_EXTRAS[${project}]} + cmake --build "${proj_build}" + ninja -C "${proj_build}" install + done +} + +cmd_msan() { + detect_clang_version + local msan_prefix="${HOME}/.msan/${CLANG_VERSION}" + if [[ ! -d "${msan_prefix}" || -e "${msan_prefix}/lib/libc++abi.a" ]]; then + # Install msan libraries for this version if needed or if an older version + # with libc++abi was installed. + cmd_msan_install + fi + + local msan_c_flags=( + -fsanitize=memory + -fno-omit-frame-pointer + + -g + -DMEMORY_SANITIZER + + # Force gtest to not use the cxxbai. + -DGTEST_HAS_CXXABI_H_=0 + + -fsanitize-memory-track-origins + ) + + local msan_cxx_flags=( + "${msan_c_flags[@]}" + + # Some C++ sources don't use the std at all, so the -stdlib=libc++ is unused + # in those cases. Ignore the warning. + -Wno-unused-command-line-argument + -stdlib=libc++ + + # We include the libc++ from the msan directory instead, so we don't want + # the std includes. + -nostdinc++ + -cxx-isystem"${msan_prefix}/include/c++/v1" + ) + + local msan_linker_flags=( + -L"${msan_prefix}"/lib + -Wl,-rpath -Wl,"${msan_prefix}"/lib/ + ) + + CMAKE_C_FLAGS+=" ${msan_c_flags[@]} ${UBSAN_FLAGS[@]}" + CMAKE_CXX_FLAGS+=" ${msan_cxx_flags[@]} ${UBSAN_FLAGS[@]}" + CMAKE_EXE_LINKER_FLAGS+=" ${msan_linker_flags[@]}" + CMAKE_MODULE_LINKER_FLAGS+=" ${msan_linker_flags[@]}" + CMAKE_SHARED_LINKER_FLAGS+=" ${msan_linker_flags[@]}" + cmake_configure "$@" \ + -DCMAKE_CROSSCOMPILING=1 -DRUN_HAVE_STD_REGEX=0 -DRUN_HAVE_POSIX_REGEX=0 \ + -DCMAKE_REQUIRED_LINK_OPTIONS="${msan_linker_flags[@]}" +} + +cmake_configure() { + local args=( + -B"${BUILD_DIR}" -H"${MYDIR}" + -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" + -G Ninja + -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}" + -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}" + -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}" + -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}" + -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" + -DCMAKE_MODULE_LINKER_FLAGS="${CMAKE_MODULE_LINKER_FLAGS}" + -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}" + -DGEMMA_ENABLE_TESTS=ON + ) + + cmake "${args[@]}" "$@" +} + +cmd_opt() { + CMAKE_BUILD_TYPE="RelWithDebInfo" + cmake_configure "$@" +} + +cmd_asan() { + CMAKE_C_FLAGS+=" -g -DADDRESS_SANITIZER -fsanitize=address ${UBSAN_FLAGS[@]}" + CMAKE_CXX_FLAGS+=" -g -DADDRESS_SANITIZER -fsanitize=address \ + ${UBSAN_FLAGS[@]}" + cmake_configure "$@" +} + +cmd_tsan() { + SANITIZER="tsan" + local tsan_args=( + -g + -DTHREAD_SANITIZER + ${UBSAN_FLAGS[@]} + -fsanitize=thread + ) + CMAKE_C_FLAGS+=" ${tsan_args[@]}" + CMAKE_CXX_FLAGS+=" ${tsan_args[@]}" + + CMAKE_BUILD_TYPE="RelWithDebInfo" + cmake_configure "$@" +} + +main() { + local cmd="${1:-}" + if [[ -z "${cmd}" ]]; then + cat >&2 < tokenize( - const std::string& prompt_string, - const sentencepiece::SentencePieceProcessor* tokenizer) { +std::vector tokenize(const std::string& prompt_string, + const gcpp::GemmaTokenizer* tokenizer) { std::string formatted = "user\n" + prompt_string + "\nmodel\n"; std::vector tokens; - HWY_ASSERT(tokenizer->Encode(formatted, &tokens).ok()); + HWY_ASSERT(tokenizer->Encode(formatted, &tokens)); tokens.insert(tokens.begin(), 2); // BOS token return tokens; } @@ -58,14 +57,14 @@ int main(int argc, char** argv) { size_t ntokens = tokens.size(); // This callback function gets invoked everytime a token is generated - auto stream_token = [&pos, &gen, &ntokens, tokenizer = model.Tokenizer()]( - int token, float) { + auto stream_token = [&pos, &ntokens, tokenizer = model.Tokenizer()](int token, + float) { ++pos; if (pos < ntokens) { // print feedback } else if (token != gcpp::EOS_ID) { std::string token_text; - HWY_ASSERT(tokenizer->Decode(std::vector{token}, &token_text).ok()); + HWY_ASSERT(tokenizer->Decode(std::vector{token}, &token_text)); std::cout << token_text << std::flush; } return true; @@ -78,5 +77,5 @@ int main(int argc, char** argv) { .verbosity = 0}, tokens, /*KV cache position = */ 0, kv_cache, pool, stream_token, gen); - std::cout << std::endl; + std::cout << "\n"; } diff --git a/gemma.cc b/gemma.cc index 77478bd..201155f 100644 --- a/gemma.cc +++ b/gemma.cc @@ -31,6 +31,9 @@ #include "hwy/timer.h" // copybara:import_next_line:gemma_cpp #include "util/args.h" // Path +// copybara:import_next_line:sentencepiece +#include "src/sentencepiece_processor.h" +// copybara:end // Non-SIMD includes and types. Note that HWY_ONCE is only true on the last // compile pass, whereas we want this defined in the first. @@ -49,6 +52,7 @@ #include #include #include +#include #include #include @@ -330,7 +334,7 @@ struct Activations { struct GemmaInterface { virtual ~GemmaInterface() = default; - virtual const sentencepiece::SentencePieceProcessor* Tokenizer() const = 0; + virtual const GemmaTokenizer* Tokenizer() const = 0; virtual void Generate(size_t max_tokens, size_t max_generated_tokens, float temperature, const std::vector& prompt, @@ -339,6 +343,12 @@ struct GemmaInterface { const StreamFunc& stream_token, const AcceptFunc& accept_token, std::mt19937& gen, int verbosity) = 0; + + virtual float ComputeCrossEntropy(size_t max_tokens, + const std::vector& prompt, + KVCache& kv_cache, hwy::ThreadPool& pool, + hwy::ThreadPool& inner_pool, + int verbosity) = 0; }; template @@ -358,6 +368,29 @@ KVCache CreateKVCache(Model type) { } } +class GemmaTokenizerImpl : public GemmaTokenizer { + public: + GemmaTokenizerImpl( + std::unique_ptr&& impl) + : impl_(std::move(impl)) {} + bool Encode(const std::string& input, + std::vector* pieces) const override { + return impl_->Encode(input, pieces).ok(); + } + bool Encode(const std::string& input, + std::vector* pieces) const override { + return impl_->Encode(input, pieces).ok(); + } + // Given a sequence of ids, decodes it into a detokenized output. + bool Decode(const std::vector& ids, + std::string* detokenized) const override { + return impl_->Decode(ids, detokenized).ok(); + } + + private: + std::unique_ptr impl_; +}; + namespace { template void DeleteLayersPtrs(CompressedWeights* c_weights) { @@ -381,8 +414,8 @@ struct GemmaImpl : public GemmaInterface { DeleteLayersPtrs(weights); } - const sentencepiece::SentencePieceProcessor* Tokenizer() const override { - return tokenizer.get(); + const GemmaTokenizer* Tokenizer() const override { + return &tokenizer; } void Generate(size_t max_tokens, size_t max_generated_tokens, @@ -392,7 +425,12 @@ struct GemmaImpl : public GemmaInterface { const AcceptFunc& accept_token, std::mt19937&, int verbosity) override; - std::unique_ptr tokenizer; + float ComputeCrossEntropy(size_t max_tokens, const std::vector& prompt, + KVCache& kv_cache, hwy::ThreadPool& pool, + hwy::ThreadPool& inner_pool, + int verbosity) override; + + GemmaTokenizerImpl tokenizer; hwy::AlignedFreeUniquePtr weights_u8; hwy::AlignedUniquePtr> prefill; hwy::AlignedUniquePtr> state; @@ -804,6 +842,78 @@ void GenerateImpl(GemmaImpl& gemma, size_t max_tokens, } } +template +std::string TokenString(GemmaImpl& gemma, int token) { + std::string token_str; + gemma.Tokenizer()->Decode({token}, &token_str); + return "'" + std::regex_replace(token_str, std::regex("\n"), "\\n") + "'"; +} + +#define TOKEN(token_id) TokenString(gemma, token_id).c_str() + +template +void LogTopK(GemmaImpl& gemma, float* logits, float* dist, size_t len, + size_t k) { + std::vector> sorted(len); + for (int i = 0; i < len; ++i) { + sorted[i] = std::make_pair(dist[i], i); + } + std::sort(sorted.begin(), sorted.end(), + [](const std::pair& a, const std::pair& b) { + if (a.first != b.first) { + return a.first > b.first; + } + return a.second < b.second; + }); + for (int i = 0; i < k; ++i) { + printf(" [#%-2d token %6d = %-12s %.2e %f]\n", i + 1, sorted[i].second, + TOKEN(sorted[i].second), sorted[i].first, logits[sorted[i].second]); + } +} + +template +float ComputeCrossEntropyImpl(GemmaImpl& gemma, size_t max_tokens, + const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, + hwy::ThreadPool& inner_pool, int verbosity) { + static constexpr size_t kModelDim = TConfig::kModelDim; + static constexpr size_t kVocabSize = TConfig::kVocabSize; + Activations& activations = *gemma.state.get(); + const WeightsT& weights = + *reinterpret_cast*>(gemma.weights_u8.get()); + std::vector logits(kVocabSize); + Softmax(activations.logits.data(), kVocabSize); + float total_entropy = 0.0f; + for (size_t pos = 0; pos < max_tokens && pos < prompt.size(); ++pos) { + if (verbosity >= 4) { + LogTopK(gemma, logits.data(), activations.logits.data(), kVocabSize, 10); + } + const int token = prompt[pos]; + const float prob = activations.logits[token]; + if (verbosity >= 3) { + printf("pos %4zu token %6d = %-12s %.10e %14.10f bits\n", pos, token, + TOKEN(token), prob, -std::log(prob) / std::log(2.0)); + } + total_entropy -= std::max(std::log(prob), -64.0f); + if (verbosity >= 2 && pos % 100 == 99) { + printf("Processed %zu tokens, cross-entropy per token: %f\n", pos + 1, + total_entropy / std::log(2.0) / (pos + 1)); + } + Transformer(token, pos, weights, activations, kv_cache, pool, inner_pool); + MatVec(weights.embedder_input_embedding, 0, + activations.x.data(), + activations.logits.data(), pool); + LogitsSoftCap(30.0f, activations.logits.data(), kVocabSize); + memcpy(logits.data(), activations.logits.data(), + kVocabSize * sizeof(logits[0])); + Softmax(activations.logits.data(), kVocabSize); + } + return total_entropy / std::log(2.0); +} + +#undef TOKEN + + void Generate2B(GemmaImpl& gemma, size_t max_tokens, size_t max_generated_tokens, float temperature, const std::vector& prompt, size_t start_pos, @@ -828,6 +938,23 @@ void Generate7B(GemmaImpl& gemma, size_t max_tokens, accept_token, gen, verbosity); } +float ComputeCrossEntropy2B(GemmaImpl& gemma, size_t max_tokens, + const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, hwy::ThreadPool& inner_pool, + int verbosity) { + return ComputeCrossEntropyImpl(gemma, max_tokens, prompt, kv_cache, pool, + inner_pool, verbosity); +} + +float ComputeCrossEntropy7B(GemmaImpl& gemma, size_t max_tokens, + const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, hwy::ThreadPool& inner_pool, + int verbosity) { + return ComputeCrossEntropyImpl(gemma, max_tokens, prompt, kv_cache, pool, + inner_pool, verbosity); +} + + // Calls func(name, float*, CompressedArray&) for each tensor. float* is null // if weights = null, which happens during the first call where we attempt to // load from cache. @@ -983,6 +1110,8 @@ HWY_EXPORT(LoadWeightsT); HWY_EXPORT(CompressWeightsT); HWY_EXPORT(Generate2B); HWY_EXPORT(Generate7B); +HWY_EXPORT(ComputeCrossEntropy2B); +HWY_EXPORT(ComputeCrossEntropy7B); KVCache CreateKVCache(size_t size_cache_pos, size_t seq_len) { KVCache kv_cache = {}; @@ -995,7 +1124,7 @@ template GemmaImpl::GemmaImpl( std::unique_ptr& tokenizer, hwy::AlignedFreeUniquePtr& weights_u8, hwy::ThreadPool& pool) - : tokenizer(std::move(tokenizer)), + : tokenizer(GemmaTokenizerImpl(std::move(tokenizer))), weights_u8(std::move(weights_u8)), prefill(hwy::MakeUniqueAligned>()), state(hwy::MakeUniqueAligned>()) {} @@ -1023,6 +1152,22 @@ void GemmaImpl::Generate( kv_cache, pool, inner_pool, stream_token, accept_token, gen, verbosity); } +template <> +float GemmaImpl::ComputeCrossEntropy( + size_t max_tokens, const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, hwy::ThreadPool& inner_pool, int verbosity) { + return HWY_DYNAMIC_DISPATCH(ComputeCrossEntropy2B)( + *this, max_tokens, prompt, kv_cache, pool, inner_pool, verbosity); +} + +template <> +float GemmaImpl::ComputeCrossEntropy( + size_t max_tokens, const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, hwy::ThreadPool& inner_pool, int verbosity) { + return HWY_DYNAMIC_DISPATCH(ComputeCrossEntropy7B)( + *this, max_tokens, prompt, kv_cache, pool, inner_pool, verbosity); +} + Gemma::Gemma(const Path& tokenizer_path, const Path& weights, Model model_type, hwy::ThreadPool& pool) { std::unique_ptr tokenizer; @@ -1056,7 +1201,7 @@ Gemma::Gemma(const Path& tokenizer_path, const Path& weights, Model model_type, Gemma::~Gemma() = default; // after GemmaInterface is defined -const sentencepiece::SentencePieceProcessor* Gemma::Tokenizer() const { +const GemmaTokenizer* Gemma::Tokenizer() const { return impl_->Tokenizer(); } @@ -1090,5 +1235,16 @@ void CompressWeights(gcpp::Model model, const Path& weights, (model, weights, compressed_weights, pool); } +float ComputeCrossEntropy(Gemma& gemma, size_t max_tokens, + const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, hwy::ThreadPool& inner_pool, + int verbosity) { + pool.SetWaitMode(hwy::PoolWaitMode::kSpin); + const float result = gemma.impl_->ComputeCrossEntropy( + max_tokens, prompt, kv_cache, pool, inner_pool, verbosity); + pool.SetWaitMode(hwy::PoolWaitMode::kBlock); + return result; +} + } // namespace gcpp #endif // HWY_ONCE diff --git a/gemma.h b/gemma.h index dc96c7b..00141c9 100644 --- a/gemma.h +++ b/gemma.h @@ -60,11 +60,21 @@ struct RuntimeConfig { struct GemmaInterface; +class GemmaTokenizer { + public: + virtual bool Encode(const std::string& input, + std::vector* pieces) const = 0; + virtual bool Encode(const std::string& input, + std::vector* pieces) const = 0; + virtual bool Decode(const std::vector& ids, + std::string* detokenized) const = 0; +}; + struct Gemma { Gemma(const Path& tokenizer_path, const Path& weights, Model model_type, hwy::ThreadPool& pool); ~Gemma(); // must be defined after GemmaInterface's dtor is defined. - const sentencepiece::SentencePieceProcessor* Tokenizer() const; + const GemmaTokenizer* Tokenizer() const; std::unique_ptr impl_; }; @@ -95,6 +105,11 @@ void GenerateGemma(Gemma& gemma, RuntimeConfig runtime_config, void CompressWeights(gcpp::Model model, const Path& weights, const Path& compressed_weights, hwy::ThreadPool& pool); +float ComputeCrossEntropy(Gemma& gemma, size_t max_tokens, + const std::vector& prompt, KVCache& kv_cache, + hwy::ThreadPool& pool, hwy::ThreadPool& inner_pool, + int verbosity); + constexpr int EOS_ID = 1; } // namespace gcpp diff --git a/gemma_test.cc b/gemma_test.cc new file mode 100644 index 0000000..c024ffc --- /dev/null +++ b/gemma_test.cc @@ -0,0 +1,304 @@ +// Copyright 2024 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// copybara:import_next_line:gemma_cpp +#include "gemma.h" + +#include + +#include "hwy/contrib/thread_pool/thread_pool.h" +#include "hwy/tests/test_util-inl.h" +// copybara:import_next_line:gemma_cpp +#include "ops.h" +// copybara:import_next_line:gemma_cpp +#include "util/args.h" + +namespace gcpp { +namespace { + +class GemmaTest : public ::testing::Test { + protected: + GemmaTest() + : weights("./2b-it-mqa.sbs"), + tokenizer("./tokenizer.spm"), + pool(std::min(20, (std::thread::hardware_concurrency() - 1) / 2)), + inner_pool(0), + model_type(gcpp::Model::GEMMA_2B), + model(tokenizer, weights, model_type, pool) { + kv_cache = CreateKVCache(model_type); + } + + std::string GemmaReply(const std::string& prompt_string) { + std::mt19937 gen; + gen.seed(42); + + std::vector prompt; + HWY_ASSERT(model.Tokenizer()->Encode(prompt_string, &prompt)); + // For both pre-trained and instruction-tuned models: prepend "" token + // if needed. + prompt.insert(prompt.begin(), 2); + + std::vector response; + auto stream_token = [&response](int token, float) { + response.push_back(token); + return true; + }; + gcpp::GenerateGemma( + model, /*max_tokens=*/3072, /*max_generated_tokens=*/2048, + /*temperature=*/1.0, prompt, /*start_pos=*/0, kv_cache, pool, + inner_pool, stream_token, + /*accept=*/[](int) { return true; }, gen, /*verbosity=*/0); + std::string response_text; + HWY_ASSERT(model.Tokenizer()->Decode(response, &response_text)); + return response_text; + } + + float GemmaCrossEntropy(const std::string& prompt_string) { + std::vector prompt; + HWY_ASSERT(model.Tokenizer()->Encode(prompt_string, &prompt)); + return gcpp::ComputeCrossEntropy(model, /*max_tokens=*/3072, prompt, + kv_cache, pool, inner_pool, + /*verbosity=*/0) / + prompt_string.size(); + } + + void TestQuestions(const char* kQA[][2], size_t num_questions) { + for (size_t i = 0; i < num_questions; ++i) { + std::cout << "Question " << i + 1 << "\n\n"; + std::string response = GemmaReply(kQA[i][0]); + std::cout << response << "\n\n"; + EXPECT_TRUE(response.find(kQA[i][1]) != std::string::npos); + } + } + + gcpp::Path weights; + gcpp::Path tokenizer; + gcpp::KVCache kv_cache; + hwy::ThreadPool pool; + hwy::ThreadPool inner_pool; + gcpp::Model model_type = {}; + gcpp::Gemma model; +}; + +TEST_F(GemmaTest, Geography) { + static const char* kQA[][2] = { + {"What is the capital of Hungary?", "Budapest"}, + {"How many states does the US have?", "50"}, + {"list me ten biggest cities in the world", "Tokyo"}, + }; + static const size_t kNum = sizeof(kQA) / sizeof(kQA[0]); + TestQuestions(kQA, kNum); +} + +TEST_F(GemmaTest, History) { + static const char* kQA[][2] = { + {"When was the Battle of Hastings?", "1066"}, + {"Who fought at the Battle of Marathon?", "Greek"}, + }; + static const size_t kNum = sizeof(kQA) / sizeof(kQA[0]); + TestQuestions(kQA, kNum); +} + +TEST_F(GemmaTest, Arithmetic) { + static const char* kQA[][2] = { + {"what is 13 + 14?", "27"}, + {"what is 7 * 8", "56"}, + }; + static const size_t kNum = sizeof(kQA) / sizeof(kQA[0]); + TestQuestions(kQA, kNum); +} + +static const char kJingleBells[] = R"( +Dashing through the snow +In a one-horse open sleigh +O'er the fields we go +Laughing all the way +Bells on bobtails ring +Making spirits bright +What fun it is to ride and sing +A sleighing song tonight +)"; + +// The "Hay Draft" of the Gettysburg Address. +static const char kGettysburg[] = { + "Four score and seven years ago our fathers brought forth, upon this " + "continent, a new nation, conceived in Liberty, and dedicated to the " + "proposition that all men are created equal.\n\nNow we are engaged in a " + "great civil war, testing whether that nation, or any nation, so " + "conceived, and so dedicated, can long endure. We are met here on a great " + "battlefield of that war. We have come to dedicate a portion of it as a " + "final resting place for those who here gave their lives that that nation " + "might live. It is altogether fitting and proper that we should do " + "this.\n\nBut in a larger sense we can not dedicate -- we can not " + "consecrate -- we can not hallow this ground. The brave men, living and " + "dead, who struggled, here, have consecrated it far above our poor power " + "to add or detract. The world will little note, nor long remember, what we " + "say here, but can never forget what they did here. It is for us, the " + "living, rather to be dedicated here to the unfinished work which they " + "have, thus far, so nobly carried on. It is rather for us to be here " + "dedicated to the great task remaining before us -- that from these " + "honored dead we take increased devotion to that cause for which they here " + "gave the last full measure of devotion -- that we here highly resolve " + "that these dead shall not have died in vain; that this nation shall have " + "a new birth of freedom; and that this government of the people, by the " + "people, for the people, shall not perish from the earth.\n"}; + +// The Declaration of Independence. +static const char kDeclaration[] = { + "IN CONGRESS, July 4, 1776.\n\nThe unanimous Declaration of the thirteen " + "united States of America,\n\nWhen in the Course of human events, it " + "becomes necessary for one people to dissolve the political bands which " + "have connected them with another, and to assume among the powers of the " + "earth, the separate and equal station to which the Laws of Nature and of " + "Nature's God entitle them, a decent respect to the opinions of mankind " + "requires that they should declare the causes which impel them to the " + "separation.\n\nWe hold these truths to be self-evident, that all men are " + "created equal, that they are endowed by their Creator with certain " + "unalienable Rights, that among these are Life, Liberty and the pursuit of " + "Happiness.--That to secure these rights, Governments are instituted among " + "Men, deriving their just powers from the consent of the governed, --That " + "whenever any Form of Government becomes destructive of these ends, it is " + "the Right of the People to alter or to abolish it, and to institute new " + "Government, laying its foundation on such principles and organizing its " + "powers in such form, as to them shall seem most likely to effect their " + "Safety and Happiness. Prudence, indeed, will dictate that Governments " + "long established should not be changed for light and transient causes; " + "and accordingly all experience hath shewn, that mankind are more disposed " + "to suffer, while evils are sufferable, than to right themselves by " + "abolishing the forms to which they are accustomed. But when a long train " + "of abuses and usurpations, pursuing invariably the same Object evinces a " + "design to reduce them under absolute Despotism, it is their right, it is " + "their duty, to throw off such Government, and to provide new Guards for " + "their future security.--Such has been the patient sufferance of these " + "Colonies; and such is now the necessity which constrains them to alter " + "their former Systems of Government. The history of the present King of " + "Great Britain is a history of repeated injuries and usurpations, all " + "having in direct object the establishment of an absolute Tyranny over " + "these States. To prove this, let Facts be submitted to a candid " + "world.\n\nHe has refused his Assent to Laws, the most wholesome and " + "necessary for the public good.\nHe has forbidden his Governors to pass " + "Laws of immediate and pressing importance, unless suspended in their " + "operation till his Assent should be obtained; and when so suspended, he " + "has utterly neglected to attend to them.\nHe has refused to pass other " + "Laws for the accommodation of large districts of people, unless those " + "people would relinquish the right of Representation in the Legislature, a " + "right inestimable to them and formidable to tyrants only.\nHe has called " + "together legislative bodies at places unusual, uncomfortable, and distant " + "from the depository of their public Records, for the sole purpose of " + "fatiguing them into compliance with his measures.\nHe has dissolved " + "Representative Houses repeatedly, for opposing with manly firmness his " + "invasions on the rights of the people.\nHe has refused for a long time, " + "after such dissolutions, to cause others to be elected; whereby the " + "Legislative powers, incapable of Annihilation, have returned to the " + "People at large for their exercise; the State remaining in the mean time " + "exposed to all the dangers of invasion from without, and convulsions " + "within.\nHe has endeavoured to prevent the population of these States; " + "for that purpose obstructing the Laws for Naturalization of Foreigners; " + "refusing to pass others to encourage their migrations hither, and raising " + "the conditions of new Appropriations of Lands.\nHe has obstructed the " + "Administration of Justice, by refusing his Assent to Laws for " + "establishing Judiciary powers.\nHe has made Judges dependent on his Will " + "alone, for the tenure of their offices, and the amount and payment of " + "their salaries.\nHe has erected a multitude of New Offices, and sent " + "hither swarms of Officers to harrass our people, and eat out their " + "substance.\nHe has kept among us, in times of peace, Standing Armies " + "without the Consent of our legislatures.\nHe has affected to render the " + "Military independent of and superior to the Civil power.\nHe has combined " + "with others to subject us to a jurisdiction foreign to our constitution, " + "and unacknowledged by our laws; giving his Assent to their Acts of " + "pretended Legislation:\nFor Quartering large bodies of armed troops among " + "us:\nFor protecting them, by a mock Trial, from punishment for any " + "Murders which they should commit on the Inhabitants of these States:\nFor " + "cutting off our Trade with all parts of the world:\nFor imposing Taxes on " + "us without our Consent:\nFor depriving us in many cases, of the benefits " + "of Trial by Jury:\nFor transporting us beyond Seas to be tried for " + "pretended offences\nFor abolishing the free System of English Laws in a " + "neighbouring Province, establishing therein an Arbitrary government, and " + "enlarging its Boundaries so as to render it at once an example and fit " + "instrument for introducing the same absolute rule into these " + "Colonies:\nFor taking away our Charters, abolishing our most valuable " + "Laws, and altering fundamentally the Forms of our Governments:\nFor " + "suspending our own Legislatures, and declaring themselves invested with " + "power to legislate for us in all cases whatsoever.\nHe has abdicated " + "Government here, by declaring us out of his Protection and waging War " + "against us.\nHe has plundered our seas, ravaged our Coasts, burnt our " + "towns, and destroyed the lives of our people.\nHe is at this time " + "transporting large Armies of foreign Mercenaries to compleat the works of " + "death, desolation and tyranny, already begun with circumstances of " + "Cruelty & perfidy scarcely paralleled in the most barbarous ages, and " + "totally unworthy the Head of a civilized nation.\nHe has constrained our " + "fellow Citizens taken Captive on the high Seas to bear Arms against their " + "Country, to become the executioners of their friends and Brethren, or to " + "fall themselves by their Hands.\nHe has excited domestic insurrections " + "amongst us, and has endeavoured to bring on the inhabitants of our " + "frontiers, the merciless Indian Savages, whose known rule of warfare, is " + "an undistinguished destruction of all ages, sexes and conditions.\n\nIn " + "every stage of these Oppressions We have Petitioned for Redress in the " + "most humble terms: Our repeated Petitions have been answered only by " + "repeated injury. A Prince whose character is thus marked by every act " + "which may define a Tyrant, is unfit to be the ruler of a free " + "people.\n\nNor have We been wanting in attentions to our Brittish " + "brethren. We have warned them from time to time of attempts by their " + "legislature to extend an unwarrantable jurisdiction over us. We have " + "reminded them of the circumstances of our emigration and settlement here. " + "We have appealed to their native justice and magnanimity, and we have " + "conjured them by the ties of our common kindred to disavow these " + "usurpations, which, would inevitably interrupt our connections and " + "correspondence. They too have been deaf to the voice of justice and of " + "consanguinity. We must, therefore, acquiesce in the necessity, which " + "denounces our Separation, and hold them, as we hold the rest of mankind, " + "Enemies in War, in Peace Friends.\n\nWe, therefore, the Representatives " + "of the united States of America, in General Congress, Assembled, " + "appealing to the Supreme Judge of the world for the rectitude of our " + "intentions, do, in the Name, and by Authority of the good People of these " + "Colonies, solemnly publish and declare, That these United Colonies are, " + "and of Right ought to be Free and Independent States; that they are " + "Absolved from all Allegiance to the British Crown, and that all political " + "connection between them and the State of Great Britain, is and ought to " + "be totally dissolved; and that as Free and Independent States, they have " + "full Power to levy War, conclude Peace, contract Alliances, establish " + "Commerce, and to do all other Acts and Things which Independent States " + "may of right do. And for the support of this Declaration, with a firm " + "reliance on the protection of divine Providence, we mutually pledge to " + "each other our Lives, our Fortunes and our sacred Honor.\n"}; + +TEST_F(GemmaTest, CrossEntropySmall) { + static const char kSmall[] = + "The capital of Hungary is Budapest which is located in Europe."; + float entropy = GemmaCrossEntropy(kSmall); + std::cout << "per-byte entropy: " << entropy << "\n"; + EXPECT_LT(entropy, 1.6f); +} + +TEST_F(GemmaTest, CrossEntropyJingleBells) { + float entropy = GemmaCrossEntropy(kJingleBells); + std::cout << "per-byte entropy: " << entropy << "\n"; + EXPECT_LT(entropy, 2.3f); +} + +TEST_F(GemmaTest, CrossEntropyGettysburg) { + float entropy = GemmaCrossEntropy(kGettysburg); + std::cout << "per-byte entropy: " << entropy << "\n"; + EXPECT_LT(entropy, 1.2f); +} + +TEST_F(GemmaTest, CrossEntropyDeclaration) { + float entropy = GemmaCrossEntropy(kDeclaration); + std::cout << "per-byte entropy: " << entropy << "\n"; + EXPECT_LT(entropy, 1.0f); +} + +} // namespace +} // namespace gcpp diff --git a/goldens/2b-it.txt b/goldens/2b-it.txt new file mode 100644 index 0000000..d3fa68d --- /dev/null +++ b/goldens/2b-it.txt @@ -0,0 +1,40 @@ ++++ +list me ten biggest cities +--- +8. Mexico City ++++ +What is the capital of Switzerland? +--- +The capital of Switzerland is Bern. ++++ +What is the name of the president of the US? +--- +I cannot answer this question ++++ +Is it raining frequently in China? +--- +average of 1,200 rainy days per year ++++ +tell me a french joke +--- +What do you call a French person who's always complaining? ++++ +which year did the 21th century started? +--- +The 21st century started in the year **1900** ++++ +what's your favorite pokemon? +--- +I am unable to provide a favorite Pokémon ++++ +How to bake a tasty cake? +--- +* 1 and 1/2 cups all-purpose flour ++++ +Which is the richest country in the world? +--- +richest country in the world is the United States ++++ +do you like electronic music? +--- +Electronic music is a broad genre \ No newline at end of file diff --git a/goldens/7b-it.txt b/goldens/7b-it.txt new file mode 100644 index 0000000..816dc8e --- /dev/null +++ b/goldens/7b-it.txt @@ -0,0 +1,12 @@ ++++ +list me ten biggest cities +--- +6. Mexico City, Mexico ++++ +What is the capital of Switzerland? +--- +Bern is the capital of Switzerland. ++++ +What is the name of the president of the US? +--- +The answer is: Joe Biden. \ No newline at end of file diff --git a/run.cc b/run.cc index 7ebf7b4..e16910d 100644 --- a/run.cc +++ b/run.cc @@ -40,13 +40,14 @@ namespace gcpp { -static constexpr std::string_view kAsciiArtBanner = - " __ _ ___ _ __ ___ _ __ ___ __ _ ___ _ __ _ __\n" - " / _` |/ _ \\ '_ ` _ \\| '_ ` _ \\ / _` | / __| '_ \\| '_ \\\n" - "| (_| | __/ | | | | | | | | | | (_| || (__| |_) | |_) |\n" - " \\__, |\\___|_| |_| |_|_| |_| |_|\\__,_(_)___| .__/| .__/\n" - " __/ | | | | |\n" - " |___/ |_| |_|"; +static constexpr std::string_view kAsciiArtBanner = R""( + __ _ ___ _ __ ___ _ __ ___ __ _ ___ _ __ _ __ + / _` |/ _ \ '_ ` _ \| '_ ` _ \ / _` | / __| '_ \| '_ \ +| (_| | __/ | | | | | | | | | | (_| || (__| |_) | |_) | + \__, |\___|_| |_| |_|_| |_| |_|\__,_(_)___| .__/| .__/ + __/ | | | | | + |___/ |_| |_| +)""; void ShowConfig(LoaderArgs& loader, InferenceArgs& inference, AppArgs& app) { loader.Print(app.verbosity); @@ -60,7 +61,7 @@ void ShowConfig(LoaderArgs& loader, InferenceArgs& inference, AppArgs& app) { << "Prefill Token Batch Size : " << gcpp::kPrefillBatchSize << "\n" << "Hardware concurrency : " - << std::thread::hardware_concurrency() << std::endl + << std::thread::hardware_concurrency() << "\n" << "Instruction set : " << hwy::TargetName(hwy::DispatchedTarget()) << " (" << hwy::VectorBytes() * 8 << " bits)" << "\n" @@ -132,13 +133,13 @@ void ReplGemma(gcpp::Gemma& model, ModelTraining training, } } else { std::string token_text; - HWY_ASSERT(tokenizer->Decode(std::vector{token}, &token_text).ok()); + HWY_ASSERT(tokenizer->Decode(std::vector{token}, &token_text)); // +1 since position is incremented above if (current_pos == prompt_size + 1) { // first token of response token_text.erase(0, token_text.find_first_not_of(" \t\n")); if (verbosity >= 1) { - std::cout << std::endl << std::endl; + std::cout << "\n\n"; } } std::cout << token_text << std::flush; @@ -189,7 +190,7 @@ void ReplGemma(gcpp::Gemma& model, ModelTraining training, } } - HWY_ASSERT(model.Tokenizer()->Encode(prompt_string, &prompt).ok()); + HWY_ASSERT(model.Tokenizer()->Encode(prompt_string, &prompt)); // For both pre-trained and instruction-tuned models: prepend "" token // if needed. @@ -199,7 +200,8 @@ void ReplGemma(gcpp::Gemma& model, ModelTraining training, prompt_size = prompt.size(); - std::cerr << std::endl << "[ Reading prompt ] " << std::flush; + std::cerr << "\n" + << "[ Reading prompt ] " << std::flush; const double time_start = hwy::platform::Now(); GenerateGemma(model, args.max_tokens, args.max_generated_tokens, @@ -209,10 +211,10 @@ void ReplGemma(gcpp::Gemma& model, ModelTraining training, const double tok_sec = current_pos / (time_end - time_start); if (verbosity >= 2) { std::cout << current_pos << " tokens (" << abs_pos << " total tokens)" - << std::endl - << tok_sec << " tokens / sec" << std::endl; + << "\n" + << tok_sec << " tokens / sec" << "\n"; } - std::cout << std::endl << std::endl; + std::cout << "\n\n"; } std::cout << "max_tokens (" << args.max_tokens diff --git a/util/app.h b/util/app.h index 2348051..f67849b 100644 --- a/util/app.h +++ b/util/app.h @@ -25,7 +25,7 @@ #include #include // IDE does not recognize errno.h as providing errno. #include -#endif +#endif // HWY_OS_LINUX #include #include @@ -170,8 +170,8 @@ struct LoaderArgs : public ArgsBase { weights = compressed_weights; } else { return "Only one of --weights and --compressed_weights can be " - "specified. To create compressed weights use the compress_weights " - "tool."; + "specified. To create compressed weights use the " + "compress_weights tool."; } } if (weights.path.empty()) { @@ -179,12 +179,12 @@ struct LoaderArgs : public ArgsBase { } if (!weights.exists()) { return "Can't open file specified with --weights flag."; - } + } return nullptr; } Path tokenizer; - Path weights; // weights file location + Path weights; // weights file location Path compressed_weights; std::string model_type;