mirror of https://github.com/google/gemma.cpp.git

libgemma refactor - review changes

commit 0fc80fad05 (parent cc5c24c4f8)
examples/hello_world/CMakeLists.txt

@@ -14,7 +14,6 @@
 
 cmake_minimum_required(VERSION 3.11)
 project(hello_world)
-set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 include(FetchContent)
@@ -31,18 +30,18 @@ if (NOT BUILD_MODE)
   set(BUILD_MODE "remote")
 endif()
 if (BUILD_MODE STREQUAL "local")
+  # Relative path to gemma.cpp from examples/hello_world/build/
   FetchContent_Declare(gemma SOURCE_DIR ../../..)
 else()
   FetchContent_Declare(gemma GIT_REPOSITORY https://github.com/google/gemma.cpp.git GIT_TAG 8c7b2cf61b9794b806de091685dc6739dd3db837)
 endif()
 FetchContent_MakeAvailable(gemma)
 
 if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE "Release")
 endif()
 
 add_executable(hello_world run.cc)
-set_property(TARGET hello_world PROPERTY CXX_STANDARD 17)
 target_link_libraries(hello_world hwy hwy_contrib sentencepiece libgemma)
 FetchContent_GetProperties(sentencepiece)
 target_include_directories(hello_world PRIVATE ${sentencepiece_SOURCE_DIR})
examples/hello_world/README.md

@@ -1,23 +1,29 @@
 # Hello World Example
 
-This is a minimal/template project for using `gemma.cpp` as a library. Instead of an interactive interface, it sets up the model state and generates text for a single hard coded prompt.
+This is a minimal/template project for using `gemma.cpp` as a library. Instead
+of an interactive interface, it sets up the model state and generates text for a
+single hard coded prompt.
 
-Build steps are similar to the main `gemma` executable. From inside the top-level directory. For now only `cmake`/`make` is available for builds (PRs welcome for other build options).
+Build steps are similar to the main `gemma` executable. For now only
+`cmake`/`make` is available for builds (PRs welcome for other build options).
 
-First use `cmake` to configure the project, assuming you are in the `hello_world` example directory (`gemma.cpp/examples/hello_world`):
+First use `cmake` to configure the project, starting from the `hello_world`
+example directory (`gemma.cpp/examples/hello_world`):
 
 ```sh
 cmake -B build
 ```
 
-This sets up a build configuration in `gemma.cpp/examples/hello_world/build`. Note that this fetches `libgemma` from a git commit hash on github. Alternatively if you want to build using the local version of `gemma.cpp` use:
+This sets up a build configuration in `gemma.cpp/examples/hello_world/build`.
+Note that this fetches `libgemma` from a git commit hash on GitHub.
+Alternatively, if you want to build using the local version of `gemma.cpp`, use:
 
 ```sh
 cmake -B build -DBUILD_MODE=local
 ```
 
-Make sure you delete the contents of the build directory before changing configurations.
+Make sure you delete the contents of the build directory before changing
+configurations.
 
 Then use `make` to build the project:

@@ -26,9 +32,13 @@ cd build
 make hello_world
 ```
 
-As with the top-level `gemma.cpp` project you can use the `make` commands `-j` flag to use parallel threads for faster builds.
+As with the top-level `gemma.cpp` project, you can use the `make` command's `-j`
+flag to use parallel threads for faster builds.
 
-From inside the `gemma.cpp/examples/hello_world/build` directory, there should be a `hello_world` executable. You can run it with the same 3 model arguments as gemma.cpp specifying the tokenizer, compressed weights file, and model type, for example:
+From inside the `gemma.cpp/examples/hello_world/build` directory, there should
+be a `hello_world` executable. You can run it with the same 3 model arguments as
+gemma.cpp, specifying the tokenizer, compressed weights file, and model type,
+for example:
 
 ```sh
 ./hello_world --tokenizer tokenizer.spm --compressed_weights 2b-it-sfp.sbs --model 2b-it
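Review note: for readers using this example as a template, here is roughly how the README's three flags map onto the library types visible elsewhere in this diff. This is a hedged sketch, not the example's actual code: the `gemma.h` include name, the `Model::GEMMA_2B` enumerator, and the brace-initialization of `Path` are assumptions; only the `Gemma` constructor, `KVCache`, and `CreateKVCache` signatures appear in the diff below.

```cpp
#include <thread>

#include "gemma.h"  // assumed include; provides gcpp::Gemma, Path, Model, KVCache
#include "hwy/contrib/thread_pool/thread_pool.h"

int main() {
  hwy::ThreadPool pool(std::thread::hardware_concurrency());
  // The three README flags become the Path/Model constructor arguments:
  gcpp::Path tokenizer{"tokenizer.spm"};     // --tokenizer
  gcpp::Path compressed{"2b-it-sfp.sbs"};    // --compressed_weights
  gcpp::Path weights{};                      // raw weights; assumption: may be
                                             // empty when compressed weights exist
  gcpp::Model type = gcpp::Model::GEMMA_2B;  // --model 2b-it (assumed enumerator)
  gcpp::Gemma model(tokenizer, compressed, weights, type, pool);
  gcpp::KVCache kv_cache = gcpp::CreateKVCache(type);
  // ... tokenize a prompt and call the generation entry point ...
  return 0;
}
```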
examples/hello_world/run.cc

@@ -24,12 +24,12 @@
 #include "hwy/contrib/thread_pool/thread_pool.h"
 
 std::vector<int> tokenize(
-    std::string prompt_string,
+    const std::string& prompt_string,
     const sentencepiece::SentencePieceProcessor* tokenizer) {
-  prompt_string = "<start_of_turn>user\n" + prompt_string +
+  std::string formatted = "<start_of_turn>user\n" + prompt_string +
                   "<end_of_turn>\n<start_of_turn>model\n";
   std::vector<int> tokens;
-  HWY_ASSERT(tokenizer->Encode(prompt_string, &tokens).ok());
+  HWY_ASSERT(tokenizer->Encode(formatted, &tokens).ok());
   tokens.insert(tokens.begin(), 2);  // BOS token
   return tokens;
 }
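Review note: `tokenize` now takes the prompt by `const` reference and builds the turn-marker wrapper in a local `formatted` string instead of mutating a by-value parameter. A minimal usage sketch, assuming a SentencePiece model file on disk (the path and `main` scaffolding are illustrative, not part of the diff):

```cpp
#include <string>
#include <vector>

#include "sentencepiece_processor.h"

// As defined in examples/hello_world/run.cc above.
std::vector<int> tokenize(const std::string& prompt_string,
                          const sentencepiece::SentencePieceProcessor* tokenizer);

int main() {
  sentencepiece::SentencePieceProcessor sp;
  if (!sp.Load("tokenizer.spm").ok()) return 1;  // hypothetical path
  // Result: BOS id (2), then the tokens of
  // "<start_of_turn>user\n<prompt><end_of_turn>\n<start_of_turn>model\n".
  std::vector<int> ids = tokenize("Hello, world!", &sp);
  return ids.empty() ? 1 : 0;
}
```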
gemma.cc

@@ -261,8 +261,7 @@ KVCache CreateKVCache(Model type) {
 
 template <class Config>
 struct GemmaImpl : public GemmaInterface {
-  GemmaImpl(  // const LoaderArgs& args,
-      std::unique_ptr<sentencepiece::SentencePieceProcessor>& tokenizer,
+  GemmaImpl(std::unique_ptr<sentencepiece::SentencePieceProcessor>& tokenizer,
             hwy::AlignedFreeUniquePtr<uint8_t[]>& compressed_weights,
             hwy::ThreadPool& pool);

@@ -767,16 +766,10 @@ GemmaImpl<Config>::GemmaImpl(
     std::unique_ptr<sentencepiece::SentencePieceProcessor>& tokenizer,
     hwy::AlignedFreeUniquePtr<uint8_t[]>& compressed_weights,
     hwy::ThreadPool& pool)
-    // GemmaImpl<Config>::GemmaImpl(const LoaderArgs& args, hwy::ThreadPool&
-    // pool)
     : compressed_weights(std::move(compressed_weights)),
-      // HWY_DYNAMIC_DISPATCH(GetCompressedWeightsT)(args, pool)),
       prefill(hwy::MakeUniqueAligned<Activations<Config, kPrefillBatchSize>>()),
       state(hwy::MakeUniqueAligned<Activations<Config, 1>>()),
-      tokenizer(std::move(tokenizer)) {
-  // PROFILER_ZONE("Startup.tokenizer");
-  // HWY_ASSERT(tokenizer.Load(args.tokenizer.path).ok());
-}
+      tokenizer(std::move(tokenizer)) {}
 
 template <>
 void GemmaImpl<ConfigGemma2B>::Generate(
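Review note: with the commented-out `LoaderArgs` path deleted, the constructor body collapses to `{}`, and both resources arrive by non-const reference and are `std::move`d into members, leaving the caller's handles empty. A generic, self-contained sketch of that ownership-transfer shape (`Holder` is illustrative, not a library type):

```cpp
#include <cassert>
#include <memory>
#include <utility>

// Same shape as GemmaImpl's constructor: accept a unique_ptr by lvalue
// reference, then move from it in the member-initializer list.
struct Holder {
  explicit Holder(std::unique_ptr<int>& p) : owned(std::move(p)) {}
  std::unique_ptr<int> owned;
};

int main() {
  auto p = std::make_unique<int>(42);
  Holder h(p);
  assert(p == nullptr);    // caller's pointer was moved from
  assert(*h.owned == 42);  // ownership now lives in the Holder
  return 0;
}
```

Taking the `unique_ptr` by value would make the transfer visible at call sites via an explicit `std::move`; the reference form used here trades that visibility for unchanged callers.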
@@ -804,10 +797,14 @@ void GemmaImpl<ConfigGemma7B>::Generate(
 Gemma::Gemma(const Path& tokenizer_path, const Path& compressed_weights_path,
              const Path& weights_path, Model model_type,
              hwy::ThreadPool& pool) {
+  {
     PROFILER_ZONE("Startup.tokenizer");
     std::unique_ptr<sentencepiece::SentencePieceProcessor> tokenizer =
         std::make_unique<sentencepiece::SentencePieceProcessor>();
-    HWY_ASSERT(tokenizer->Load(tokenizer_path.path).ok());
+    if (!tokenizer->Load(tokenizer_path.path).ok()) {
+      HWY_ABORT("Failed to load the tokenizer file.");
+    }
+  }
   auto compressed_weights = HWY_DYNAMIC_DISPATCH(GetCompressedWeightsT)(
       model_type, weights_path, compressed_weights_path, pool);
   switch (model_type) {
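Review note: two changes here. First, the `HWY_ASSERT` on the `Load()` result becomes an explicit check with `HWY_ABORT`, so a missing or corrupt tokenizer file reports a readable message rather than a stringified assertion. Second, the new braces put the tokenizer setup in its own block, so the RAII `PROFILER_ZONE` stops timing before the weight loading below it. A self-contained sketch of that scoping effect, using a stand-in timer rather than hwy's profiler:

```cpp
#include <chrono>
#include <cstdio>

// Stand-in for PROFILER_ZONE: an RAII timer that reports when its
// enclosing scope ends.
struct ScopedZone {
  explicit ScopedZone(const char* n) : name(n) {}
  ~ScopedZone() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start)
                  .count();
    std::printf("%s: %lld us\n", name, static_cast<long long>(us));
  }
  const char* name;
  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
};

int main() {
  {
    ScopedZone zone("Startup.tokenizer");
    // ... tokenizer loading happens here ...
  }  // zone reports here, before the next phase starts
  // ... weight loading is not attributed to "Startup.tokenizer" ...
  return 0;
}
```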
run.cc

@@ -190,7 +190,6 @@ void ReplGemma(gcpp::Gemma& model, gcpp::KVCache& kv_cache,
     }
   }
 
-  // HWY_ASSERT(model.Tokenizer().Encode(prompt_string, &prompt).ok());
   HWY_ASSERT(model.Tokenizer()->Encode(prompt_string, &prompt).ok());
 
   // For both pre-trained and instruction-tuned models: prepend "<bos>" token