diff --git a/README.md b/README.md
index e78be42..af388c3 100644
--- a/README.md
+++ b/README.md
@@ -305,6 +305,24 @@
 A tall tree stands in front of the building, and a window on the building is
 visible from the water. The water is green, and the sky is blue.
 ```
+### Migrating to single-file format
+
+There is now a new format for the weights file: a single file that can
+contain the tokenizer (and the model type) directly. A tool to migrate from
+the multi-file format to the single-file format is available.
+
+```sh
+compression/migrate_weights \
+  --tokenizer .../tokenizer.spm --weights .../gemma2-2b-it-sfp.sbs \
+  --model gemma2-2b-it --output_weights .../gemma2-2b-it-sfp-single.sbs
+```
+
+After migration, you can use the new weights file with gemma.cpp like this:
+
+```sh
+./gemma --weights .../gemma2-2b-it-sfp-single.sbs
+```
+
 ### Troubleshooting and FAQs
 
 **Running `./gemma` fails with "Failed to read cache gating_ein_0 (error 294) ..."**
@@ -331,9 +349,8 @@
 and not a pre-trained model (any model with a `-pt` suffix).
 
 **How do I convert my fine-tune to a `.sbs` compressed model file?**
 
-We're working on a python script to convert a standard model format to `.sbs`,
-and hope have it available soon. Follow
-[this issue](https://github.com/google/gemma.cpp/issues/11) for updates.
+See compression/convert_weights.py to convert a PyTorch checkpoint. (The code
+may need updates to work with Gemma-2 models.)
 
 **What are some easy ways to make the model run faster?**
diff --git a/compression/migrate_weights.cc b/compression/migrate_weights.cc
index 7a9613e..97e6343 100644
--- a/compression/migrate_weights.cc
+++ b/compression/migrate_weights.cc
@@ -55,7 +55,7 @@ int main(int argc, char** argv) {
     fprintf(stderr, "Skipping model load because: %s\n", err);
     return 1;
   }
-  gcpp::GemmaEnv env(argc, argv, /*required=*/true);
+  gcpp::GemmaEnv env(argc, argv);
   hwy::ThreadPool pool(0);
   env.GetModel()->Save(args.output_weights, pool);
   return 0;
diff --git a/evals/benchmark_helper.cc b/evals/benchmark_helper.cc
index d16e5ef..76251f5 100644
--- a/evals/benchmark_helper.cc
+++ b/evals/benchmark_helper.cc
@@ -92,9 +92,9 @@ static AppArgs MakeAppArgs(int argc, char** argv) {
   return AppArgs(argc, argv);
 }
 
-GemmaEnv::GemmaEnv(int argc, char** argv, bool model_type_required)
-    : GemmaEnv(LoaderArgs(argc, argv, model_type_required),
-               InferenceArgs(argc, argv), MakeAppArgs(argc, argv)) {}
+GemmaEnv::GemmaEnv(int argc, char** argv)
+    : GemmaEnv(LoaderArgs(argc, argv), InferenceArgs(argc, argv),
+               MakeAppArgs(argc, argv)) {}
 
 QueryResult GemmaEnv::QueryModel(const std::vector<int>& tokens) {
   QueryResult result;
diff --git a/evals/benchmark_helper.h b/evals/benchmark_helper.h
index 6a1302e..7e7f1bf 100644
--- a/evals/benchmark_helper.h
+++ b/evals/benchmark_helper.h
@@ -44,7 +44,7 @@ struct QueryResult {
 
 class GemmaEnv {
  public:
   // Calls the other constructor with *Args arguments initialized from argv.
-  GemmaEnv(int argc, char** argv, bool model_type_required = false);
+  GemmaEnv(int argc, char** argv);
   GemmaEnv(const LoaderArgs& loader, const InferenceArgs& inference,
            const AppArgs& app);
diff --git a/evals/gemma_test.cc b/evals/gemma_test.cc
index 6fadb0f..7674c5e 100644
--- a/evals/gemma_test.cc
+++ b/evals/gemma_test.cc
@@ -28,6 +28,7 @@
 // This test can be run manually with the downloaded gemma weights.
 // To run the test, pass the following flags:
 // --model <model> --tokenizer <tokenizer_path> --weights <weights_path>
+// or just use the single-file weights file with --weights <weights_path>.
 // It should pass for the following models:
 // Gemma1: 2b-it (v1 and v1.1), 7b-it (v1 and v1.1), gr2b-it,
 // Gemma2: gemma2-2b-it, 9b-it, 27b-it,
diff --git a/gemma/weights.h b/gemma/weights.h
index 8cb4bce..ca32ad3 100644
--- a/gemma/weights.h
+++ b/gemma/weights.h
@@ -525,9 +525,9 @@ class ModelWeightsStorage {
 
   // Loads the weights from a blob store file. Supports multi-file or
   // single-file format. If the weights file contains a TOC, then it is in
-  // single-file format, and model_type, weight_type, training are ignored,
+  // single-file format, and model_type, weight_type, wrapping are ignored,
   // and tokenizer_proto is required and written to.
-  // With a multi-file format, file, model_type, weight_type, training are
+  // With a multi-file format, file, model_type, weight_type, wrapping are
   // required and tokenizer_proto is ignored.
   BlobError Load(const Path& weights, Model model_type, Type weight_type,
                  PromptWrapping wrapping, hwy::ThreadPool& pool,
diff --git a/paligemma/paligemma_test.cc b/paligemma/paligemma_test.cc
index b28849e..bd8fb2d 100644
--- a/paligemma/paligemma_test.cc
+++ b/paligemma/paligemma_test.cc
@@ -27,6 +27,7 @@
 // This test can be run manually with the downloaded PaliGemma weights.
 // To run the test, pass the following flags:
 // --model paligemma-224 --tokenizer <tokenizer_path> --weights <weights_path>
+// or just use the single-file weights file with --weights <weights_path>.
 // It should pass for the following models:
 // paligemma-3b-mix-224, paligemma2-3b-pt-448
 
diff --git a/util/app.h b/util/app.h
index 6c66d2c..4b6dffb 100644
--- a/util/app.h
+++ b/util/app.h
@@ -126,8 +126,7 @@ static inline NestedPools CreatePools(const AppArgs& app) {
 }
 
 struct LoaderArgs : public ArgsBase<LoaderArgs> {
-  LoaderArgs(int argc, char* argv[], bool required = true)
-      : model_type_required(required) {
+  LoaderArgs(int argc, char* argv[]) {
     InitAndParse(argc, argv);
   }
   LoaderArgs(const std::string& tokenizer_path, const std::string& weights_path,
@@ -140,25 +139,6 @@
 
   // Returns error string or nullptr if OK.
   const char* Validate() {
-    info_.model = Model::UNKNOWN;
-    info_.wrapping = PromptWrapping::GEMMA_PT;
-    info_.weight = Type::kUnknown;
-    if (const char* err = ParseModelTypeAndWrapping(model_type_str, info_.model,
-                                                    info_.wrapping)) {
-      if (model_type_required) return err;
-    }
-    if (const char* err = ParseType(weight_type_str, info_.weight)) {
-      if (model_type_required) return err;
-    }
-    if (model_type_required) {
-      if (tokenizer.path.empty()) {
-        return "Missing --tokenizer flag, a file for the tokenizer is "
-               "required.";
-      }
-      if (!tokenizer.Exists()) {
-        return "Can't open file specified with --tokenizer flag.";
-      }
-    }
     if (!compressed_weights.path.empty()) {
       if (weights.path.empty()) {
         weights = compressed_weights;
@@ -174,6 +154,28 @@
 
     if (!weights.Exists()) {
       return "Can't open file specified with --weights flag.";
     }
+    info_.model = Model::UNKNOWN;
+    info_.wrapping = PromptWrapping::GEMMA_PT;
+    info_.weight = Type::kUnknown;
+    if (!model_type_str.empty()) {
+      const char* err = ParseModelTypeAndWrapping(model_type_str, info_.model,
+                                                  info_.wrapping);
+      if (err != nullptr) return err;
+    }
+    if (!weight_type_str.empty()) {
+      const char* err = ParseType(weight_type_str, info_.weight);
+      if (err != nullptr) return err;
+    }
+    if (!tokenizer.path.empty()) {
+      if (!tokenizer.Exists()) {
+        return "Can't open file specified with --tokenizer flag.";
+      }
+    }
+    // model_type and tokenizer must be either both present or both absent.
+    // Further checks happen on weight loading.
+    if (model_type_str.empty() != tokenizer.path.empty()) {
+      return "Missing or extra flags for model_type or tokenizer.";
+    }
     return nullptr;
   }
 
@@ -182,7 +184,6 @@
   Path compressed_weights;
   std::string model_type_str;
   std::string weight_type_str;
-  bool model_type_required = true;
 
   template <class Visitor>
   void ForEach(const Visitor& visitor) {
@@ -199,7 +200,7 @@
             "gr2b-it = griffin 2B parameters, instruction-tuned\n    "
             "gr2b-pt = griffin 2B parameters, pretrained.");
     visitor(weight_type_str, "weight_type", std::string("sfp"),
-            "Weight type\n    f32 = float, bf16 = bfloat16, sfp = 8-bit FP.");
+            "Weight type\n    f32 = float, bf16 = bfloat16, sfp = 8-bit SFP.");
   }
 
   // Uninitialized before Validate, must call after that.
@@ -212,6 +213,11 @@
 };
 
 static inline Gemma CreateGemma(const LoaderArgs& loader, NestedPools& pools) {
+  if (Type::kUnknown == loader.Info().weight ||
+      Model::UNKNOWN == loader.Info().model || loader.tokenizer.path.empty()) {
+    // New weights file format doesn't need tokenizer path or model/weight info.
+    return Gemma(loader.weights, pools);
+  }
   return Gemma(loader.tokenizer, loader.weights, loader.Info(), pools);
 }
 
@@ -219,8 +225,7 @@ static inline std::unique_ptr<Gemma> AllocateGemma(const LoaderArgs& loader,
                                                    NestedPools& pools) {
   if (Type::kUnknown == loader.Info().weight ||
       Model::UNKNOWN == loader.Info().model || loader.tokenizer.path.empty()) {
-    // Newer weights file format doesn't need tokenizer path or model/weight
-    // info.
+    // New weights file format doesn't need tokenizer path or model/weight info.
     return std::make_unique<Gemma>(loader.weights, pools);
   }
   return std::make_unique<Gemma>(loader.tokenizer, loader.weights,
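
For reference, a minimal sketch of a caller relying on the relaxed flag handling above. Only the `GemmaEnv(argc, argv)` constructor, `GetModel()`, and `Save()` calls are taken from this diff; the include paths, the `Path` construction, and the output filename are assumptions:

```cpp
// Sketch: load a single-file weights blob by passing only --weights, then
// re-save it, mirroring the compression/migrate_weights.cc main() above.
#include "evals/benchmark_helper.h"               // gcpp::GemmaEnv
#include "hwy/contrib/thread_pool/thread_pool.h"  // hwy::ThreadPool

int main(int argc, char** argv) {
  // Invoked as e.g.: ./tool --weights .../gemma2-2b-it-sfp-single.sbs
  // Neither --tokenizer nor --model is required: the new Validate() only
  // rejects the case where exactly one of the two flags is present.
  gcpp::GemmaEnv env(argc, argv);
  hwy::ThreadPool pool(0);
  gcpp::Path out;                    // .path member as used in LoaderArgs.
  out.path = "weights-single.sbs";   // Hypothetical output location.
  env.GetModel()->Save(out, pool);   // Writes the single-file format.
  return 0;
}
```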