diff --git a/src/bpe_model.cc b/src/bpe_model.cc index 22cd115..97e0bda 100644 --- a/src/bpe_model.cc +++ b/src/bpe_model.cc @@ -21,7 +21,7 @@ #include "bpe_model.h" #include "freelist.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" #include "util.h" namespace sentencepiece { diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc index 964d44e..64878cd 100644 --- a/src/bpe_model_trainer.cc +++ b/src/bpe_model_trainer.cc @@ -18,7 +18,8 @@ #include #include "bpe_model_trainer.h" -#include "third_party/absl/container/flat_hash_set.h" +#include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" #include "util.h" namespace sentencepiece { @@ -171,7 +172,7 @@ void Trainer::UpdateActiveSymbols() { active_symbols_.insert(symbols.begin(), symbols.begin() + size); } -util::Status Trainer::Train() { +absl::Status Trainer::Train() { RETURN_IF_ERROR(status()); CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h index e011a37..a17e580 100644 --- a/src/bpe_model_trainer.h +++ b/src/bpe_model_trainer.h @@ -20,7 +20,8 @@ #include #include "sentencepiece_model.pb.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" #include "trainer_interface.h" namespace sentencepiece { @@ -35,7 +36,7 @@ class Trainer : public TrainerInterface { : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, denormalizer_spec) {} - util::Status Train() override; + absl::Status Train() override; private: // Symbol represents a character or symbol bigram. diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc index 173eb9c..2a43c3a 100644 --- a/src/bpe_model_trainer_test.cc +++ b/src/bpe_model_trainer_test.cc @@ -20,8 +20,8 @@ #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "util.h" namespace sentencepiece { diff --git a/src/builder.cc b/src/builder.cc index 378aaa0..fd8edf8 100644 --- a/src/builder.cc +++ b/src/builder.cc @@ -18,10 +18,11 @@ #include "builder.h" #include "filesystem.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/strings/str_replace.h" -#include "third_party/absl/strings/str_split.h" -#include "third_party/absl/strings/strip.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" +#include "absl/strings/strip.h" +#include "absl/status/status.h" #ifdef ENABLE_NFKC_COMPILE #include @@ -36,7 +37,7 @@ #include "normalization_rule.h" #include "normalizer.h" -#include "third_party/darts_clone/darts.h" +#include "include/darts.h" #include "util.h" namespace sentencepiece { @@ -145,7 +146,7 @@ Builder::Chars Normalize(const Builder::CharsMap &chars_map, } // namespace // static -util::Status Builder::CompileCharsMap(const CharsMap &chars_map, +absl::Status Builder::CompileCharsMap(const CharsMap &chars_map, std::string *output) { CHECK_OR_RETURN(output); CHECK_OR_RETURN(!chars_map.empty()); @@ -212,7 +213,7 @@ util::Status Builder::CompileCharsMap(const CharsMap &chars_map, } // static -util::Status Builder::DecompileCharsMap(absl::string_view blob, +absl::Status Builder::DecompileCharsMap(absl::string_view blob, Builder::CharsMap *chars_map) { CHECK_OR_RETURN(chars_map); chars_map->clear(); @@ -265,7 +266,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob, } // static -util::Status Builder::GetPrecompiledCharsMap(const std::string &name, +absl::Status Builder::GetPrecompiledCharsMap(const std::string &name, std::string *output) { CHECK_OR_RETURN(output); @@ -282,12 +283,12 @@ util::Status Builder::GetPrecompiledCharsMap(const std::string &name, return util::OkStatus(); } } - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) << "No precompiled charsmap is found: " << name; } // static -util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { +absl::Status Builder::BuildNFKCMap(CharsMap *chars_map) { #ifdef ENABLE_NFKC_COMPILE LOG(INFO) << "Running BuildNFKCMap"; @@ -345,7 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { return util::OkStatus(); } -util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { +absl::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { #ifdef ENABLE_NFKC_COMPILE LOG(INFO) << "Running BuildNmtNFKCMap"; @@ -420,7 +421,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { } // static -util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { +absl::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { #ifdef ENABLE_NFKC_COMPILE for (auto &c : *chars_map) { std::vector trg; @@ -445,7 +446,7 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { } // static -util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { +absl::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { #ifdef ENABLE_NFKC_COMPILE CharsMap nfkc_map; RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map)); @@ -460,7 +461,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { } // static -util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { +absl::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { #ifdef ENABLE_NFKC_COMPILE CharsMap nfkc_map; RETURN_IF_ERROR(Builder::BuildNmtNFKCMap(&nfkc_map)); @@ -475,7 +476,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { } // static -util::Status Builder::LoadCharsMap(absl::string_view filename, +absl::Status Builder::LoadCharsMap(absl::string_view filename, CharsMap *chars_map) { LOG(INFO) << "Loading mapping file: " << filename.data(); CHECK_OR_RETURN(chars_map); @@ -510,7 +511,7 @@ util::Status Builder::LoadCharsMap(absl::string_view filename, } // static -util::Status Builder::SaveCharsMap(absl::string_view filename, +absl::Status Builder::SaveCharsMap(absl::string_view filename, const Builder::CharsMap &chars_map) { auto output = filesystem::NewWritableFile(filename); RETURN_IF_ERROR(output->status()); @@ -540,7 +541,7 @@ util::Status Builder::SaveCharsMap(absl::string_view filename, } // static -util::Status Builder::RemoveRedundantMap(CharsMap *chars_map) { +absl::Status Builder::RemoveRedundantMap(CharsMap *chars_map) { CHECK_OR_RETURN(chars_map); CharsMap new_chars_map; diff --git a/src/builder.h b/src/builder.h index 49d2884..8ad872c 100644 --- a/src/builder.h +++ b/src/builder.h @@ -22,7 +22,8 @@ #include "common.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" namespace sentencepiece { namespace normalizer { @@ -43,15 +44,15 @@ class Builder { // String-to-string mapping. using CharsMap = std::map; - static util::Status CompileCharsMap(const CharsMap &chars_map, + static absl::Status CompileCharsMap(const CharsMap &chars_map, std::string *output); // Decompiles `blob` into `chars_map`. - static util::Status DecompileCharsMap(absl::string_view blob, + static absl::Status DecompileCharsMap(absl::string_view blob, CharsMap *chars_map); // Returns a pre-compiled binary index with `name`. - static util::Status GetPrecompiledCharsMap(const std::string &name, + static absl::Status GetPrecompiledCharsMap(const std::string &name, std::string *output); // Makes a normalization mapping based on NFKC. @@ -89,30 +90,30 @@ class Builder { // normalizer is the goal of SentencePiece. // // TODO(taku): Make NFC, NFD, and NFKD mapping if necessary. - static util::Status BuildNFKCMap(CharsMap *chars_map); + static absl::Status BuildNFKCMap(CharsMap *chars_map); // Makes an NFKC-based mapping with NMT specific modifications around // whitespaces. - static util::Status BuildNmtNFKCMap(CharsMap *chars_map); + static absl::Status BuildNmtNFKCMap(CharsMap *chars_map); // Merge Unicode case folding mapping into `chars_map`. - static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map); + static absl::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map); // Makes NFKC with Unicode case folding. - static util::Status BuildNFKC_CFMap(CharsMap *chars_map); + static absl::Status BuildNFKC_CFMap(CharsMap *chars_map); // Makes NMT NFKC with Unicode case folding. - static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); + static absl::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); // Builds Chars map save in `filename`. // Format: // src_uchar1 src_uchar2 ... trg_uchar1 trg_uchar2... // (src|trg)_ucharX must be a hex of Unicode code point. - static util::Status LoadCharsMap(absl::string_view filename, + static absl::Status LoadCharsMap(absl::string_view filename, CharsMap *chars_map); // Saves Chars map to `filename` as TSV. - static util::Status SaveCharsMap(absl::string_view filename, + static absl::Status SaveCharsMap(absl::string_view filename, const CharsMap &chars_map); private: @@ -121,7 +122,7 @@ class Builder { // Removes redundant rules from `chars_map`. // When char_maps have "aa" => "bb" and "a" => "b", the first // rule is not necessary since the second rule can cover the first rule. - static util::Status RemoveRedundantMap(CharsMap *chars_map); + static absl::Status RemoveRedundantMap(CharsMap *chars_map); }; } // namespace normalizer } // namespace sentencepiece diff --git a/src/builder_test.cc b/src/builder_test.cc index 4acb7b3..1dee5c7 100644 --- a/src/builder_test.cc +++ b/src/builder_test.cc @@ -18,7 +18,7 @@ #include "normalizer.h" #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" #include "util.h" namespace sentencepiece { diff --git a/src/char_model_trainer.cc b/src/char_model_trainer.cc index f438d78..4f4c603 100644 --- a/src/char_model_trainer.cc +++ b/src/char_model_trainer.cc @@ -16,12 +16,13 @@ #include "char_model.h" #include "char_model_trainer.h" +#include "absl/status/status.h" #include "util.h" namespace sentencepiece { namespace character { -util::Status Trainer::Train() { +absl::Status Trainer::Train() { RETURN_IF_ERROR(status()); CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); diff --git a/src/char_model_trainer.h b/src/char_model_trainer.h index e563819..a5d021c 100644 --- a/src/char_model_trainer.h +++ b/src/char_model_trainer.h @@ -17,6 +17,7 @@ #include "sentencepiece_model.pb.h" #include "trainer_interface.h" +#include "absl/status/status.h" namespace sentencepiece { namespace character { @@ -30,7 +31,7 @@ class Trainer : public TrainerInterface { : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, denormalizer_spec) {} - util::Status Train() override; + absl::Status Train() override; }; } // namespace character } // namespace sentencepiece diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc index 8c2e4b7..e8b4979 100644 --- a/src/char_model_trainer_test.cc +++ b/src/char_model_trainer_test.cc @@ -19,8 +19,8 @@ #include "filesystem.h" #include "sentencepiece_processor.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "util.h" namespace sentencepiece { diff --git a/src/common.h b/src/common.h index 7595634..339f831 100644 --- a/src/common.h +++ b/src/common.h @@ -146,6 +146,7 @@ inline const char *BaseName(const char *path) { } // namespace logging } // namespace sentencepiece +#ifndef LOG #define LOG(severity) \ (::sentencepiece::logging::GetMinLogLevel() > \ ::sentencepiece::logging::LOG_##severity) \ @@ -156,6 +157,7 @@ inline const char *BaseName(const char *path) { std::cerr << ::sentencepiece::logging::BaseName(__FILE__) << "(" \ << __LINE__ << ") " \ << "LOG(" << #severity << ") " +#endif // LOG #define CHECK(condition) \ (condition) ? 0 \ diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc index c5a5188..e5db1d7 100644 --- a/src/compile_charsmap_main.cc +++ b/src/compile_charsmap_main.cc @@ -22,8 +22,9 @@ #include "filesystem.h" #include "init.h" #include "sentencepiece_processor.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" using sentencepiece::normalizer::Builder; @@ -160,7 +161,7 @@ int main(int argc, char **argv) { const std::vector>> + std::function>> kRuleList = {{"nfkc", Builder::BuildNFKCMap}, {"nmt_nfkc", Builder::BuildNmtNFKCMap}, {"nfkc_cf", Builder::BuildNFKC_CFMap}, diff --git a/src/error.cc b/src/error.cc index a226d98..ab4675d 100644 --- a/src/error.cc +++ b/src/error.cc @@ -20,8 +20,8 @@ #ifdef _USE_EXTERNAL_ABSL // Naive workaround to define minloglevel on external absl package. // We want to define them in other cc file. -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/flags/parse.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" ABSL_FLAG(int32, minloglevel, 0, "Messages logged at a lower level than this don't actually."); #endif diff --git a/src/filesystem.cc b/src/filesystem.cc index 833c8f7..9a1b6c9 100644 --- a/src/filesystem.cc +++ b/src/filesystem.cc @@ -15,7 +15,8 @@ #include #include "filesystem.h" -#include "third_party/absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/memory/memory.h" #include "util.h" #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) @@ -36,7 +37,7 @@ class PosixReadableFile : public ReadableFile { is_binary ? std::ios::binary | std::ios::in : std::ios::in)) { if (!*is_) - status_ = util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) + status_ = util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) << "\"" << filename.data() << "\": " << util::StrError(errno); } @@ -44,7 +45,7 @@ class PosixReadableFile : public ReadableFile { if (is_ != &std::cin) delete is_; } - util::Status status() const { return status_; } + absl::Status status() const { return status_; } bool ReadLine(std::string *line) { return static_cast(std::getline(*is_, *line)); @@ -61,7 +62,7 @@ class PosixReadableFile : public ReadableFile { } private: - util::Status status_; + absl::Status status_; std::istream *is_; }; @@ -75,7 +76,7 @@ class PosixWritableFile : public WritableFile { : std::ios::out)) { if (!*os_) status_ = - util::StatusBuilder(util::StatusCode::kPermissionDenied, GTL_LOC) + util::StatusBuilder(absl::StatusCode::kPermissionDenied, GTL_LOC) << "\"" << filename.data() << "\": " << util::StrError(errno); } @@ -83,7 +84,7 @@ class PosixWritableFile : public WritableFile { if (os_ != &std::cout) delete os_; } - util::Status status() const { return status_; } + absl::Status status() const { return status_; } bool Write(absl::string_view text) { os_->write(text.data(), text.size()); @@ -93,7 +94,7 @@ class PosixWritableFile : public WritableFile { bool WriteLine(absl::string_view text) { return Write(text) && Write("\n"); } private: - util::Status status_; + absl::Status status_; std::ostream *os_; }; diff --git a/src/filesystem.h b/src/filesystem.h index e572b4b..6e8e305 100644 --- a/src/filesystem.h +++ b/src/filesystem.h @@ -23,7 +23,8 @@ #include "common.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" namespace sentencepiece { namespace filesystem { @@ -33,7 +34,7 @@ class ReadableFile { explicit ReadableFile(absl::string_view filename, bool is_binary = false) {} virtual ~ReadableFile() {} - virtual util::Status status() const = 0; + virtual absl::Status status() const = 0; virtual bool ReadLine(std::string *line) = 0; virtual bool ReadAll(std::string *line) = 0; }; @@ -44,7 +45,7 @@ class WritableFile { explicit WritableFile(absl::string_view filename, bool is_binary = false) {} virtual ~WritableFile() {} - virtual util::Status status() const = 0; + virtual absl::Status status() const = 0; virtual bool Write(absl::string_view text) = 0; virtual bool WriteLine(absl::string_view text) = 0; }; diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc index 790e756..39ece99 100644 --- a/src/filesystem_test.cc +++ b/src/filesystem_test.cc @@ -14,7 +14,7 @@ #include "filesystem.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" #include "util.h" namespace sentencepiece { diff --git a/src/init.h b/src/init.h index 090a2d9..acfda8a 100644 --- a/src/init.h +++ b/src/init.h @@ -16,8 +16,8 @@ #define INIT_H_ #include "common.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/flags/parse.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" ABSL_DECLARE_FLAG(int32, minloglevel); diff --git a/src/model_factory.cc b/src/model_factory.cc index be99501..040c00c 100644 --- a/src/model_factory.cc +++ b/src/model_factory.cc @@ -15,7 +15,7 @@ #include "bpe_model.h" #include "char_model.h" #include "model_factory.h" -#include "third_party/absl/memory/memory.h" +#include "absl/memory/memory.h" #include "unigram_model.h" #include "word_model.h" diff --git a/src/model_interface.cc b/src/model_interface.cc index c49be1e..22c6378 100644 --- a/src/model_interface.cc +++ b/src/model_interface.cc @@ -16,8 +16,8 @@ #include "model_interface.h" #include "sentencepiece_model.pb.h" -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/str_format.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_format.h" #include "util.h" namespace sentencepiece { diff --git a/src/model_interface.h b/src/model_interface.h index aef5b53..c7858fb 100644 --- a/src/model_interface.h +++ b/src/model_interface.h @@ -25,9 +25,10 @@ #include "normalizer.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/darts_clone/darts.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" +#include "include/darts.h" #include "util.h" namespace sentencepiece { @@ -69,7 +70,7 @@ class ModelInterface { // Returns Status. // Encode/Decode functions are valid only when status is OK. - virtual util::Status status() const { return status_; } + virtual absl::Status status() const { return status_; } virtual const ModelProto &model_proto() const { return *model_proto_; } @@ -82,7 +83,7 @@ class ModelInterface { // normally users do not need to call this function. This function is provided // just in case that a user want to manually choose which encoder version to // use. - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) { + virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version) { encoder_version_ = encoder_version; return util::OkStatus(); } @@ -261,7 +262,7 @@ class ModelInterface { EncoderVersion encoder_version_ = EncoderVersion::kOptimized; // status. - util::Status status_; + absl::Status status_; }; } // namespace sentencepiece #endif // MODEL_INTERFACE_H_ diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc index 69ee4e6..26a1e05 100644 --- a/src/model_interface_test.cc +++ b/src/model_interface_test.cc @@ -15,7 +15,7 @@ #include "model_factory.h" #include "model_interface.h" #include "testharness.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" #include "util.h" namespace sentencepiece { diff --git a/src/normalizer.cc b/src/normalizer.cc index 100b875..c553906 100644 --- a/src/normalizer.cc +++ b/src/normalizer.cc @@ -18,11 +18,12 @@ #include #include "common.h" -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/match.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/strings/strip.h" -#include "third_party/darts_clone/darts.h" +#include "absl/memory/memory.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "absl/status/status.h" +#include "include/darts.h" #include "util.h" namespace sentencepiece { @@ -71,7 +72,7 @@ void Normalizer::Init() { } } -util::Status Normalizer::Normalize(absl::string_view input, +absl::Status Normalizer::Normalize(absl::string_view input, std::string *normalized, std::vector *norm_to_orig) const { norm_to_orig->clear(); @@ -274,7 +275,7 @@ std::string Normalizer::EncodePrecompiledCharsMap( } // static -util::Status Normalizer::DecodePrecompiledCharsMap( +absl::Status Normalizer::DecodePrecompiledCharsMap( absl::string_view blob, absl::string_view *trie_blob, absl::string_view *normalized, std::string *buffer) { uint32 trie_blob_size = 0; diff --git a/src/normalizer.h b/src/normalizer.h index 622bbd2..21d1385 100644 --- a/src/normalizer.h +++ b/src/normalizer.h @@ -24,8 +24,9 @@ #include "common.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/darts_clone/darts.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" +#include "include/darts.h" #include "util.h" namespace sentencepiece { @@ -75,7 +76,7 @@ class Normalizer { // Returns Status. // Normalizes function is valid only when status is OK. - virtual util::Status status() const { return status_; } + virtual absl::Status status() const { return status_; } // Normalizes a plain utf8 string into an internal representation for // Sentencepiece model. |norm_to_orig| stores the byte-alignment from @@ -86,7 +87,7 @@ class Normalizer { // - Adds a prefix space. // - Replaces a space with a meta symbol. // - Removing heading, tailing and other redundant spaces. - virtual util::Status Normalize(absl::string_view input, + virtual absl::Status Normalize(absl::string_view input, std::string *normalized, std::vector *norm_to_orig) const; @@ -121,7 +122,7 @@ class Normalizer { absl::string_view normalized); // Decodes blob into trie_blob and normalized string. - static util::Status DecodePrecompiledCharsMap(absl::string_view blob, + static absl::Status DecodePrecompiledCharsMap(absl::string_view blob, absl::string_view *trie_blob, absl::string_view *normalized, std::string *buffer = nullptr); @@ -153,7 +154,7 @@ class Normalizer { #endif // Normalizer's status. - util::Status status_; + absl::Status status_; }; } // namespace normalizer } // namespace sentencepiece diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc index 049658e..8021511 100644 --- a/src/pretokenizer_for_training.cc +++ b/src/pretokenizer_for_training.cc @@ -14,7 +14,7 @@ #include #include "pretokenizer_for_training.h" -#include "third_party/absl/strings/str_replace.h" +#include "absl/strings/str_replace.h" namespace sentencepiece { namespace pretokenizer { diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h index 2d3bc82..b4a6de3 100644 --- a/src/pretokenizer_for_training.h +++ b/src/pretokenizer_for_training.h @@ -21,7 +21,8 @@ #include "common.h" #include "sentencepiece.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" namespace sentencepiece { namespace pretokenizer { @@ -30,7 +31,7 @@ class PretokenizerForTrainingInterface { public: PretokenizerForTrainingInterface() {} virtual ~PretokenizerForTrainingInterface() {} - virtual util::Status status() const = 0; + virtual absl::Status status() const = 0; // Puts kUPPBoundaryStr before and after the pre-tokenizer's segmentation // when there are no spaces between these tokens. diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc index 80f4787..de89fe3 100644 --- a/src/pretokenizer_for_training_test.cc +++ b/src/pretokenizer_for_training_test.cc @@ -13,8 +13,9 @@ // limitations under the License.! #include "pretokenizer_for_training.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" #include "trainer_interface.h" +#include "absl/status/status.h" namespace sentencepiece { namespace pretokenizer { @@ -28,7 +29,7 @@ class MockPretokenizer : public PretokenizerForTrainingInterface { return spt_; } - util::Status status() const override { return util::OkStatus(); } + absl::Status status() const override { return util::OkStatus(); } void SetOutput(const SentencePieceText &spt) { spt_ = spt; } diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc index 1e4e7a0..78ae527 100644 --- a/src/sentencepiece_processor.cc +++ b/src/sentencepiece_processor.cc @@ -23,14 +23,15 @@ #include "normalizer.h" #include "sentencepiece.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/numbers.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/strings/str_replace.h" -#include "third_party/absl/strings/str_split.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/strings/strip.h" +#include "absl/memory/memory.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "absl/status/status.h" #include "unigram_model.h" #include "util.h" @@ -52,7 +53,7 @@ const char kReplacementCharacter[] = "\xef\xbf\xbd"; SentencePieceProcessor::SentencePieceProcessor() {} SentencePieceProcessor::~SentencePieceProcessor() {} -util::Status SentencePieceProcessor::Load(absl::string_view filename) { +absl::Status SentencePieceProcessor::Load(absl::string_view filename) { auto model_proto = absl::make_unique(); RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get())); return Load(std::move(model_proto)); @@ -62,13 +63,13 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) { CHECK_OK(Load(filename)); } -util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { +absl::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { auto model_proto_copy = absl::make_unique(); *model_proto_copy = model_proto; return Load(std::move(model_proto_copy)); } -util::Status SentencePieceProcessor::LoadFromSerializedProto( +absl::Status SentencePieceProcessor::LoadFromSerializedProto( absl::string_view serialized) { auto model_proto = absl::make_unique(); CHECK_OR_RETURN( @@ -76,7 +77,7 @@ util::Status SentencePieceProcessor::LoadFromSerializedProto( return Load(std::move(model_proto)); } -util::Status SentencePieceProcessor::Load( +absl::Status SentencePieceProcessor::Load( std::unique_ptr model_proto) { model_proto_ = std::move(model_proto); model_ = ModelFactory::Create(*model_proto_); @@ -117,7 +118,7 @@ util::Status SentencePieceProcessor::Load( return util::OkStatus(); } -util::Status SentencePieceProcessor::SetEncoderVersion( +absl::Status SentencePieceProcessor::SetEncoderVersion( EncoderVersion encoder_version) { return model_->SetEncoderVersion(encoder_version); } @@ -126,17 +127,17 @@ EncoderVersion SentencePieceProcessor::GetEncoderVersion() const { return model_->GetEncoderVersion(); } -util::Status SentencePieceProcessor::SetEncodeExtraOptions( +absl::Status SentencePieceProcessor::SetEncodeExtraOptions( absl::string_view extra_options) { return ParseExtraOptions(extra_options, &encode_extra_options_); } -util::Status SentencePieceProcessor::SetDecodeExtraOptions( +absl::Status SentencePieceProcessor::SetDecodeExtraOptions( absl::string_view extra_options) { return ParseExtraOptions(extra_options, &decode_extra_options_); } -util::Status SentencePieceProcessor::status() const { +absl::Status SentencePieceProcessor::status() const { CHECK_OR_RETURN(model_) << "Model is not initialized."; CHECK_OR_RETURN(normalizer_) << "Normalizer is not initialized."; RETURN_IF_ERROR(model_->status()); @@ -144,7 +145,7 @@ util::Status SentencePieceProcessor::status() const { return util::OkStatus(); } -util::Status SentencePieceProcessor::SetVocabulary( +absl::Status SentencePieceProcessor::SetVocabulary( const std::vector &valid_vocab) { RETURN_IF_ERROR(status()); @@ -174,7 +175,7 @@ util::Status SentencePieceProcessor::SetVocabulary( return util::OkStatus(); } -util::Status SentencePieceProcessor::ResetVocabulary() { +absl::Status SentencePieceProcessor::ResetVocabulary() { RETURN_IF_ERROR(status()); for (auto &piece : *(model_proto_->mutable_pieces())) { if (piece.type() == ModelProto::SentencePiece::UNUSED) @@ -184,7 +185,7 @@ util::Status SentencePieceProcessor::ResetVocabulary() { return util::OkStatus(); } -util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, +absl::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, int threshold) { auto input = filesystem::NewReadableFile(filename); RETURN_IF_ERROR(input->status()); @@ -221,7 +222,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, ////////////////////////////////////////////////////////////// // Simple API. -util::Status SentencePieceProcessor::Encode( +absl::Status SentencePieceProcessor::Encode( absl::string_view input, std::vector *pieces) const { CHECK_OR_RETURN_STATUS_STL(pieces); @@ -234,7 +235,7 @@ util::Status SentencePieceProcessor::Encode( return util::OkStatus(); } -util::Status SentencePieceProcessor::Encode(absl::string_view input, +absl::Status SentencePieceProcessor::Encode(absl::string_view input, std::vector *ids) const { CHECK_OR_RETURN_STATUS_STL(ids); @@ -247,7 +248,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, return util::OkStatus(); } -util::Status SentencePieceProcessor::Decode( +absl::Status SentencePieceProcessor::Decode( const std::vector &pieces, std::string *detokenized) const { CHECK_OR_RETURN_STATUS_STL(detokenized); @@ -258,7 +259,7 @@ util::Status SentencePieceProcessor::Decode( return util::OkStatus(); } -util::Status SentencePieceProcessor::Decode(const std::vector &ids, +absl::Status SentencePieceProcessor::Decode(const std::vector &ids, std::string *detokenized) const { CHECK_OR_RETURN_STATUS_STL(detokenized); @@ -269,7 +270,7 @@ util::Status SentencePieceProcessor::Decode(const std::vector &ids, return util::OkStatus(); } -util::Status SentencePieceProcessor::NBestEncode( +absl::Status SentencePieceProcessor::NBestEncode( absl::string_view input, int nbest_size, std::vector> *pieces) const { CHECK_OR_RETURN_STATUS_STL(pieces); @@ -287,7 +288,7 @@ util::Status SentencePieceProcessor::NBestEncode( return util::OkStatus(); } -util::Status SentencePieceProcessor::NBestEncode( +absl::Status SentencePieceProcessor::NBestEncode( absl::string_view input, int nbest_size, std::vector> *ids) const { CHECK_OR_RETURN_STATUS_STL(ids); @@ -305,7 +306,7 @@ util::Status SentencePieceProcessor::NBestEncode( return util::OkStatus(); } -util::Status SentencePieceProcessor::SampleEncode( +absl::Status SentencePieceProcessor::SampleEncode( absl::string_view input, int nbest_size, float alpha, std::vector *pieces) const { CHECK_OR_RETURN_STATUS_STL(pieces); @@ -319,7 +320,7 @@ util::Status SentencePieceProcessor::SampleEncode( return util::OkStatus(); } -util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, +absl::Status SentencePieceProcessor::SampleEncode(absl::string_view input, int nbest_size, float alpha, std::vector *ids) const { CHECK_OR_RETURN_STATUS_STL(ids); @@ -333,7 +334,7 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, return util::OkStatus(); } -util::Status SentencePieceProcessor::PopulateSentencePieceText( +absl::Status SentencePieceProcessor::PopulateSentencePieceText( absl::string_view input, absl::string_view normalized, const std::vector &norm_to_orig, const EncodeResult &result, SentencePieceText *spt) const { @@ -424,7 +425,7 @@ util::Status SentencePieceProcessor::PopulateSentencePieceText( return util::OkStatus(); } // namespace sentencepiece -util::Status SentencePieceProcessor::Encode(absl::string_view input, +absl::Status SentencePieceProcessor::Encode(absl::string_view input, SentencePieceText *spt) const { CHECK_OR_RETURN_STATUS_PROTO(spt); @@ -439,7 +440,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, return util::OkStatus(); } -util::Status SentencePieceProcessor::NBestEncode( +absl::Status SentencePieceProcessor::NBestEncode( absl::string_view input, int nbest_size, NBestSentencePieceText *nbest_spt) const { CHECK_OR_RETURN_STATUS_PROTO(nbest_spt); @@ -464,7 +465,7 @@ util::Status SentencePieceProcessor::NBestEncode( return util::OkStatus(); } -util::Status SentencePieceProcessor::SampleEncode( +absl::Status SentencePieceProcessor::SampleEncode( absl::string_view input, int nbest_size, float alpha, SentencePieceText *spt) const { CHECK_OR_RETURN_STATUS_PROTO(spt); @@ -503,7 +504,7 @@ util::Status SentencePieceProcessor::SampleEncode( return util::OkStatus(); } -util::Status SentencePieceProcessor::SampleEncodeAndScore( +absl::Status SentencePieceProcessor::SampleEncodeAndScore( absl::string_view input, int samples, float theta, bool wor, bool include_best, NBestSentencePieceText *samples_spt) const { CHECK_OR_RETURN(model_->IsSampleEncodeAndScoreAvailable()) @@ -527,7 +528,7 @@ util::Status SentencePieceProcessor::SampleEncodeAndScore( return util::OkStatus(); } -util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, +absl::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, float theta, float *entropy) const { CHECK_OR_RETURN(model_->IsCalculateEntropyAvailable()) @@ -540,7 +541,7 @@ util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, return util::OkStatus(); } -util::Status SentencePieceProcessor::Decode( +absl::Status SentencePieceProcessor::Decode( const std::vector &pieces, SentencePieceText *spt) const { CHECK_OR_RETURN_STATUS_PROTO(spt); @@ -591,7 +592,7 @@ util::Status SentencePieceProcessor::Decode( }; auto ProcessBytePieces = [&](int token_index_begin, - int token_index_end) -> util::Status { + int token_index_end) -> absl::Status { if (token_index_begin >= token_index_end) { return util::OkStatus(); } @@ -661,14 +662,14 @@ util::Status SentencePieceProcessor::Decode( return util::OkStatus(); } -util::Status SentencePieceProcessor::Decode(const std::vector &ids, +absl::Status SentencePieceProcessor::Decode(const std::vector &ids, SentencePieceText *spt) const { std::vector pieces; const int num_pieces = GetPieceSize(); pieces.reserve(ids.size()); for (const int id : ids) { if (id < 0 || id >= num_pieces) { - return util::Status(util::StatusCode::kOutOfRange, + return absl::Status(absl::StatusCode::kOutOfRange, absl::StrCat("Invalid id: ", id)); } pieces.emplace_back(IdToPiece(id)); @@ -783,7 +784,7 @@ int SentencePieceProcessor::pad_id() const { } // static -util::Status SentencePieceProcessor::ApplyExtraOptions( +absl::Status SentencePieceProcessor::ApplyExtraOptions( const std::vector &extra_options, SentencePieceText *spt) const { for (const auto &extra_option : extra_options) { @@ -818,7 +819,7 @@ util::Status SentencePieceProcessor::ApplyExtraOptions( } // static -util::Status SentencePieceProcessor::ParseExtraOptions( +absl::Status SentencePieceProcessor::ParseExtraOptions( absl::string_view _extra_option, std::vector *extra_options) const { absl::string_view extra_option(_extra_option.data(), _extra_option.size()); @@ -877,7 +878,7 @@ void SetRandomGeneratorSeed(unsigned int seed); namespace io { -util::Status LoadModelProto(absl::string_view filename, +absl::Status LoadModelProto(absl::string_view filename, ModelProto *model_proto) { if (filename.empty()) { return util::NotFoundError("model file path should not be empty."); @@ -893,7 +894,7 @@ util::Status LoadModelProto(absl::string_view filename, return util::OkStatus(); } -util::Status SaveModelProto(absl::string_view filename, +absl::Status SaveModelProto(absl::string_view filename, const ModelProto &model_proto) { if (filename.empty()) { return util::NotFoundError("model file path should not be empty."); diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h index e8bd5f5..346fb0e 100644 --- a/src/sentencepiece_processor.h +++ b/src/sentencepiece_processor.h @@ -20,9 +20,10 @@ #include #include #include +#include "absl/status/status.h" #if defined(_USE_INTERNAL_STRING_VIEW) -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" #elif defined(_USE_TF_STRING_VIEW) #include "absl/strings/string_view.h" #else @@ -185,7 +186,7 @@ class SentencePieceProcessor { // Loads model from `filename`. // Returns false if `filename` cannot be loaded. - virtual util::Status Load(absl::string_view filename); + virtual absl::Status Load(absl::string_view filename); // Loads model from `filename`. // Crash if `filename` cannot be loaded. @@ -193,24 +194,24 @@ class SentencePieceProcessor { // Loads model from `model_proto`. // `model_proto` is copied. - virtual util::Status Load(const ModelProto &model_proto); + virtual absl::Status Load(const ModelProto &model_proto); // Loads model from `model_proto`. // `model_proto` is moved. - virtual util::Status Load(std::unique_ptr model_proto); + virtual absl::Status Load(std::unique_ptr model_proto); // Loads model from `serialized`, which is a string-serialized model proto. // Useful to load the model from a platform independent blob object. - virtual util::Status LoadFromSerializedProto(absl::string_view serialized); + virtual absl::Status LoadFromSerializedProto(absl::string_view serialized); // Returns the status. Encode/Decode methods are valid when status is OK. - virtual util::Status status() const; + virtual absl::Status status() const; // Sets encode extra_option sequence. - virtual util::Status SetEncodeExtraOptions(absl::string_view extra_option); + virtual absl::Status SetEncodeExtraOptions(absl::string_view extra_option); // Sets decode extra_option sequence. - virtual util::Status SetDecodeExtraOptions(absl::string_view extra_option); + virtual absl::Status SetDecodeExtraOptions(absl::string_view extra_option); ////////////////////////////////////////////////////////////// // Vocabulary restriction. @@ -219,41 +220,41 @@ class SentencePieceProcessor { // Restricts the vocabulary set. // The input sentences are encoded into the tokens in `valid_vocab`. - virtual util::Status SetVocabulary( + virtual absl::Status SetVocabulary( const std::vector &valid_vocab); // Reverts the vocabulary restriction. - virtual util::Status ResetVocabulary(); + virtual absl::Status ResetVocabulary(); // Loads the valid vocabulary set from `filename` in TSV format. // Format: . // Any token with frequency < threshold will be treated as OOV. - virtual util::Status LoadVocabulary(absl::string_view filename, + virtual absl::Status LoadVocabulary(absl::string_view filename, int threshold); ////////////////////////////////////////////////////////////// // Simple API. // // Given a UTF8 input, encodes it into a sequence of sentence pieces. - virtual util::Status Encode(absl::string_view input, + virtual absl::Status Encode(absl::string_view input, std::vector *pieces) const; // Given a UTF8 input, encodes it into a sequence of ids. - virtual util::Status Encode(absl::string_view input, + virtual absl::Status Encode(absl::string_view input, std::vector *ids) const; // Given a sequence of pieces, decodes it into a detokenized output. - virtual util::Status Decode(const std::vector &pieces, + virtual absl::Status Decode(const std::vector &pieces, std::string *detokenized) const; // Given a sequence of ids, decodes it into a detokenized output. - virtual util::Status Decode(const std::vector &ids, + virtual absl::Status Decode(const std::vector &ids, std::string *detokenized) const; // Sets the encoder version. Normally users do not need to call this function. // But they can call this fucntion just in case if they want to fall back to // the original encoder. - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version); + virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version); // Returns the current encoder version in use. virtual EncoderVersion GetEncoderVersion() const; @@ -261,12 +262,12 @@ class SentencePieceProcessor { ////////////////////////////////////////////////////////////// // NBest API. // Same as Encode, but returns nbest results. - virtual util::Status NBestEncode( + virtual absl::Status NBestEncode( absl::string_view input, int nbest_size, std::vector> *pieces) const; // Same as Encode, but returns nbest results. - virtual util::Status NBestEncode(absl::string_view input, int nbest_size, + virtual absl::Status NBestEncode(absl::string_view input, int nbest_size, std::vector> *ids) const; ////////////////////////////////////////////////////////////// @@ -289,12 +290,12 @@ class SentencePieceProcessor { // in https://arxiv.org/abs/1910.13267 // Nbest-based sampling is not supported so nbest_size parameter is ignored in // BPE. - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, float alpha, std::vector *pieces) const; // Same as above, but returns a sequence of ids. - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, float alpha, std::vector *ids) const; ////////////////////////////////////////////////////////////// @@ -303,16 +304,16 @@ class SentencePieceProcessor { // and internal sentencepiece sequence. // // Given a UTF8 input, encodes it into SentencePieceText. - virtual util::Status Encode(absl::string_view input, + virtual absl::Status Encode(absl::string_view input, SentencePieceText *spt) const; // Same as above, but returns NBestSentencePieceText. - virtual util::Status NBestEncode(absl::string_view input, int nbest_size, + virtual absl::Status NBestEncode(absl::string_view input, int nbest_size, NBestSentencePieceText *nbest_spt) const; // Same as above, but samples one segmentation from the hypotheses // (Lattice). - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, float alpha, SentencePieceText *spt) const; // Sample `samples` segmentations from the segmentation lattice. @@ -323,21 +324,21 @@ class SentencePieceProcessor { // If `include_best` is true, the best tokenization is always included in the // sample, and the remaining elements are sampled excluding the best. // This method is only available in Unigram mode. - virtual util::Status SampleEncodeAndScore( + virtual absl::Status SampleEncodeAndScore( absl::string_view input, int samples, float theta, bool wor, bool include_best, NBestSentencePieceText *samples_spt) const; // Calculate entropy of possible tokenization. // Only available in unigram mode. - virtual util::Status CalculateEntropy(absl::string_view input, float theta, + virtual absl::Status CalculateEntropy(absl::string_view input, float theta, float *entropy) const; // Given a sequence of pieces, decodes it into SentencePieceText. - virtual util::Status Decode(const std::vector &pieces, + virtual absl::Status Decode(const std::vector &pieces, SentencePieceText *spt) const; // Given a sequence of ids, decodes it into SentencePieceText. - virtual util::Status Decode(const std::vector &ids, + virtual absl::Status Decode(const std::vector &ids, SentencePieceText *spt) const; ////////////////////////////////////////////////////////////// @@ -487,13 +488,13 @@ class SentencePieceProcessor { private: enum ExtraOption { REVERSE, BOS, EOS }; - util::Status ParseExtraOptions(absl::string_view extra_option, + absl::Status ParseExtraOptions(absl::string_view extra_option, std::vector *extra_options) const; - util::Status ApplyExtraOptions(const std::vector &extra_options, + absl::Status ApplyExtraOptions(const std::vector &extra_options, SentencePieceText *spt) const; - util::Status PopulateSentencePieceText( + absl::Status PopulateSentencePieceText( absl::string_view input, absl::string_view normalized, const std::vector &norm_to_orig, const std::vector> &result, @@ -526,10 +527,10 @@ namespace io { // io::LoadModelProto("//path/spm.model", model_proto.get()); // SentencePieceProcessor sp; // CHECK_OK(sp.Load(std::move(model_proto))); -util::Status LoadModelProto(absl::string_view, ModelProto *model_proto); +absl::Status LoadModelProto(absl::string_view, ModelProto *model_proto); // Saves `model_proto` as `filename`. -util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); +absl::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); } // namespace io #endif // SWIG } // namespace sentencepiece diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc index 373e73e..829c3d4 100644 --- a/src/sentencepiece_processor_test.cc +++ b/src/sentencepiece_processor_test.cc @@ -23,10 +23,10 @@ #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "util.h" namespace sentencepiece { diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc index b9fe64f..5b33cd7 100644 --- a/src/sentencepiece_trainer.cc +++ b/src/sentencepiece_trainer.cc @@ -22,12 +22,13 @@ #include "sentencepiece_model.pb.h" #include "sentencepiece_trainer.h" #include "spec_parser.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/strings/numbers.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_split.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/strings/strip.h" +#include "absl/flags/flag.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "absl/status/status.h" #include "trainer_factory.h" #include "util.h" @@ -37,7 +38,7 @@ static constexpr char kDefaultNormalizerName[] = "nmt_nfkc"; } // namespace // static -util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, +absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { NormalizerSpec normalizer_spec; @@ -45,7 +46,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, serialized_model_proto); } -util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, +absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec, SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { @@ -55,7 +56,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, } // static -util::Status SentencePieceTrainer::Train( +absl::Status SentencePieceTrainer::Train( const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec, const NormalizerSpec &denormalizer_spec, SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { @@ -97,7 +98,7 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(absl::string_view name) { } // static -util::Status SentencePieceTrainer::MergeSpecsFromArgs( +absl::Status SentencePieceTrainer::MergeSpecsFromArgs( absl::string_view args, TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec) { CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null."; @@ -125,7 +126,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( } // static -util::Status SentencePieceTrainer::MergeSpecsFromArgs( +absl::Status SentencePieceTrainer::MergeSpecsFromArgs( const std::unordered_map &kwargs, TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec) { @@ -171,7 +172,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( } // static -util::Status SentencePieceTrainer::Train(absl::string_view args, +absl::Status SentencePieceTrainer::Train(absl::string_view args, SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { LOG(INFO) << "Running command: " << args.data(); @@ -185,7 +186,7 @@ util::Status SentencePieceTrainer::Train(absl::string_view args, } // static -util::Status SentencePieceTrainer::Train( +absl::Status SentencePieceTrainer::Train( const std::unordered_map &kwargs, SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { TrainerSpec trainer_spec; @@ -198,7 +199,7 @@ util::Status SentencePieceTrainer::Train( } // static -util::Status SentencePieceTrainer::PopulateNormalizerSpec( +absl::Status SentencePieceTrainer::PopulateNormalizerSpec( NormalizerSpec *normalizer_spec, bool is_denormalizer) { CHECK_OR_RETURN(normalizer_spec); @@ -226,7 +227,7 @@ util::Status SentencePieceTrainer::PopulateNormalizerSpec( } // static -util::Status SentencePieceTrainer::PopulateModelTypeFromString( +absl::Status SentencePieceTrainer::PopulateModelTypeFromString( absl::string_view type, TrainerSpec *spec) { static const std::unordered_map kModelTypeMap = {{"unigram", TrainerSpec::UNIGRAM}, @@ -239,7 +240,7 @@ util::Status SentencePieceTrainer::PopulateModelTypeFromString( return util::OkStatus(); } - return util::StatusBuilder(util::StatusCode::kInternal, GTL_LOC) + return util::StatusBuilder(absl::StatusCode::kInternal, GTL_LOC) << "\"" << type << "\" is not found in TrainerSpec"; } @@ -248,7 +249,7 @@ const pretokenizer::PretokenizerForTrainingInterface *g_pretokenizer = nullptr; } // namespace // static -util::Status SentencePieceTrainer::SetPretokenizerForTraining( +absl::Status SentencePieceTrainer::SetPretokenizerForTraining( const pretokenizer::PretokenizerForTrainingInterface *pretokenizer) { g_pretokenizer = pretokenizer; return util::OkStatus(); diff --git a/src/sentencepiece_trainer.h b/src/sentencepiece_trainer.h index bb74ab9..ec6cf93 100644 --- a/src/sentencepiece_trainer.h +++ b/src/sentencepiece_trainer.h @@ -19,6 +19,7 @@ #include #include "sentencepiece_processor.h" +#include "absl/status/status.h" namespace sentencepiece { @@ -46,7 +47,7 @@ class SentenceIterator { virtual bool done() const = 0; virtual void Next() = 0; virtual const std::string &value() const = 0; - virtual util::Status status() const = 0; + virtual absl::Status status() const = 0; }; class SentencePieceTrainer { @@ -54,14 +55,14 @@ class SentencePieceTrainer { // Trains SentencePiece model with `trainer_spec`. // Default `normalizer_spec` is used. // When `sentence_iterator` is passed, load sentences from the iterator. - static util::Status Train(const TrainerSpec &trainer_spec, + static absl::Status Train(const TrainerSpec &trainer_spec, SentenceIterator *sentence_iterator = nullptr, std::string *serialized_model_proto = nullptr); // Trains SentencePiece model with `trainer_spec` and // `normalizer_spec`. // When `sentence_iterator` is passed, load sentences from the iterator. - static util::Status Train(const TrainerSpec &trainer_spec, + static absl::Status Train(const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec, SentenceIterator *sentence_iterator = nullptr, std::string *serialized_model_proto = nullptr); @@ -69,7 +70,7 @@ class SentencePieceTrainer { // Trains SentencePiece model with `trainer_spec`, `normalizer_spec` // and `denormalizer_spec`. // When `sentence_iterator` is passed, load sentences from the iterator. - static util::Status Train(const TrainerSpec &trainer_spec, + static absl::Status Train(const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec, const NormalizerSpec &denormalizer_spec, SentenceIterator *sentence_iterator = nullptr, @@ -78,13 +79,13 @@ class SentencePieceTrainer { // e.g., // '--input=data --model_prefix=m --vocab_size=8192 model_type=unigram' // When `sentence_iterator` is passed, load sentences from the iterator. - static util::Status Train(absl::string_view args, + static absl::Status Train(absl::string_view args, SentenceIterator *sentence_iterator = nullptr, std::string *serialized_model_proto = nullptr); // Trains SentencePiece model with mapin `kwargs`. // e.g., {{"input", "data"}, {"model_prefix, "m"}, {"vocab_size", "8192"}...} - static util::Status Train( + static absl::Status Train( const std::unordered_map &kwargs, SentenceIterator *sentence_iterator = nullptr, std::string *serialized_model_proto = nullptr); @@ -96,19 +97,19 @@ class SentencePieceTrainer { // Populates necessary fields (precompiled_charmap) from // `NormalizerSpec::name` or `NormalizerSpec::normalization_rule_tsv`. - static util::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec, + static absl::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec, bool is_denormalizer = false); // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the // std::unordered_map in `kargs`. - static util::Status MergeSpecsFromArgs( + static absl::Status MergeSpecsFromArgs( const std::unordered_map &kwargs, TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec); // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the // command line flags in `args`. - static util::Status MergeSpecsFromArgs(absl::string_view args, + static absl::Status MergeSpecsFromArgs(absl::string_view args, TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec); @@ -116,7 +117,7 @@ class SentencePieceTrainer { // Injects global pre-tokenizer that are applied in training time. // Pretokenizer is only used for extracting pieces. // TODO(taku): It would be better to inject per `trainer_spec`. - static util::Status SetPretokenizerForTraining( + static absl::Status SetPretokenizerForTraining( const pretokenizer::PretokenizerForTrainingInterface *pretokenizer); // Returns the current pretokenizer. if no pretokenizer is defined, returns @@ -129,17 +130,17 @@ class SentencePieceTrainer { // with comma-separated values. `field_name` must not be a nested message. // The body of these functions are automatically generated with // data/gen_spec_parser.pl - static util::Status SetProtoField(const std::string &name, + static absl::Status SetProtoField(const std::string &name, const std::string &value, TrainerSpec *message); - static util::Status SetProtoField(const std::string &name, + static absl::Status SetProtoField(const std::string &name, const std::string &value, NormalizerSpec *message); // Populates model type from string representation, e.g., "bpe". // Supported model: "unigram", "bpe", "word", "char". - static util::Status PopulateModelTypeFromString(absl::string_view type, + static absl::Status PopulateModelTypeFromString(absl::string_view type, TrainerSpec *trainer_spec); private: diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc index e44e66b..00c8d08 100644 --- a/src/sentencepiece_trainer_test.cc +++ b/src/sentencepiece_trainer_test.cc @@ -16,7 +16,8 @@ #include "sentencepiece_model.pb.h" #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" +#include "absl/status/status.h" #include "util.h" namespace sentencepiece { @@ -109,7 +110,7 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) { bool done() const override { return idx_ == vec_.size(); } void Next() override { ++idx_; } const std::string &value() const override { return vec_[idx_]; } - util::Status status() const override { return util::OkStatus(); } + absl::Status status() const override { return util::OkStatus(); } private: std::vector vec_; diff --git a/src/spec_parser.h b/src/spec_parser.h index 2c5a95b..259c45d 100644 --- a/src/spec_parser.h +++ b/src/spec_parser.h @@ -19,8 +19,9 @@ #include #include "sentencepiece_processor.h" -#include "third_party/absl/strings/ascii.h" -#include "third_party/absl/strings/str_split.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_split.h" +#include "absl/status/status.h" #include "util.h" namespace sentencepiece { @@ -49,7 +50,7 @@ namespace sentencepiece { if (name == #param_name) { \ int32 v; \ if (!string_util::lexical_cast(value, &v)) \ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ << "cannot parse \"" << value << "\" as int."; \ message->set_##param_name(v); \ return util::OkStatus(); \ @@ -59,7 +60,7 @@ namespace sentencepiece { if (name == #param_name) { \ uint64 v; \ if (!string_util::lexical_cast(value, &v)) \ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ << "cannot parse \"" << value << "\" as int."; \ message->set_##param_name(v); \ return util::OkStatus(); \ @@ -69,7 +70,7 @@ namespace sentencepiece { if (name == #param_name) { \ double v; \ if (!string_util::lexical_cast(value, &v)) \ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ << "cannot parse \"" << value << "\" as int."; \ message->set_##param_name(v); \ return util::OkStatus(); \ @@ -79,7 +80,7 @@ namespace sentencepiece { if (name == #param_name) { \ bool v; \ if (!string_util::lexical_cast(value.empty() ? "true" : value, &v)) \ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ << "cannot parse \"" << value << "\" as bool."; \ message->set_##param_name(v); \ return util::OkStatus(); \ @@ -89,7 +90,7 @@ namespace sentencepiece { if (name == #param_name) { \ const auto it = map_name.find(absl::AsciiStrToUpper(value)); \ if (it == map_name.end()) \ - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ << "unknown enumeration value of \"" << value << "\" as " \ << #map_name; \ message->set_##param_name(it->second); \ @@ -186,7 +187,7 @@ inline std::string PrintProto(const NormalizerSpec &message, return os.str(); } -util::Status SentencePieceTrainer::SetProtoField(const std::string &name, +absl::Status SentencePieceTrainer::SetProtoField(const std::string &name, const std::string &value, TrainerSpec *message) { CHECK_OR_RETURN(message); @@ -239,11 +240,11 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, PARSE_STRING(pad_piece); PARSE_STRING(unk_surface); - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) << "unknown field name \"" << name << "\" in TrainerSpec."; } -util::Status SentencePieceTrainer::SetProtoField(const std::string &name, +absl::Status SentencePieceTrainer::SetProtoField(const std::string &name, const std::string &value, NormalizerSpec *message) { CHECK_OR_RETURN(message); @@ -255,7 +256,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, PARSE_BOOL(escape_whitespaces); PARSE_STRING(normalization_rule_tsv); - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) << "unknown field name \"" << name << "\" in NormalizerSpec."; } diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc index 3382ddc..9dda65c 100644 --- a/src/spm_decode_main.cc +++ b/src/spm_decode_main.cc @@ -21,8 +21,8 @@ #include "init.h" #include "sentencepiece.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/strings/str_split.h" +#include "absl/flags/flag.h" +#include "absl/strings/str_split.h" #include "util.h" ABSL_FLAG(std::string, model, "", "model file name"); diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc index 4d12a38..29b7458 100644 --- a/src/spm_encode_main.cc +++ b/src/spm_encode_main.cc @@ -21,10 +21,10 @@ #include "init.h" #include "sentencepiece.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" +#include "absl/container/flat_hash_map.h" +#include "absl/flags/flag.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "trainer_interface.h" ABSL_FLAG(std::string, model, "", "model file name"); diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc index b5d93cb..70a65c1 100644 --- a/src/spm_export_vocab_main.cc +++ b/src/spm_export_vocab_main.cc @@ -20,7 +20,7 @@ #include "init.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/flags/flag.h" +#include "absl/flags/flag.h" ABSL_FLAG(std::string, output, "", "Output filename"); ABSL_FLAG(std::string, model, "", "input model file name"); diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc index 96da360..8c541b8 100644 --- a/src/spm_normalize_main.cc +++ b/src/spm_normalize_main.cc @@ -21,7 +21,7 @@ #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/flags/flag.h" +#include "absl/flags/flag.h" ABSL_FLAG(std::string, model, "", "Model file name"); ABSL_FLAG(bool, use_internal_normalization, false, diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc index baf8dbf..ba1e811 100644 --- a/src/spm_train_main.cc +++ b/src/spm_train_main.cc @@ -18,10 +18,10 @@ #include "init.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/strings/ascii.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/strings/str_split.h" +#include "absl/flags/flag.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" #include "util.h" using sentencepiece::NormalizerSpec; diff --git a/src/testharness.cc b/src/testharness.cc index f6b1efe..daf2d14 100644 --- a/src/testharness.cc +++ b/src/testharness.cc @@ -26,7 +26,7 @@ #include #include "common.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" #include "util.h" namespace sentencepiece { diff --git a/src/testharness.h b/src/testharness.h index 9879b06..98317ad 100644 --- a/src/testharness.h +++ b/src/testharness.h @@ -21,9 +21,9 @@ #include #include "common.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/flags/parse.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/string_view.h" ABSL_DECLARE_FLAG(std::string, test_tmpdir); ABSL_DECLARE_FLAG(std::string, test_srcdir); diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc index d1d2541..ff594d0 100644 --- a/src/trainer_factory.cc +++ b/src/trainer_factory.cc @@ -14,7 +14,7 @@ #include "bpe_model_trainer.h" #include "char_model_trainer.h" -#include "third_party/absl/memory/memory.h" +#include "absl/memory/memory.h" #include "trainer_factory.h" #include "unigram_model_trainer.h" #include "word_model_trainer.h" diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc index a3a4b74..e6a2587 100644 --- a/src/trainer_interface.cc +++ b/src/trainer_interface.cc @@ -26,13 +26,14 @@ #include "normalizer.h" #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/numbers.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_format.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/strings/str_split.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "absl/status/status.h" #include "trainer_interface.h" #include "unicode_script.h" #include "util.h" @@ -49,7 +50,7 @@ const char32 TrainerInterface::kUPPBoundaryChar = L'\u0009'; const char TrainerInterface::kUPPBoundaryStr[] = "\t"; namespace { -util::Status VerifySpec(const TrainerSpec &trainer_spec) { +absl::Status VerifySpec(const TrainerSpec &trainer_spec) { CHECK_GT_OR_RETURN(trainer_spec.vocab_size(), 0); if (trainer_spec.model_type() == TrainerSpec::UNIGRAM || @@ -164,7 +165,7 @@ bool MultiFileSentenceIterator::done() const { return (!read_done_ && file_index_ == files_.size()); } -util::Status MultiFileSentenceIterator::status() const { +absl::Status MultiFileSentenceIterator::status() const { CHECK_OR_RETURN(fp_); return fp_->status(); } @@ -296,7 +297,7 @@ bool TrainerInterface::IsValidSentencePiece( return true; } -util::Status TrainerInterface::LoadSentences() { +absl::Status TrainerInterface::LoadSentences() { RETURN_IF_ERROR(status()); CHECK_OR_RETURN(sentences_.empty()); CHECK_OR_RETURN(required_chars_.empty()); @@ -537,7 +538,7 @@ void TrainerInterface::SplitSentencesByWhitespace() { LOG(INFO) << "Done! " << sentences_.size(); } -util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { +absl::Status TrainerInterface::Serialize(ModelProto *model_proto) const { RETURN_IF_ERROR(status()); // Duplicated sentencepiece is not allowed. @@ -611,7 +612,7 @@ util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { return util::OkStatus(); } -util::Status TrainerInterface::SaveModel(absl::string_view filename) const { +absl::Status TrainerInterface::SaveModel(absl::string_view filename) const { LOG(INFO) << "Saving model: " << filename; ModelProto model_proto; RETURN_IF_ERROR(Serialize(&model_proto)); @@ -622,7 +623,7 @@ util::Status TrainerInterface::SaveModel(absl::string_view filename) const { return util::OkStatus(); } -util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { +absl::Status TrainerInterface::SaveVocab(absl::string_view filename) const { LOG(INFO) << "Saving vocabs: " << filename; ModelProto model_proto; RETURN_IF_ERROR(Serialize(&model_proto)); @@ -644,7 +645,7 @@ util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { return util::OkStatus(); } -util::Status TrainerInterface::Save() const { +absl::Status TrainerInterface::Save() const { if (output_model_proto_) { RETURN_IF_ERROR(Serialize(output_model_proto_)); } else { @@ -654,7 +655,7 @@ util::Status TrainerInterface::Save() const { return util::OkStatus(); } -util::Status TrainerInterface::InitMetaPieces() { +absl::Status TrainerInterface::InitMetaPieces() { CHECK_OR_RETURN(meta_pieces_.empty()); bool has_unk = false; diff --git a/src/trainer_interface.h b/src/trainer_interface.h index f66d59a..b4fbc7b 100644 --- a/src/trainer_interface.h +++ b/src/trainer_interface.h @@ -27,7 +27,8 @@ #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" #include "util.h" namespace sentencepiece { @@ -57,7 +58,7 @@ class MultiFileSentenceIterator : public SentenceIterator { bool done() const override; void Next() override; const std::string &value() const override { return value_; } - util::Status status() const override; + absl::Status status() const override; private: void TryRead(); @@ -90,16 +91,16 @@ class TrainerInterface { // Loads sentence from `sentence_iterator` and stores the model // to `output_model_proto`. - virtual util::Status Train(SentenceIterator *sentence_iterator, + virtual absl::Status Train(SentenceIterator *sentence_iterator, ModelProto *output_model_proto) { sentence_iterator_ = sentence_iterator; output_model_proto_ = output_model_proto; return Train(); } - virtual util::Status Train() { return status(); } + virtual absl::Status Train() { return status(); } - virtual util::Status status() const { return status_; } + virtual absl::Status status() const { return status_; } FRIEND_TEST(TrainerInterfaceTest, IsValidSentencePieceTest); FRIEND_TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest); @@ -115,7 +116,7 @@ class TrainerInterface { // Loads all sentences from spec.input() or SentenceIterator. // It loads at most input_sentence_size sentences. - util::Status LoadSentences(); + absl::Status LoadSentences(); // Splits all sentencecs by whitespaces and // replace the |sentences_| with tokenized string. @@ -125,7 +126,7 @@ class TrainerInterface { void SplitSentencesByWhitespace(); // Save model files into spec.model_prefix(). - util::Status Save() const; + absl::Status Save() const; // Set of characters which must be included in the final vocab. // The value of this map stores the frequency. @@ -152,7 +153,7 @@ class TrainerInterface { meta_pieces_; // Detect errors on initialization. - util::Status status_; + absl::Status status_; // Loads sentences from SentenceIterator if not null. SentenceIterator *sentence_iterator_ = nullptr; @@ -162,19 +163,19 @@ class TrainerInterface { private: // Serialize final_pieces_ to |model_proto|. - util::Status Serialize(ModelProto *model_proto) const; + absl::Status Serialize(ModelProto *model_proto) const; // Saves the best sentence split with the current model for debugging. - util::Status SaveSplits(absl::string_view filename) const; + absl::Status SaveSplits(absl::string_view filename) const; // Saves model file. - util::Status SaveModel(absl::string_view filename) const; + absl::Status SaveModel(absl::string_view filename) const; // Saves vocabulary file for NMT. - util::Status SaveVocab(absl::string_view filename) const; + absl::Status SaveVocab(absl::string_view filename) const; // Initializes `meta_pieces_` from TrainerSpec. - util::Status InitMetaPieces(); + absl::Status InitMetaPieces(); // Randomly sampled raw sentences for self-testing. std::vector self_test_samples_; diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc index 70a51ad..d7f3f0c 100644 --- a/src/trainer_interface_test.cc +++ b/src/trainer_interface_test.cc @@ -16,8 +16,8 @@ #include "filesystem.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_format.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "trainer_interface.h" #include "util.h" diff --git a/src/unicode_script.cc b/src/unicode_script.cc index 583dc30..11b24dc 100644 --- a/src/unicode_script.cc +++ b/src/unicode_script.cc @@ -14,7 +14,7 @@ #include -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" #include "unicode_script.h" #include "unicode_script_map.h" #include "util.h" diff --git a/src/unicode_script_map.h b/src/unicode_script_map.h index f2e67e9..f1b8299 100644 --- a/src/unicode_script_map.h +++ b/src/unicode_script_map.h @@ -14,7 +14,7 @@ #ifndef UNICODE_SCRIPT_DATA_H_ #define UNICODE_SCRIPT_DATA_H_ -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" namespace sentencepiece { namespace unicode_script { namespace { diff --git a/src/unicode_script_test.cc b/src/unicode_script_test.cc index ab33565..e0b1c4d 100644 --- a/src/unicode_script_test.cc +++ b/src/unicode_script_test.cc @@ -14,7 +14,7 @@ #include "common.h" #include "testharness.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" #include "unicode_script.h" #include "util.h" diff --git a/src/unigram_model.cc b/src/unigram_model.cc index 3b99060..9c72fb9 100644 --- a/src/unigram_model.cc +++ b/src/unigram_model.cc @@ -22,9 +22,9 @@ #include #include -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/str_split.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" #include "unigram_model.h" #include "util.h" diff --git a/src/unigram_model.h b/src/unigram_model.h index 448e489..9062f12 100644 --- a/src/unigram_model.h +++ b/src/unigram_model.h @@ -24,7 +24,7 @@ #include "freelist.h" #include "model_interface.h" #include "sentencepiece_model.pb.h" -#include "third_party/darts_clone/darts.h" +#include "include/darts.h" namespace sentencepiece { namespace unigram { diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc index f93b21c..808e907 100644 --- a/src/unigram_model_test.cc +++ b/src/unigram_model_test.cc @@ -22,8 +22,8 @@ #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "util.h" namespace sentencepiece { diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc index 9615040..7d16bd2 100644 --- a/src/unigram_model_trainer.cc +++ b/src/unigram_model_trainer.cc @@ -25,8 +25,9 @@ #include "normalizer.h" #include "pretokenizer_for_training.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/status/status.h" #include "third_party/esaxx/esa.hxx" // Suffix array library. #include "unicode_script.h" #include "unigram_model_trainer.h" @@ -463,7 +464,7 @@ TrainerModel::SentencePieces Trainer::FinalizeSentencePieces( return Sorted(final_sentencepieces); } -util::Status Trainer::Train() { +absl::Status Trainer::Train() { RETURN_IF_ERROR(status()); CHECK_EQ_OR_RETURN(TrainerSpec::UNIGRAM, trainer_spec_.model_type()); diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h index 91fbeb4..d41967d 100644 --- a/src/unigram_model_trainer.h +++ b/src/unigram_model_trainer.h @@ -21,7 +21,8 @@ #include #include "sentencepiece_model.pb.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" #include "trainer_interface.h" #include "unigram_model.h" #include "util.h" @@ -68,7 +69,7 @@ class Trainer : public TrainerInterface { : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, denormalizer_spec) {} - util::Status Train() override; + absl::Status Train() override; private: FRIEND_TEST(TrainerTest, IsValidSentencePieceTest); diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc index ffe515e..fdb25f6 100644 --- a/src/unigram_model_trainer_test.cc +++ b/src/unigram_model_trainer_test.cc @@ -16,8 +16,8 @@ #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "unigram_model_trainer.h" #include "util.h" diff --git a/src/util.h b/src/util.h index 0d15863..7122c7c 100644 --- a/src/util.h +++ b/src/util.h @@ -30,7 +30,8 @@ #include "common.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" #ifdef SPM_NO_THREADLOCAL #include @@ -359,14 +360,14 @@ std::string StrError(int errnum); std::vector StrSplitAsCSV(absl::string_view text); -inline Status OkStatus() { return Status(); } +inline absl::Status OkStatus() { return absl::Status(); } #define DECLARE_ERROR(FUNC) \ - inline util::Status FUNC##Error(absl::string_view str) { \ - return util::Status(StatusCode::k##FUNC, str.data()); \ + inline absl::Status FUNC##Error(absl::string_view str) { \ + return absl::Status(absl::StatusCode::k##FUNC, str.data()); \ } \ - inline bool Is##FUNC(const util::Status &status) { \ - return status.code() == StatusCode::k##FUNC; \ + inline bool Is##FUNC(const absl::Status &status) { \ + return status.code() ==absl::StatusCode::k##FUNC; \ } DECLARE_ERROR(Cancelled) @@ -390,8 +391,8 @@ DECLARE_ERROR(Unauthenticated) class StatusBuilder { public: - explicit StatusBuilder(StatusCode code) : code_(code) {} - explicit StatusBuilder(StatusCode code, int loc) : code_(code) {} + explicit StatusBuilder(absl::StatusCode code) : code_(code) {} + explicit StatusBuilder(absl::StatusCode code, int loc) : code_(code) {} template StatusBuilder &operator<<(const T &value) { @@ -399,10 +400,10 @@ class StatusBuilder { return *this; } - operator Status() const { return Status(code_, os_.str()); } + operator absl::Status() const { return absl::Status(code_, os_.str()); } private: - StatusCode code_; + absl::StatusCode code_; std::ostringstream os_; }; @@ -410,7 +411,7 @@ class StatusBuilder { if (condition) { \ } else /* NOLINT */ \ return ::sentencepiece::util::StatusBuilder( \ - ::sentencepiece::util::StatusCode::kInternal) \ + ::absl::StatusCode::kInternal) \ << __FILE__ << "(" << __LINE__ << ") [" << #condition << "] " #define CHECK_EQ_OR_RETURN(a, b) CHECK_OR_RETURN((a) == (b)) diff --git a/src/util_test.cc b/src/util_test.cc index 71d006f..67290dc 100644 --- a/src/util_test.cc +++ b/src/util_test.cc @@ -16,7 +16,8 @@ #include "filesystem.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" +#include "absl/status/status.h" #include "util.h" namespace sentencepiece { @@ -376,27 +377,27 @@ TEST(UtilTest, STLDeleteELementsTest) { } TEST(UtilTest, StatusTest) { - const util::Status ok; + const absl::Status ok; EXPECT_TRUE(ok.ok()); - EXPECT_EQ(util::StatusCode::kOk, ok.code()); + EXPECT_EQ(absl::StatusCode::kOk, ok.code()); EXPECT_EQ(std::string(""), ok.message()); - const util::Status s1(util::StatusCode::kUnknown, "unknown"); - const util::Status s2(util::StatusCode::kUnknown, std::string("unknown")); + const absl::Status s1(absl::StatusCode::kUnknown, "unknown"); + const absl::Status s2(absl::StatusCode::kUnknown, std::string("unknown")); - EXPECT_EQ(util::StatusCode::kUnknown, s1.code()); - EXPECT_EQ(util::StatusCode::kUnknown, s2.code()); + EXPECT_EQ(absl::StatusCode::kUnknown, s1.code()); + EXPECT_EQ(absl::StatusCode::kUnknown, s2.code()); EXPECT_EQ(std::string("unknown"), s1.message()); EXPECT_EQ(std::string("unknown"), s2.message()); auto ok2 = util::OkStatus(); EXPECT_TRUE(ok2.ok()); - EXPECT_EQ(util::StatusCode::kOk, ok2.code()); + EXPECT_EQ(absl::StatusCode::kOk, ok2.code()); EXPECT_EQ(std::string(""), ok2.message()); util::OkStatus().IgnoreError(); for (int i = 1; i <= 16; ++i) { - util::Status s(static_cast(i), "message"); + absl::Status s(static_cast(i), "message"); EXPECT_TRUE(s.ToString().find("message") != std::string::npos) << s.ToString(); } diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc index 0b8b062..bc1f86b 100644 --- a/src/word_model_trainer.cc +++ b/src/word_model_trainer.cc @@ -15,8 +15,9 @@ #include #include -#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "absl/status/status.h" #include "util.h" #include "word_model.h" #include "word_model_trainer.h" @@ -24,7 +25,7 @@ namespace sentencepiece { namespace word { -util::Status Trainer::Train() { +absl::Status Trainer::Train() { RETURN_IF_ERROR(status()); CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); diff --git a/src/word_model_trainer.h b/src/word_model_trainer.h index 76f8f32..436e595 100644 --- a/src/word_model_trainer.h +++ b/src/word_model_trainer.h @@ -17,6 +17,7 @@ #include "sentencepiece_model.pb.h" #include "trainer_interface.h" +#include "absl/status/status.h" namespace sentencepiece { namespace word { @@ -34,7 +35,7 @@ class Trainer : public TrainerInterface { : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, denormalizer_spec) {} - util::Status Train() override; + absl::Status Train() override; }; } // namespace word } // namespace sentencepiece diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc index c4a8bc6..366810f 100644 --- a/src/word_model_trainer_test.cc +++ b/src/word_model_trainer_test.cc @@ -18,8 +18,8 @@ #include "filesystem.h" #include "sentencepiece_processor.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "util.h" #include "word_model_trainer.h"