Tiny cleanup: distinguish between "ids" and "pieces" in argument names when encoding.

PiperOrigin-RevId: 642614278
This commit is contained in:
Daniel Keysers 2024-06-12 07:51:45 -07:00 committed by Copybara-Service
parent 1ac9857014
commit 6e67a6d8a9
2 changed files with 6 additions and 6 deletions

View File

@@ -171,15 +171,15 @@ class GemmaTokenizer::Impl {
return spp_ && spp_->Encode(input, pieces).ok();
}
bool Encode(const std::string& input, std::vector<int>* pieces) const {
bool Encode(const std::string& input, std::vector<int>* ids) const {
if constexpr (kShowTokenization) {
bool is_ok = spp_ && spp_->Encode(input, pieces).ok();
for (int i = 0; i < static_cast<int>(pieces->size()); i++) {
fprintf(stderr, "%3d: %d\n", i, (*pieces)[i]);
bool is_ok = spp_ && spp_->Encode(input, ids).ok();
for (int i = 0; i < static_cast<int>(ids->size()); i++) {
fprintf(stderr, "%3d: %d\n", i, (*ids)[i]);
}
return is_ok;
} else {
return spp_ && spp_->Encode(input, pieces).ok();
return spp_ && spp_->Encode(input, ids).ok();
}
}

View File

@@ -60,7 +60,7 @@ class GemmaTokenizer {
GemmaTokenizer& operator=(GemmaTokenizer&& other);
bool Encode(const std::string& input, std::vector<std::string>* pieces) const;
bool Encode(const std::string& input, std::vector<int>* pieces) const;
bool Encode(const std::string& input, std::vector<int>* ids) const;
bool Decode(const std::vector<int>& ids, std::string* detokenized) const;
private: