From a784b8459dec835ddc15b26b0c8461bf292959f4 Mon Sep 17 00:00:00 2001 From: RangerUFO Date: Sat, 12 Oct 2024 21:15:05 +0800 Subject: [PATCH 1/4] Add an overload of `Image::ReadPPM` method Make it able to load image data from a stream. --- paligemma/image.cc | 49 ++++++++++++++++++++++++++-------------------- paligemma/image.h | 4 ++++ 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/paligemma/image.cc b/paligemma/image.cc index 5dad770..76753d8 100644 --- a/paligemma/image.cc +++ b/paligemma/image.cc @@ -55,14 +55,14 @@ float StretchToSigned(float value) { bool IsLineBreak(int c) { return c == '\r' || c == '\n'; } -void SkipWhitespaceAndComments(std::ifstream& file) { - int value = file.get(); - while (std::isspace(value)) value = file.get(); +void SkipWhitespaceAndComments(std::istream& in) { + int value = in.get(); + while (std::isspace(value)) value = in.get(); while (value == '#') { // Skip comment lines. - while (!IsLineBreak(value)) value = file.get(); - while (std::isspace(value)) value = file.get(); + while (!IsLineBreak(value)) value = in.get(); + while (std::isspace(value)) value = in.get(); } - file.unget(); // Rewind last byte. + in.unget(); // Rewind last byte. } } // namespace @@ -72,25 +72,37 @@ bool Image::ReadPPM(const std::string& filename) { std::cerr << "Failed to open " << filename << "\n"; return false; } + if (!ReadPPM(file)) { + return false; + } + if (file.get() != EOF) { + std::cerr << "Extra data in file\n"; + return false; + } + file.close(); + return true; +} + +bool Image::ReadPPM(std::istream& in) { std::string format; - file >> format; + in >> format; if (format != "P6") { std::cerr << "We only support binary PPM (P6) but got: " << format << "\n"; return false; } int width, height, max_value; - SkipWhitespaceAndComments(file); - file >> width; - SkipWhitespaceAndComments(file); - file >> height; - SkipWhitespaceAndComments(file); - file >> max_value; + SkipWhitespaceAndComments(in); + in >> width; + SkipWhitespaceAndComments(in); + in >> height; + SkipWhitespaceAndComments(in); + in >> max_value; if (max_value <= 0 || max_value > 255) { std::cerr << "Unsupported max value " << max_value << "\n"; return false; } // P6 requires exactly one whitespace character after the header. - int value = file.get(); + int value = in.get(); if (!std::isspace(value)) { std::cerr << "Missing whitespace after header\n"; return false; @@ -100,19 +112,14 @@ bool Image::ReadPPM(const std::string& filename) { int data_size = width * height * 3; data_.resize(data_size); std::vector data_bytes(data_size); - file.read(reinterpret_cast(data_bytes.data()), data_size); - if (file.gcount() != data_size) { + in.read(reinterpret_cast(data_bytes.data()), data_size); + if (in.gcount() != data_size) { std::cerr << "Failed to read " << data_size << " bytes\n"; return false; } for (int i = 0; i < data_size; ++i) { data_[i] = StretchToSigned(static_cast(data_bytes[i]) / max_value); } - if (file.get() != EOF) { - std::cerr << "Extra data in file\n"; - return false; - } - file.close(); return true; } diff --git a/paligemma/image.h b/paligemma/image.h index 280a324..1b15215 100644 --- a/paligemma/image.h +++ b/paligemma/image.h @@ -18,6 +18,7 @@ #include #include +#include #include namespace gcpp { @@ -30,6 +31,9 @@ class Image { // Reads a file in PPM format (P6, binary), normalizes to [-1, 1]. // Returns true on success. bool ReadPPM(const std::string& filename); + // Reads PPM format (P6, binary) data from a stream, normalizes to [-1, 1]. + // Returns true on success. + bool ReadPPM(std::istream& in); // Resizes to 224x224 (nearest-neighbor for now, bilinear or antialias would // be better). void Resize(); From de2f7d7e2c176d815fdbccdba6ff846f3a6713d1 Mon Sep 17 00:00:00 2001 From: RangerUFO Date: Wed, 16 Oct 2024 17:34:11 +0800 Subject: [PATCH 2/4] Add an overload of `Image::ReadPPM` method Make it able to load image data from a `hwy::Span`. --- paligemma/image.cc | 68 ++++++++++++++++++++++++++++++++++++++++++++++ paligemma/image.h | 5 ++++ 2 files changed, 73 insertions(+) diff --git a/paligemma/image.cc b/paligemma/image.cc index 76753d8..2853712 100644 --- a/paligemma/image.cc +++ b/paligemma/image.cc @@ -64,6 +64,69 @@ void SkipWhitespaceAndComments(std::istream& in) { } in.unget(); // Rewind last byte. } + +template > +class basic_spanbuf : public std::basic_streambuf { + public: + using base_type = std::basic_streambuf; + using char_type = typename base_type::char_type; + using traits_type = typename base_type::traits_type; + using int_type = typename traits_type::int_type; + using pos_type = typename traits_type::pos_type; + using off_type = typename traits_type::off_type; + + basic_spanbuf(const hwy::Span& buf) { + this->setg(buf.data(), buf.data(), buf.data() + buf.size()); + } + + std::streamsize showmanyc() override { + return this->egptr() - this->gptr(); + } + + std::streamsize xsgetn(char_type* s, std::streamsize n) override { + if (n == 0) { + return 0; + } + if (this->gptr() + n > this->egptr()) { + n = this->egptr() - this->gptr(); + if (n == 0) { + if (this->uflow() == traits_type::eof()) { + return -1; + } + return 0; + } + } + std::memmove(s, this->gptr(), n); + this->gbump(n); + return n; + } + + int_type pbackfail(int_type c) override { + *(this->gptr() - 1) = traits_type::to_char_type(c); + this->pbump(-1); + return 1; + } +}; + +template > +class basic_ispanstream : public std::basic_istream { + public: + using base_type = std::basic_istream; + using char_type = typename base_type::char_type; + using traits_type = typename base_type::traits_type; + using int_type = typename traits_type::int_type; + using pos_type = typename traits_type::pos_type; + using off_type = typename traits_type::off_type; + + basic_ispanstream(const hwy::Span& buf) + : base_type(nullptr) + , sb_(buf) { + this->init(&sb_); + } + + private: + basic_spanbuf sb_; +}; } // namespace bool Image::ReadPPM(const std::string& filename) { @@ -123,6 +186,11 @@ bool Image::ReadPPM(std::istream& in) { return true; } +bool Image::ReadPPM(const hwy::Span& buf) { + basic_ispanstream in(buf); + return ReadPPM(in); +} + void Image::Resize() { int new_width = 224; int new_height = kImageSize; diff --git a/paligemma/image.h b/paligemma/image.h index 1b15215..ad3a3c3 100644 --- a/paligemma/image.h +++ b/paligemma/image.h @@ -21,6 +21,8 @@ #include #include +#include "hwy/aligned_allocator.h" // Span + namespace gcpp { // Very basic image loading and processing for PaliGemma-224. Does not try to be @@ -34,6 +36,9 @@ class Image { // Reads PPM format (P6, binary) data from a stream, normalizes to [-1, 1]. // Returns true on success. bool ReadPPM(std::istream& in); + // Reads PPM format (P6, binary) data from a hwy::Span, normalizes to [-1, 1]. + // Returns true on success. + bool ReadPPM(const hwy::Span& buf); // Resizes to 224x224 (nearest-neighbor for now, bilinear or antialias would // be better). void Resize(); From e48fc3abb4cab53037b62f75b855f61e25a4348c Mon Sep 17 00:00:00 2001 From: RangerUFO Date: Fri, 18 Oct 2024 01:13:35 +0800 Subject: [PATCH 3/4] Refactor the overloads of `Image::ReadPPM` method Remove the `std::istream` overload and directly parse the PPM format on the span. Load the image bytes in the file using `ReadFileToString` helper defined in "compression/io.h" instead of `std::ifstream`. --- paligemma/image.cc | 170 +++++++++++++++++---------------------------- paligemma/image.h | 6 +- 2 files changed, 65 insertions(+), 111 deletions(-) diff --git a/paligemma/image.cc b/paligemma/image.cc index 2853712..795b5e4 100644 --- a/paligemma/image.cc +++ b/paligemma/image.cc @@ -14,6 +14,7 @@ // limitations under the License. #include "paligemma/image.h" +#include "compression/io.h" #include @@ -55,142 +56,99 @@ float StretchToSigned(float value) { bool IsLineBreak(int c) { return c == '\r' || c == '\n'; } -void SkipWhitespaceAndComments(std::istream& in) { - int value = in.get(); - while (std::isspace(value)) value = in.get(); - while (value == '#') { // Skip comment lines. - while (!IsLineBreak(value)) value = in.get(); - while (std::isspace(value)) value = in.get(); +const char* CheckP6Format(const char* pos, const char* end) { + constexpr const char format[] = "P6"; + for (size_t i = 0; i < sizeof(format) - 1; ++i) { + if (pos == end || *pos != format[i]) { + return nullptr; + } + ++pos; } - in.unget(); // Rewind last byte. + return pos; } -template > -class basic_spanbuf : public std::basic_streambuf { - public: - using base_type = std::basic_streambuf; - using char_type = typename base_type::char_type; - using traits_type = typename base_type::traits_type; - using int_type = typename traits_type::int_type; - using pos_type = typename traits_type::pos_type; - using off_type = typename traits_type::off_type; - - basic_spanbuf(const hwy::Span& buf) { - this->setg(buf.data(), buf.data(), buf.data() + buf.size()); +const char* SkipWhitespaceAndComments(const char* pos, const char* end) { + while (pos < end && std::isspace(*pos)) ++pos; + while (pos < end && *pos == '#') { // Skip comment lines. + while (pos < end && !IsLineBreak(*pos)) ++pos; + while (pos < end && std::isspace(*pos)) ++pos; } + return pos; +} - std::streamsize showmanyc() override { - return this->egptr() - this->gptr(); +const char* ParseUnsigned(const char* pos, const char* end, size_t& num) { + if (pos == end || !std::isdigit(*pos)) { + return nullptr; } - - std::streamsize xsgetn(char_type* s, std::streamsize n) override { - if (n == 0) { - return 0; - } - if (this->gptr() + n > this->egptr()) { - n = this->egptr() - this->gptr(); - if (n == 0) { - if (this->uflow() == traits_type::eof()) { - return -1; - } - return 0; - } - } - std::memmove(s, this->gptr(), n); - this->gbump(n); - return n; + num = 0; + for ( ; pos < end && std::isdigit(*pos); ++pos) { + num *= 10; + num += *pos - '0'; } - - int_type pbackfail(int_type c) override { - *(this->gptr() - 1) = traits_type::to_char_type(c); - this->pbump(-1); - return 1; - } -}; - -template > -class basic_ispanstream : public std::basic_istream { - public: - using base_type = std::basic_istream; - using char_type = typename base_type::char_type; - using traits_type = typename base_type::traits_type; - using int_type = typename traits_type::int_type; - using pos_type = typename traits_type::pos_type; - using off_type = typename traits_type::off_type; - - basic_ispanstream(const hwy::Span& buf) - : base_type(nullptr) - , sb_(buf) { - this->init(&sb_); - } - - private: - basic_spanbuf sb_; -}; + return pos; +} } // namespace bool Image::ReadPPM(const std::string& filename) { - std::ifstream file(filename); - if (!file.is_open()) { - std::cerr << "Failed to open " << filename << "\n"; + Path path(filename); + if (!path.Exists()) { + std::cerr << filename << " does not exist\n"; return false; } - if (!ReadPPM(file)) { - return false; - } - if (file.get() != EOF) { - std::cerr << "Extra data in file\n"; - return false; - } - file.close(); - return true; + auto content = ReadFileToString(path); + return ReadPPM(hwy::Span(content.data(), content.size())); } -bool Image::ReadPPM(std::istream& in) { - std::string format; - in >> format; - if (format != "P6") { - std::cerr << "We only support binary PPM (P6) but got: " << format << "\n"; +bool Image::ReadPPM(const hwy::Span& buf) { + auto pos = CheckP6Format(buf.cbegin(), buf.cend()); + if (!pos) { + std::cerr << "We only support binary PPM (P6)\n"; + return false; + } + size_t width, height, max_value; + pos = SkipWhitespaceAndComments(pos, buf.cend()); + pos = ParseUnsigned(pos, buf.cend(), width); + if (!pos) { + std::cerr << "Reached end before width\n"; + return false; + } + pos = SkipWhitespaceAndComments(pos, buf.cend()); + pos = ParseUnsigned(pos, buf.cend(), height); + if (!pos) { + std::cerr << "Reached end before height\n"; + return false; + } + pos = SkipWhitespaceAndComments(pos, buf.cend()); + pos = ParseUnsigned(pos, buf.cend(), max_value); + if (!pos) { + std::cerr << "Reached end before max_value\n"; return false; } - int width, height, max_value; - SkipWhitespaceAndComments(in); - in >> width; - SkipWhitespaceAndComments(in); - in >> height; - SkipWhitespaceAndComments(in); - in >> max_value; if (max_value <= 0 || max_value > 255) { std::cerr << "Unsupported max value " << max_value << "\n"; return false; } // P6 requires exactly one whitespace character after the header. - int value = in.get(); - if (!std::isspace(value)) { + if (!std::isspace(*pos)) { std::cerr << "Missing whitespace after header\n"; return false; } - width_ = width; - height_ = height; - int data_size = width * height * 3; - data_.resize(data_size); - std::vector data_bytes(data_size); - in.read(reinterpret_cast(data_bytes.data()), data_size); - if (in.gcount() != data_size) { - std::cerr << "Failed to read " << data_size << " bytes\n"; + ++pos; + auto data_size = width * height * 3; + if (buf.cend() - pos < data_size) { + std::cerr << "Insufficient data remaining\n"; return false; } - for (int i = 0; i < data_size; ++i) { - data_[i] = StretchToSigned(static_cast(data_bytes[i]) / max_value); + data_.resize(data_size); + width_ = width; + height_ = height; + for (size_t i = 0; i < data_size; ++i) { + uint8_t value = pos[i]; + data_[i] = StretchToSigned(static_cast(value) / max_value); } return true; } -bool Image::ReadPPM(const hwy::Span& buf) { - basic_ispanstream in(buf); - return ReadPPM(in); -} - void Image::Resize() { int new_width = 224; int new_height = kImageSize; diff --git a/paligemma/image.h b/paligemma/image.h index ad3a3c3..bc53dbb 100644 --- a/paligemma/image.h +++ b/paligemma/image.h @@ -18,7 +18,6 @@ #include #include -#include #include #include "hwy/aligned_allocator.h" // Span @@ -33,12 +32,9 @@ class Image { // Reads a file in PPM format (P6, binary), normalizes to [-1, 1]. // Returns true on success. bool ReadPPM(const std::string& filename); - // Reads PPM format (P6, binary) data from a stream, normalizes to [-1, 1]. - // Returns true on success. - bool ReadPPM(std::istream& in); // Reads PPM format (P6, binary) data from a hwy::Span, normalizes to [-1, 1]. // Returns true on success. - bool ReadPPM(const hwy::Span& buf); + bool ReadPPM(const hwy::Span& buf); // Resizes to 224x224 (nearest-neighbor for now, bilinear or antialias would // be better). void Resize(); From fcea7431076b0cc68a10ba11827f541a7016418c Mon Sep 17 00:00:00 2001 From: RangerUFO Date: Sat, 19 Oct 2024 19:54:46 +0800 Subject: [PATCH 4/4] Fix Bazel builder failure --- paligemma/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paligemma/BUILD b/paligemma/BUILD index 6b303c8..c014fe6 100644 --- a/paligemma/BUILD +++ b/paligemma/BUILD @@ -11,7 +11,10 @@ cc_library( name = "image", srcs = ["image.cc"], hdrs = ["image.h"], - deps = ["@highway//:hwy"], + deps = [ + "@highway//:hwy", + "//compression:io", + ], ) cc_test(