Fix data corruption when the output of snappy compression is more than 4GB. This fixes #201.
diff --git a/snappy.cc b/snappy.cc index 877b65a..7a8920f 100644 --- a/snappy.cc +++ b/snappy.cc
@@ -1499,7 +1499,7 @@ // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from // buffer. const char* ip_limit_min_maxtaglen_; - uint32_t peeked_; // Bytes peeked from reader (need to skip) + uint64_t peeked_; // Bytes peeked from reader (need to skip) bool eof_; // Hit end of input without an error? char scratch_[kMaximumTagLength]; // See RefillTag(). @@ -1726,7 +1726,7 @@ assert(needed <= sizeof(scratch_)); // Read more bytes from reader if needed - uint32_t nbuf = ip_limit_ - ip; + uint64_t nbuf = ip_limit_ - ip; if (nbuf < needed) { // Stitch together bytes from ip and reader to form the word // contents. We store the needed bytes in "scratch_". They @@ -1739,7 +1739,7 @@ size_t length; const char* src = reader_->Peek(&length); if (length == 0) return false; - uint32_t to_add = std::min<uint32_t>(needed - nbuf, length); + uint64_t to_add = std::min<uint64_t>(needed - nbuf, length); std::memcpy(scratch_ + nbuf, src, to_add); nbuf += to_add; reader_->Skip(to_add); @@ -1802,6 +1802,7 @@ int token = 0; size_t written = 0; size_t N = reader->Available(); + assert(N <= 0xFFFFFFFFu); const size_t uncompressed_size = N; char ulength[Varint::kMax32]; char* p = Varint::Encode32(ulength, N);
diff --git a/snappy_unittest.cc b/snappy_unittest.cc index e57b13d..923a0ec 100644 --- a/snappy_unittest.cc +++ b/snappy_unittest.cc
@@ -27,6 +27,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <algorithm> +#include <cinttypes> #include <cmath> #include <cstdlib> #include <random> @@ -484,6 +485,18 @@ Verify(input); } +// Issue #201, when output is more than 4GB, we had a data corruption bug. +// We cannot run this test always because of CI constraints. +TEST(Snappy, DISABLED_MoreThan4GB) { + std::mt19937 rng; + std::uniform_int_distribution<int> uniform_byte(0, 255); + std::string input; + input.resize((1ull << 32) - 1); + for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i) + input[i] = static_cast<char>(uniform_byte(rng)); + Verify(input); +} + TEST(Snappy, RandomData) { std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); std::uniform_int_distribution<int> uniform_0_to_3(0, 3);