Fix data corruption when output of snappy compression is more than 4GB
This fixes #201
diff --git a/snappy.cc b/snappy.cc
index 877b65a..7a8920f 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -1499,7 +1499,7 @@
// If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from
// buffer.
const char* ip_limit_min_maxtaglen_;
- uint32_t peeked_; // Bytes peeked from reader (need to skip)
+ uint64_t peeked_; // Bytes peeked from reader (need to skip)
bool eof_; // Hit end of input without an error?
char scratch_[kMaximumTagLength]; // See RefillTag().
@@ -1726,7 +1726,7 @@
assert(needed <= sizeof(scratch_));
// Read more bytes from reader if needed
- uint32_t nbuf = ip_limit_ - ip;
+ uint64_t nbuf = ip_limit_ - ip;
if (nbuf < needed) {
// Stitch together bytes from ip and reader to form the word
// contents. We store the needed bytes in "scratch_". They
@@ -1739,7 +1739,7 @@
size_t length;
const char* src = reader_->Peek(&length);
if (length == 0) return false;
- uint32_t to_add = std::min<uint32_t>(needed - nbuf, length);
+ uint64_t to_add = std::min<uint64_t>(needed - nbuf, length);
std::memcpy(scratch_ + nbuf, src, to_add);
nbuf += to_add;
reader_->Skip(to_add);
@@ -1802,6 +1802,7 @@
int token = 0;
size_t written = 0;
size_t N = reader->Available();
+ assert(N <= 0xFFFFFFFFu);
const size_t uncompressed_size = N;
char ulength[Varint::kMax32];
char* p = Varint::Encode32(ulength, N);
diff --git a/snappy_unittest.cc b/snappy_unittest.cc
index e57b13d..923a0ec 100644
--- a/snappy_unittest.cc
+++ b/snappy_unittest.cc
@@ -27,6 +27,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
+#include <cinttypes>
#include <cmath>
#include <cstdlib>
#include <random>
@@ -484,6 +485,18 @@
Verify(input);
}
+// Regression test for issue #201: output larger than 4GB used to be corrupted.
+// Left disabled (note the DISABLED_ name prefix) because of CI constraints.
+TEST(Snappy, DISABLED_MoreThan4GB) {
+ std::mt19937 rng;  // Default-constructed: no explicit seed is supplied.
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
+ std::string input;
+ input.resize((1ull << 32) - 1);  // 2^32 - 1 bytes: the largest 32-bit length.
+ for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i)  // Fill every byte.
+ input[i] = static_cast<char>(uniform_byte(rng));  // Random value in [0, 255].
+ Verify(input);  // Helper defined elsewhere in snappy_unittest.cc.
+}
+
TEST(Snappy, RandomData) {
std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
std::uniform_int_distribution<int> uniform_0_to_3(0, 3);