// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "google/protobuf/parse_context.h"
#include <algorithm>
#include <cstring>
#include "absl/strings/cord.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/message_lite.h"
#include "google/protobuf/repeated_field.h"
#include "google/protobuf/wire_format_lite.h"
#include "utf8_validity.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
namespace google {
namespace protobuf {
namespace internal {
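// Returns true if a parse that starts at begin + overrun is guaranteed to
// terminate inside the kSlopBytes slop region, either on a 0 tag or on an
// end-group tag that pops past the current `depth`.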
// Only call when positioned at the start of a tag.
bool EpsCopyInputStream::ParseEndsInSlopRegion(const char* begin, int overrun,
int depth) {
constexpr int kSlopBytes = EpsCopyInputStream::kSlopBytes;
ABSL_DCHECK_GE(overrun, 0);
ABSL_DCHECK_LE(overrun, kSlopBytes);
auto ptr = begin + overrun;
auto end = begin + kSlopBytes;
while (ptr < end) {
uint32_t tag;
ptr = ReadTag(ptr, &tag);
if (ptr == nullptr || ptr > end) return false;
// Ending on a 0 tag is allowed and is the main reason this function is
// needed.
if (tag == 0) return true;
switch (tag & 7) {
case 0: { // Varint
uint64_t val;
ptr = VarintParse(ptr, &val);
if (ptr == nullptr) return false;
break;
}
case 1: { // fixed64
ptr += 8;
break;
}
case 2: { // len delim
int32_t size = ReadSize(&ptr);
if (ptr == nullptr || size > end - ptr) return false;
ptr += size;
break;
}
case 3: { // start group
depth++;
break;
}
case 4: { // end group
if (--depth < 0) return true; // We exit early
break;
}
case 5: { // fixed32
ptr += 4;
break;
}
default:
return false; // Unknown wire type
}
}
return false;
}
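// Fetches the next buffer to parse from. `overrun` is how far the parse has
// already advanced past buffer_end_ into the slop region; `depth` is the
// current group nesting depth (negative disables the ParseEndsInSlopRegion
// check). Returns nullptr once there are no more chunks.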
const char* EpsCopyInputStream::NextBuffer(int overrun, int depth) {
if (next_chunk_ == nullptr) return nullptr; // We've reached end of stream.
if (next_chunk_ != patch_buffer_) {
ABSL_DCHECK(size_ > kSlopBytes);
// The chunk is large enough to be used directly
buffer_end_ = next_chunk_ + size_ - kSlopBytes;
auto res = next_chunk_;
next_chunk_ = patch_buffer_;
if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
return res;
}
// Move the slop bytes of the previous buffer to the start of the patch
// buffer. Note we must use memmove because the previous buffer could itself
// be part of patch_buffer_.
std::memmove(patch_buffer_, buffer_end_, kSlopBytes);
if (overall_limit_ > 0 &&
(depth < 0 || !ParseEndsInSlopRegion(patch_buffer_, overrun, depth))) {
const void* data;
// ZeroCopyInputStream documents that Next may return zero-size buffers,
// hence the loop.
while (StreamNext(&data)) {
if (size_ > kSlopBytes) {
// We got a large chunk
std::memcpy(patch_buffer_ + kSlopBytes, data, kSlopBytes);
next_chunk_ = static_cast<const char*>(data);
buffer_end_ = patch_buffer_ + kSlopBytes;
if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
return patch_buffer_;
} else if (size_ > 0) {
std::memcpy(patch_buffer_ + kSlopBytes, data, size_);
next_chunk_ = patch_buffer_;
buffer_end_ = patch_buffer_ + size_;
if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
return patch_buffer_;
}
ABSL_DCHECK(size_ == 0) << size_;
}
overall_limit_ = 0; // Next failed; no further need to call it.
}
// End of stream or array
if (aliasing_ == kNoDelta) {
// If there are no more blocks and aliasing is enabled, the previous block
// is still valid and we can alias. We have users relying on string_views
// obtained from protos to outlive the proto when the parse was from an
// array. This guarantees string_views are always aliased if parsed from an
// array.
aliasing_ = reinterpret_cast<std::uintptr_t>(buffer_end_) -
reinterpret_cast<std::uintptr_t>(patch_buffer_);
}
next_chunk_ = nullptr;
buffer_end_ = patch_buffer_ + kSlopBytes;
size_ = 0;
return patch_buffer_;
}
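// Advances to the next buffer and rebases limit_ and limit_end_ on the new
// buffer_end_. Returns nullptr, and records end-of-stream, when the input is
// exhausted.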
const char* EpsCopyInputStream::Next() {
ABSL_DCHECK(limit_ > kSlopBytes);
auto p = NextBuffer(0 /* immaterial */, -1);
if (p == nullptr) {
limit_end_ = buffer_end_;
// Distinguish ending on a pushed limit from ending on end-of-stream.
SetEndOfStream();
return nullptr;
}
limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor
limit_end_ = buffer_end_ + std::min(0, limit_);
return p;
}
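// Called when the parse pointer has run past limit_end_ into the slop region.
// Returns the pointer rebased onto the next buffer plus a bool that is true
// when parsing must stop: limit reached, end of stream, or an error signalled
// by a nullptr pointer.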
std::pair<const char*, bool> EpsCopyInputStream::DoneFallback(int overrun,
int depth) {
// Did we exceed the limit (parse error)?
if (PROTOBUF_PREDICT_FALSE(overrun > limit_)) return {nullptr, true};
ABSL_DCHECK(overrun != limit_); // Guaranteed by caller.
ABSL_DCHECK(overrun < limit_); // Follows from above
// TODO Instead of this dcheck we could just assign, and remove the
// limit_end_ update from PopLimit, i.e.
// limit_end_ = buffer_end_ + (std::min)(0, limit_);
// if (ptr < limit_end_) return {ptr, false};
ABSL_DCHECK(limit_end_ == buffer_end_ + (std::min)(0, limit_));
// At this point we know the following assertion holds.
ABSL_DCHECK_GT(limit_, 0);
ABSL_DCHECK(limit_end_ == buffer_end_); // because limit_ > 0
const char* p;
do {
// We are past the end of buffer_end_, in the slop region.
ABSL_DCHECK_GE(overrun, 0);
p = NextBuffer(overrun, depth);
if (p == nullptr) {
// We are at the end of the stream
if (PROTOBUF_PREDICT_FALSE(overrun != 0)) return {nullptr, true};
ABSL_DCHECK_GT(limit_, 0);
limit_end_ = buffer_end_;
// Distinguish ending on a pushed limit from ending on end-of-stream.
SetEndOfStream();
return {buffer_end_, true};
}
limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor
p += overrun;
overrun = p - buffer_end_;
} while (overrun >= 0);
limit_end_ = buffer_end_ + std::min(0, limit_);
return {p, false};
}
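// Skips `size` bytes, crossing buffer boundaries as needed.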
const char* EpsCopyInputStream::SkipFallback(const char* ptr, int size) {
return AppendSize(ptr, size, [](const char* /*p*/, int /*s*/) {});
}
const char* EpsCopyInputStream::ReadStringFallback(const char* ptr, int size,
std::string* str) {
str->clear();
if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
// Reserve the string up to a static safe size. If strings are bigger than
// this, we proceed by growing the string as needed. This protects against
// malicious payloads making protobuf hold on to a lot of memory.
str->reserve(str->size() + std::min<int>(size, kSafeStringSize));
}
return AppendSize(ptr, size,
[str](const char* p, int s) { str->append(p, s); });
}
const char* EpsCopyInputStream::AppendStringFallback(const char* ptr, int size,
std::string* str) {
if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
// Reserve the string up to a static safe size. If strings are bigger than
// this, we proceed by growing the string as needed. This protects against
// malicious payloads making protobuf hold on to a lot of memory.
str->reserve(str->size() + std::min<int>(size, kSafeStringSize));
}
return AppendSize(ptr, size,
[str](const char* p, int s) { str->append(p, s); });
}
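// Reads `size` bytes into `cord`. Without an underlying ZeroCopyInputStream
// (zcis_ == nullptr) the bytes are copied straight out of the buffers;
// otherwise the buffered bytes are either handed back to the stream or copied
// into the cord first, and ZeroCopyInputStream::ReadCord reads the remainder.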
const char* EpsCopyInputStream::ReadCordFallback(const char* ptr, int size,
absl::Cord* cord) {
if (zcis_ == nullptr) {
int bytes_from_buffer = buffer_end_ - ptr + kSlopBytes;
if (size <= bytes_from_buffer) {
*cord = absl::string_view(ptr, size);
return ptr + size;
}
return AppendSize(ptr, size, [cord](const char* p, int s) {
cord->Append(absl::string_view(p, s));
});
}
int new_limit = buffer_end_ - ptr + limit_;
if (size > new_limit) return nullptr;
new_limit -= size;
int bytes_from_buffer = buffer_end_ - ptr + kSlopBytes;
const bool in_patch_buf = reinterpret_cast<uintptr_t>(ptr) -
reinterpret_cast<uintptr_t>(patch_buffer_) <=
kPatchBufferSize;
if (bytes_from_buffer > kPatchBufferSize || !in_patch_buf) {
cord->Clear();
StreamBackUp(bytes_from_buffer);
} else if (bytes_from_buffer == kSlopBytes && next_chunk_ != nullptr &&
// Only back up if next_chunk_ points to a valid buffer returned by
// ZeroCopyInputStream. next_chunk_ equals patch_buffer_ when StreamNext()
// returned a chunk that's smaller than or equal to kSlopBytes.
next_chunk_ != patch_buffer_) {
cord->Clear();
StreamBackUp(size_);
} else {
size -= bytes_from_buffer;
ABSL_DCHECK_GT(size, 0);
*cord = absl::string_view(ptr, bytes_from_buffer);
if (next_chunk_ == patch_buffer_) {
// We have read to the end of the last buffer returned by
// ZeroCopyInputStream, so the stream is already in the right position.
} else if (next_chunk_ == nullptr) {
// There are no remaining chunks, so we can't read size more bytes.
SetEndOfStream();
return nullptr;
} else {
// Next chunk is already loaded
ABSL_DCHECK(size_ > kSlopBytes);
StreamBackUp(size_ - kSlopBytes);
}
}
if (size > overall_limit_) return nullptr;
overall_limit_ -= size;
if (!zcis_->ReadCord(cord, size)) return nullptr;
ptr = InitFrom(zcis_);
limit_ = new_limit - static_cast<int>(buffer_end_ - ptr);
limit_end_ = buffer_end_ + (std::min)(0, limit_);
return ptr;
}
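// Points the parser at `zcis`. A first chunk larger than kSlopBytes is parsed
// in place, withholding its last kSlopBytes as slop; a smaller chunk is
// copied into the tail of patch_buffer_ instead.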
const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) {
zcis_ = zcis;
const void* data;
int size;
limit_ = INT_MAX;
if (zcis->Next(&data, &size)) {
overall_limit_ -= size;
if (size > kSlopBytes) {
auto ptr = static_cast<const char*>(data);
limit_ -= size - kSlopBytes;
limit_end_ = buffer_end_ = ptr + size - kSlopBytes;
next_chunk_ = patch_buffer_;
if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
return ptr;
} else {
limit_end_ = buffer_end_ = patch_buffer_ + kSlopBytes;
next_chunk_ = patch_buffer_;
auto ptr = patch_buffer_ + kPatchBufferSize - size;
std::memcpy(ptr, data, size);
return ptr;
}
}
overall_limit_ = 0;
next_chunk_ = nullptr;
size_ = 0;
limit_end_ = buffer_end_ = patch_buffer_;
return patch_buffer_;
}
const char* ParseContext::ReadSizeAndPushLimitAndDepth(const char* ptr,
LimitToken* old_limit) {
return ReadSizeAndPushLimitAndDepthInlined(ptr, old_limit);
}
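// Parses a length-delimited sub-message: reads the size, pushes a limit and
// enters one level of depth, runs the message's _InternalParse, then restores
// the depth and pops the limit.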
const char* ParseContext::ParseMessage(MessageLite* msg, const char* ptr) {
LimitToken old;
ptr = ReadSizeAndPushLimitAndDepth(ptr, &old);
if (ptr == nullptr) return ptr;
auto old_depth = depth_;
ptr = msg->_InternalParse(ptr, this);
if (ptr != nullptr) ABSL_DCHECK_EQ(old_depth, depth_);
depth_++;
if (!PopLimit(std::move(old))) return nullptr;
return ptr;
}
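// Helpers for re-serializing unknown fields: append varints, tags, and
// length-delimited payloads in wire format. For example, appending 300 as a
// varint yields the bytes 0xAC 0x02.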
inline void WriteVarint(uint64_t val, std::string* s) {
while (val >= 128) {
uint8_t c = val | 0x80;
s->push_back(c);
val >>= 7;
}
s->push_back(val);
}
void WriteVarint(uint32_t num, uint64_t val, std::string* s) {
WriteVarint(num << 3, s);
WriteVarint(val, s);
}
void WriteLengthDelimited(uint32_t num, absl::string_view val, std::string* s) {
WriteVarint((num << 3) + 2, s);
WriteVarint(val.size(), s);
s->append(val.data(), val.size());
}
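// Slow path for a 32-bit varint: `res` already holds p[0], whose continuation
// bit was set. Each (byte - 1) term cancels the previous byte's continuation
// bit. Bytes beyond the fifth are consumed but contribute no bits to the
// 32-bit result.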
std::pair<const char*, uint32_t> VarintParseSlow32(const char* p,
uint32_t res) {
for (std::uint32_t i = 1; i < 5; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
// Accept varints longer than 5 bytes (up to 10); the extra bytes contribute
// no bits to the 32-bit result.
for (std::uint32_t i = 5; i < 10; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
return {nullptr, 0};
}
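// Same as above, but accumulates into 64 bits so every byte contributes bits.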
std::pair<const char*, uint64_t> VarintParseSlow64(const char* p,
uint32_t res32) {
uint64_t res = res32;
for (std::uint32_t i = 1; i < 10; i++) {
uint64_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
return {nullptr, 0};
}
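// Continues parsing a tag whose first two bytes (already folded into `res`)
// both had their continuation bits set; tags occupy at most five bytes.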
std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res) {
for (std::uint32_t i = 2; i < 5; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
return {nullptr, 0};
}
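// Continues parsing a length prefix whose first byte (already folded into
// `res`) had its continuation bit set. Sizes of 2GiB or more, or sizes close
// enough to INT_MAX to overflow limit arithmetic, are rejected.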
std::pair<const char*, int32_t> ReadSizeFallback(const char* p, uint32_t res) {
for (std::uint32_t i = 1; i < 4; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
std::uint32_t byte = static_cast<uint8_t>(p[4]);
if (PROTOBUF_PREDICT_FALSE(byte >= 8)) return {nullptr, 0}; // size >= 2GiB
res += (byte - 1) << 28;
// Protect against signed integer overflow in PushLimit. Limits are relative
// to buffer ends and ptr could potentially be kSlopBytes beyond a buffer end.
// To protect against overflow we reject limits absurdly close to INT_MAX.
if (PROTOBUF_PREDICT_FALSE(res > INT_MAX - ParseContext::kSlopBytes)) {
return {nullptr, 0};
}
return {p + 5, res};
}
const char* StringParser(const char* begin, const char* end, void* object,
ParseContext*) {
auto str = static_cast<std::string*>(object);
str->append(begin, end - begin);
return end;
}
// Defined in wire_format_lite.cc
void PrintUTF8ErrorLog(absl::string_view message_name,
absl::string_view field_name, const char* operation_str,
bool emit_stacktrace);
bool VerifyUTF8(absl::string_view str, const char* field_name) {
if (!utf8_range::IsStructurallyValid(str)) {
PrintUTF8ErrorLog("", field_name, "parsing", false);
return false;
}
return true;
}
const char* InlineGreedyStringParser(std::string* s, const char* ptr,
ParseContext* ctx) {
int size = ReadSize(&ptr);
if (!ptr) return nullptr;
return ctx->ReadString(ptr, size, s);
}
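// Parses a packed varint field into a RepeatedField<T>, zig-zag decoding each
// value when `sign` is true.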
template <typename T, bool sign>
const char* VarintParser(void* object, const char* ptr, ParseContext* ctx) {
return ctx->ReadPackedVarint(ptr, [object](uint64_t varint) {
T val;
if (sign) {
if (sizeof(T) == 8) {
val = WireFormatLite::ZigZagDecode64(varint);
} else {
val = WireFormatLite::ZigZagDecode32(varint);
}
} else {
val = varint;
}
static_cast<RepeatedField<T>*>(object)->Add(val);
});
}
const char* PackedInt32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int32_t, false>(object, ptr, ctx);
}
const char* PackedUInt32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<uint32_t, false>(object, ptr, ctx);
}
const char* PackedInt64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int64_t, false>(object, ptr, ctx);
}
const char* PackedUInt64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<uint64_t, false>(object, ptr, ctx);
}
const char* PackedSInt32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int32_t, true>(object, ptr, ctx);
}
const char* PackedSInt64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int64_t, true>(object, ptr, ctx);
}
const char* PackedEnumParser(void* object, const char* ptr, ParseContext* ctx) {
return VarintParser<int, false>(object, ptr, ctx);
}
const char* PackedBoolParser(void* object, const char* ptr, ParseContext* ctx) {
return VarintParser<bool, false>(object, ptr, ctx);
}
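// Parses a packed fixed-width field: reads the payload size in bytes and lets
// the context copy whole elements into the RepeatedField<T>.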
template <typename T>
const char* FixedParser(void* object, const char* ptr, ParseContext* ctx) {
int size = ReadSize(&ptr);
return ctx->ReadPackedFixed(ptr, size,
static_cast<RepeatedField<T>*>(object));
}
const char* PackedFixed32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<uint32_t>(object, ptr, ctx);
}
const char* PackedSFixed32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<int32_t>(object, ptr, ctx);
}
const char* PackedFixed64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<uint64_t>(object, ptr, ctx);
}
const char* PackedSFixed64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<int64_t>(object, ptr, ctx);
}
const char* PackedFloatParser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<float>(object, ptr, ctx);
}
const char* PackedDoubleParser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<double>(object, ptr, ctx);
}
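// Re-serializes unknown fields back into wire format, appending them to a
// std::string, or skips them entirely when `unknown` is nullptr.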
class UnknownFieldLiteParserHelper {
public:
explicit UnknownFieldLiteParserHelper(std::string* unknown)
: unknown_(unknown) {}
void AddVarint(uint32_t num, uint64_t value) {
if (unknown_ == nullptr) return;
WriteVarint(num * 8, unknown_);
WriteVarint(value, unknown_);
}
void AddFixed64(uint32_t num, uint64_t value) {
if (unknown_ == nullptr) return;
WriteVarint(num * 8 + 1, unknown_);
char buffer[8];
io::CodedOutputStream::WriteLittleEndian64ToArray(
value, reinterpret_cast<uint8_t*>(buffer));
unknown_->append(buffer, 8);
}
const char* ParseLengthDelimited(uint32_t num, const char* ptr,
ParseContext* ctx) {
int size = ReadSize(&ptr);
GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
if (unknown_ == nullptr) return ctx->Skip(ptr, size);
WriteVarint(num * 8 + 2, unknown_);
WriteVarint(size, unknown_);
return ctx->AppendString(ptr, size, unknown_);
}
const char* ParseGroup(uint32_t num, const char* ptr, ParseContext* ctx) {
if (unknown_) WriteVarint(num * 8 + 3, unknown_);
ptr = ctx->ParseGroupInlined(ptr, num * 8 + 3, [&](const char* ptr) {
return WireFormatParser(*this, ptr, ctx);
});
GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
if (unknown_) WriteVarint(num * 8 + 4, unknown_);
return ptr;
}
void AddFixed32(uint32_t num, uint32_t value) {
if (unknown_ == nullptr) return;
WriteVarint(num * 8 + 5, unknown_);
char buffer[4];
io::CodedOutputStream::WriteLittleEndian32ToArray(
value, reinterpret_cast<uint8_t*>(buffer));
unknown_->append(buffer, 4);
}
private:
std::string* unknown_;
};
const char* UnknownGroupLiteParse(std::string* unknown, const char* ptr,
ParseContext* ctx) {
UnknownFieldLiteParserHelper field_parser(unknown);
return WireFormatParser(field_parser, ptr, ctx);
}
const char* UnknownFieldParse(uint32_t tag, std::string* unknown,
const char* ptr, ParseContext* ctx) {
UnknownFieldLiteParserHelper field_parser(unknown);
return FieldParser(tag, field_parser, ptr, ctx);
}
} // namespace internal
} // namespace protobuf
} // namespace google
#include "google/protobuf/port_undef.inc"