// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "google/protobuf/parse_context.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/message_lite.h"
#include "google/protobuf/repeated_field.h"
#include "google/protobuf/wire_format_lite.h"
#include "utf8_validity.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
namespace google {
namespace protobuf {
namespace internal {
namespace {
// Only call when ptr is at the start of a tag.
bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth) {
constexpr int kSlopBytes = EpsCopyInputStream::kSlopBytes;
GOOGLE_DCHECK_GE(overrun, 0);
GOOGLE_DCHECK_LE(overrun, kSlopBytes);
auto ptr = begin + overrun;
auto end = begin + kSlopBytes;
while (ptr < end) {
uint32_t tag;
ptr = ReadTag(ptr, &tag);
if (ptr == nullptr || ptr > end) return false;
// Ending on a 0 tag is allowed and is the main reason this function is
// necessary.
if (tag == 0) return true;
switch (tag & 7) {
case 0: { // Varint
uint64_t val;
ptr = VarintParse(ptr, &val);
if (ptr == nullptr) return false;
break;
}
case 1: { // fixed64
ptr += 8;
break;
}
case 2: { // len delim
int32_t size = ReadSize(&ptr);
if (ptr == nullptr || size > end - ptr) return false;
ptr += size;
break;
}
case 3: { // start group
depth++;
break;
}
case 4: { // end group
if (--depth < 0) return true; // We exit early
break;
}
case 5: { // fixed32
ptr += 4;
break;
}
default:
return false; // Unknown wireformat
}
}
return false;
}
} // namespace
const char* EpsCopyInputStream::NextBuffer(int overrun, int depth) {
if (next_chunk_ == nullptr) return nullptr; // We've reached end of stream.
if (next_chunk_ != buffer_) {
GOOGLE_DCHECK(size_ > kSlopBytes);
// The chunk is large enough to be used directly
buffer_end_ = next_chunk_ + size_ - kSlopBytes;
auto res = next_chunk_;
next_chunk_ = buffer_;
if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
return res;
}
// Move the slop bytes of previous buffer to start of the patch buffer.
// Note we must use memmove because the previous buffer could be part of
// buffer_.
std::memmove(buffer_, buffer_end_, kSlopBytes);
if (overall_limit_ > 0 &&
(depth < 0 || !ParseEndsInSlopRegion(buffer_, overrun, depth))) {
const void* data;
// ZeroCopyInputStream indicates Next may return 0 size buffers. Hence
// we loop.
while (StreamNext(&data)) {
if (size_ > kSlopBytes) {
// We got a large chunk
std::memcpy(buffer_ + kSlopBytes, data, kSlopBytes);
next_chunk_ = static_cast<const char*>(data);
buffer_end_ = buffer_ + kSlopBytes;
if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
return buffer_;
} else if (size_ > 0) {
std::memcpy(buffer_ + kSlopBytes, data, size_);
next_chunk_ = buffer_;
buffer_end_ = buffer_ + size_;
if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch;
return buffer_;
}
GOOGLE_DCHECK(size_ == 0) << size_;
}
overall_limit_ = 0; // Next() failed; there is no need to call it again.
}
// End of stream or array
if (aliasing_ == kNoDelta) {
// If there are no more blocks and we were aliasing, the previous block
// is still valid and we can keep aliasing. We have users relying on
// string_views obtained from protos to outlive the proto when the parse
// was from an array. This guarantees string_views are always aliased if
// parsed from an array.
aliasing_ = reinterpret_cast<std::uintptr_t>(buffer_end_) -
reinterpret_cast<std::uintptr_t>(buffer_);
}
next_chunk_ = nullptr;
buffer_end_ = buffer_ + kSlopBytes;
size_ = 0;
return buffer_;
}
const char* EpsCopyInputStream::Next() {
GOOGLE_DCHECK(limit_ > kSlopBytes);
auto p = NextBuffer(0 /* immaterial */, -1);
if (p == nullptr) {
limit_end_ = buffer_end_;
// Distinguish ending on a pushed limit or ending on end-of-stream.
SetEndOfStream();
return nullptr;
}
limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor
limit_end_ = buffer_end_ + std::min(0, limit_);
return p;
}
std::pair<const char*, bool> EpsCopyInputStream::DoneFallback(int overrun,
int depth) {
// Did we exceed the limit (parse error)?
if (PROTOBUF_PREDICT_FALSE(overrun > limit_)) return {nullptr, true};
GOOGLE_DCHECK(overrun != limit_); // Guaranteed by caller.
GOOGLE_DCHECK(overrun < limit_); // Follows from above
// TODO(gerbens) Instead of this dcheck we could just assign, and stop
// updating limit_end_ in PopLimit, i.e.
// limit_end_ = buffer_end_ + (std::min)(0, limit_);
// if (ptr < limit_end_) return {ptr, false};
GOOGLE_DCHECK(limit_end_ == buffer_end_ + (std::min)(0, limit_));
// At this point we know the following assertion holds.
GOOGLE_DCHECK_GT(limit_, 0);
GOOGLE_DCHECK(limit_end_ == buffer_end_); // because limit_ > 0
const char* p;
do {
// We are past the end of buffer_end_, in the slop region.
GOOGLE_DCHECK_GE(overrun, 0);
p = NextBuffer(overrun, depth);
if (p == nullptr) {
// We are at the end of the stream
if (PROTOBUF_PREDICT_FALSE(overrun != 0)) return {nullptr, true};
GOOGLE_DCHECK_GT(limit_, 0);
limit_end_ = buffer_end_;
// Distinguish ending on a pushed limit or ending on end-of-stream.
SetEndOfStream();
return {buffer_end_, true};
}
limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor
p += overrun;
overrun = p - buffer_end_;
} while (overrun >= 0);
limit_end_ = buffer_end_ + std::min(0, limit_);
return {p, false};
}
const char* EpsCopyInputStream::SkipFallback(const char* ptr, int size) {
return AppendSize(ptr, size, [](const char* /*p*/, int /*s*/) {});
}
const char* EpsCopyInputStream::ReadStringFallback(const char* ptr, int size,
std::string* str) {
str->clear();
if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
// Reserve the string up to a static safe size. If strings are bigger than
// this we proceed by growing the string as needed. This protects against
// malicious payloads making protobuf hold on to a lot of memory.
str->reserve(str->size() + std::min<int>(size, kSafeStringSize));
}
return AppendSize(ptr, size,
[str](const char* p, int s) { str->append(p, s); });
}
const char* EpsCopyInputStream::AppendStringFallback(const char* ptr, int size,
std::string* str) {
if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
// Reserve the string up to a static safe size. If strings are bigger than
// this we proceed by growing the string as needed. This protects against
// malicious payloads making protobuf hold on to a lot of memory.
str->reserve(str->size() + std::min<int>(size, kSafeStringSize));
}
return AppendSize(ptr, size,
[str](const char* p, int s) { str->append(p, s); });
}
const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) {
zcis_ = zcis;
const void* data;
int size;
limit_ = INT_MAX;
if (zcis->Next(&data, &size)) {
overall_limit_ -= size;
if (size > kSlopBytes) {
auto ptr = static_cast<const char*>(data);
limit_ -= size - kSlopBytes;
limit_end_ = buffer_end_ = ptr + size - kSlopBytes;
next_chunk_ = buffer_;
if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
return ptr;
} else {
limit_end_ = buffer_end_ = buffer_ + kSlopBytes;
next_chunk_ = buffer_;
auto ptr = buffer_ + 2 * kSlopBytes - size;
std::memcpy(ptr, data, size);
return ptr;
}
}
overall_limit_ = 0;
next_chunk_ = nullptr;
size_ = 0;
limit_end_ = buffer_end_ = buffer_;
return buffer_;
}
const char* ParseContext::ReadSizeAndPushLimitAndDepth(const char* ptr,
int* old_limit) {
int size = ReadSize(&ptr);
if (PROTOBUF_PREDICT_FALSE(!ptr) || depth_ <= 0) {
*old_limit = 0; // Make sure this isn't uninitialized even on error return
return nullptr;
}
*old_limit = PushLimit(ptr, size);
--depth_;
return ptr;
}
const char* ParseContext::ParseMessage(MessageLite* msg, const char* ptr) {
int old;
ptr = ReadSizeAndPushLimitAndDepth(ptr, &old);
if (ptr == nullptr) return ptr;
auto old_depth = depth_;
ptr = msg->_InternalParse(ptr, this);
if (ptr != nullptr) GOOGLE_DCHECK_EQ(old_depth, depth_);
depth_++;
if (!PopLimit(old)) return nullptr;
return ptr;
}
inline void WriteVarint(uint64_t val, std::string* s) {
while (val >= 128) {
uint8_t c = val | 0x80;
s->push_back(c);
val >>= 7;
}
s->push_back(val);
}
void WriteVarint(uint32_t num, uint64_t val, std::string* s) {
WriteVarint(num << 3, s);
WriteVarint(val, s);
}
void WriteLengthDelimited(uint32_t num, absl::string_view val, std::string* s) {
WriteVarint((num << 3) + 2, s);
WriteVarint(val.size(), s);
s->append(val.data(), val.size());
}
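// For illustration, WriteVarint(/*num=*/1, /*val=*/300, &s) appends the tag
// byte 0x08 (field 1, wire type 0) followed by 0xAC 0x02, the varint
// encoding of 300.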
std::pair<const char*, uint32_t> VarintParseSlow32(const char* p,
uint32_t res) {
for (std::uint32_t i = 1; i < 5; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
// Accept an over-long encoding of up to 10 bytes; bytes past the fifth
// carry no data for a 32-bit value and are only checked for termination.
for (std::uint32_t i = 5; i < 10; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
return {nullptr, 0};
}
std::pair<const char*, uint64_t> VarintParseSlow64(const char* p,
uint32_t res32) {
uint64_t res = res32;
for (std::uint32_t i = 1; i < 10; i++) {
uint64_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
return {nullptr, 0};
}
std::pair<const char*, uint32_t> ReadTagFallback(const char* p, uint32_t res) {
for (std::uint32_t i = 2; i < 5; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
return {nullptr, 0};
}
std::pair<const char*, int32_t> ReadSizeFallback(const char* p, uint32_t res) {
for (std::uint32_t i = 1; i < 4; i++) {
uint32_t byte = static_cast<uint8_t>(p[i]);
res += (byte - 1) << (7 * i);
if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
return {p + i + 1, res};
}
}
std::uint32_t byte = static_cast<uint8_t>(p[4]);
if (PROTOBUF_PREDICT_FALSE(byte >= 8)) return {nullptr, 0}; // size >= 2gb
res += (byte - 1) << 28;
// Protect against signed integer overflow in PushLimit. Limits are relative
// to buffer ends and ptr could potentially be kSlopBytes beyond a buffer end.
// To protect against overflow we reject limits absurdly close to INT_MAX.
if (PROTOBUF_PREDICT_FALSE(res > INT_MAX - ParseContext::kSlopBytes)) {
return {nullptr, 0};
}
return {p + 5, res};
}
const char* StringParser(const char* begin, const char* end, void* object,
ParseContext*) {
auto str = static_cast<std::string*>(object);
str->append(begin, end - begin);
return end;
}
// Defined in wire_format_lite.cc
void PrintUTF8ErrorLog(absl::string_view message_name,
absl::string_view field_name, const char* operation_str,
bool emit_stacktrace);
bool VerifyUTF8(absl::string_view str, const char* field_name) {
if (!utf8_range::IsStructurallyValid(str)) {
PrintUTF8ErrorLog("", field_name, "parsing", false);
return false;
}
return true;
}
const char* InlineGreedyStringParser(std::string* s, const char* ptr,
ParseContext* ctx) {
int size = ReadSize(&ptr);
if (!ptr) return nullptr;
return ctx->ReadString(ptr, size, s);
}
template <typename T, bool sign>
const char* VarintParser(void* object, const char* ptr, ParseContext* ctx) {
return ctx->ReadPackedVarint(ptr, [object](uint64_t varint) {
T val;
if (sign) {
if (sizeof(T) == 8) {
val = WireFormatLite::ZigZagDecode64(varint);
} else {
val = WireFormatLite::ZigZagDecode32(varint);
}
} else {
val = varint;
}
static_cast<RepeatedField<T>*>(object)->Add(val);
});
}
const char* PackedInt32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int32_t, false>(object, ptr, ctx);
}
const char* PackedUInt32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<uint32_t, false>(object, ptr, ctx);
}
const char* PackedInt64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int64_t, false>(object, ptr, ctx);
}
const char* PackedUInt64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<uint64_t, false>(object, ptr, ctx);
}
const char* PackedSInt32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int32_t, true>(object, ptr, ctx);
}
const char* PackedSInt64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return VarintParser<int64_t, true>(object, ptr, ctx);
}
const char* PackedEnumParser(void* object, const char* ptr, ParseContext* ctx) {
return VarintParser<int, false>(object, ptr, ctx);
}
const char* PackedBoolParser(void* object, const char* ptr, ParseContext* ctx) {
return VarintParser<bool, false>(object, ptr, ctx);
}
template <typename T>
const char* FixedParser(void* object, const char* ptr, ParseContext* ctx) {
int size = ReadSize(&ptr);
return ctx->ReadPackedFixed(ptr, size,
static_cast<RepeatedField<T>*>(object));
}
const char* PackedFixed32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<uint32_t>(object, ptr, ctx);
}
const char* PackedSFixed32Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<int32_t>(object, ptr, ctx);
}
const char* PackedFixed64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<uint64_t>(object, ptr, ctx);
}
const char* PackedSFixed64Parser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<int64_t>(object, ptr, ctx);
}
const char* PackedFloatParser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<float>(object, ptr, ctx);
}
const char* PackedDoubleParser(void* object, const char* ptr,
ParseContext* ctx) {
return FixedParser<double>(object, ptr, ctx);
}
class UnknownFieldLiteParserHelper {
public:
explicit UnknownFieldLiteParserHelper(std::string* unknown)
: unknown_(unknown) {}
void AddVarint(uint32_t num, uint64_t value) {
if (unknown_ == nullptr) return;
WriteVarint(num * 8, unknown_);
WriteVarint(value, unknown_);
}
void AddFixed64(uint32_t num, uint64_t value) {
if (unknown_ == nullptr) return;
WriteVarint(num * 8 + 1, unknown_);
char buffer[8];
io::CodedOutputStream::WriteLittleEndian64ToArray(
value, reinterpret_cast<uint8_t*>(buffer));
unknown_->append(buffer, 8);
}
const char* ParseLengthDelimited(uint32_t num, const char* ptr,
ParseContext* ctx) {
int size = ReadSize(&ptr);
GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
if (unknown_ == nullptr) return ctx->Skip(ptr, size);
WriteVarint(num * 8 + 2, unknown_);
WriteVarint(size, unknown_);
return ctx->AppendString(ptr, size, unknown_);
}
const char* ParseGroup(uint32_t num, const char* ptr, ParseContext* ctx) {
if (unknown_) WriteVarint(num * 8 + 3, unknown_);
ptr = ctx->ParseGroup(this, ptr, num * 8 + 3);
GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
if (unknown_) WriteVarint(num * 8 + 4, unknown_);
return ptr;
}
void AddFixed32(uint32_t num, uint32_t value) {
if (unknown_ == nullptr) return;
WriteVarint(num * 8 + 5, unknown_);
char buffer[4];
io::CodedOutputStream::WriteLittleEndian32ToArray(
value, reinterpret_cast<uint8_t*>(buffer));
unknown_->append(buffer, 4);
}
const char* _InternalParse(const char* ptr, ParseContext* ctx) {
return WireFormatParser(*this, ptr, ctx);
}
private:
std::string* unknown_;
};
const char* UnknownGroupLiteParse(std::string* unknown, const char* ptr,
ParseContext* ctx) {
UnknownFieldLiteParserHelper field_parser(unknown);
return WireFormatParser(field_parser, ptr, ctx);
}
const char* UnknownFieldParse(uint32_t tag, std::string* unknown,
const char* ptr, ParseContext* ctx) {
UnknownFieldLiteParserHelper field_parser(unknown);
return FieldParser(tag, field_parser, ptr, ctx);
}
#ifdef __aarch64__
// Generally speaking, the ARM-optimized Varint decode algorithm is to extract
// and concatenate all potentially valid data bits, compute the actual length
// of the Varint, and mask off the data bits which are not actually part of the
// result. More detail on the two main parts is shown below.
//
// 1) Extract and concatenate all potentially valid data bits.
// Two ARM-specific features help significantly:
// a) Efficient and non-destructive bit extraction (UBFX)
// b) A single instruction can perform an OR with a shifted second
// operand in one cycle. E.g., the following two lines do the same
// thing:
// ```result = operand_1 | (operand_2 << 7);```
// ```ORR %[result], %[operand_1], %[operand_2], LSL #7```
// The figure below shows the implementation for handling four chunks.
//
// Bits 32 31-24 23 22-16 15 14-8 7 6-0
// +----+---------+----+---------+----+---------+----+---------+
// |CB 3| Chunk 3 |CB 2| Chunk 2 |CB 1| Chunk 1 |CB 0| Chunk 0 |
// +----+---------+----+---------+----+---------+----+---------+
// | | | |
// UBFX UBFX UBFX UBFX -- cycle 1
// | | | |
// V V V V
// Combined LSL #7 and ORR Combined LSL #7 and ORR -- cycle 2
// | |
// V V
// Combined LSL #14 and ORR -- cycle 3
// |
// V
// Parsed bits 0-27
//
//
// 2) Calculate the index of the cleared continuation bit in order to determine
// where the encoded Varint ends and the size of the decoded value. The
// easiest way to do this is mask off all data bits, leaving just the
// continuation bits. We actually need to do the masking on an inverted
// copy of the data, which leaves a 1 in all continuation bits which were
// originally clear. The number of trailing zeroes in this value indicates
// the size of the Varint.
//
// AND 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x80
//
// Bits 63 55 47 39 31 23 15 7
// +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
// ~ |CB 7| |CB 6| |CB 5| |CB 4| |CB 3| |CB 2| |CB 1| |CB 0| |
// +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
// | | | | | | | |
// V V V V V V V V
// Bits 63 55 47 39 31 23 15 7
// +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
// |~CB 7|0|~CB 6|0|~CB 5|0|~CB 4|0|~CB 3|0|~CB 2|0|~CB 1|0|~CB 0|0|
// +----+--+----+--+----+--+----+--+----+--+----+--+----+--+----+--+
// |
// CTZ
// V
// Index of first cleared continuation bit
//
//
// While this is implemented in C++, significant care has been taken to ensure
// the compiler emits the best instruction sequence. In some cases we use the
// following two functions to manipulate the compiler's scheduling decisions.
//
// Controls compiler scheduling by telling it that the first value is modified
// by the second value at the callsite. This is useful if non-critical-path
// instructions are too aggressively scheduled, resulting in a slowdown of the
// actual critical path due to opportunity costs. An example usage is shown
// where a false dependence of num_bits on result is added to prevent checking
// for a very unlikely error until all critical path instructions have been
// fetched.
//
// ```
// num_bits = <multiple operations to calculate new num_bits value>
// result = <multiple operations to calculate result>
// num_bits = ValueBarrier(num_bits, result);
// if (num_bits == 63) {
// GOOGLE_LOG(FATAL) << "Invalid num_bits value";
// }
// ```
template <typename V1Type, typename V2Type>
PROTOBUF_ALWAYS_INLINE inline V1Type ValueBarrier(V1Type value1,
V2Type value2) {
asm("" : "+r"(value1) : "r"(value2));
return value1;
}
// Falsely indicate that the specific value is modified at this location. This
// prevents code which depends on this value from being scheduled earlier.
template <typename V1Type>
PROTOBUF_ALWAYS_INLINE inline V1Type ValueBarrier(V1Type value1) {
asm("" : "+r"(value1));
return value1;
}
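// The two helpers below are referenced by the chunk-extraction code that
// follows but were missing from this excerpt; minimal sketches, assumed to
// wrap ValueBarrier in the usual way.
// Extracts seven bits of `data` starting at bit `start` (an unsigned bit
// field extract, UBFX on ARM).
PROTOBUF_ALWAYS_INLINE inline uint64_t Ubfx7(uint64_t data, uint64_t start) {
  return ValueBarrier((data >> start) & 0x7f);
}
// Forces the intermediate value into a register so the compiler cannot fold
// it into a later instruction, preserving the intended schedule.
PROTOBUF_ALWAYS_INLINE inline uint64_t ForceToRegister(uint64_t value) {
  return ValueBarrier(value);
}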
PROTOBUF_ALWAYS_INLINE inline uint64_t ExtractAndMergeTwoChunks(
uint64_t data, uint64_t first_byte) {
GOOGLE_DCHECK_LE(first_byte, 6);
uint64_t first = Ubfx7(data, first_byte * 8);
uint64_t second = Ubfx7(data, (first_byte + 1) * 8);
return ForceToRegister(first | (second << 7));
}
struct SlowPathEncodedInfo {
const char* p;
uint64_t last8;
uint64_t valid_bits;
uint64_t valid_chunk_bits;
uint64_t masked_cont_bits;
};
// Performs multiple actions which are identical between 32 and 64 bit Varints
// in order to compute the length of the encoded Varint and the new value
// of p.
PROTOBUF_ALWAYS_INLINE inline SlowPathEncodedInfo ComputeLengthAndUpdateP(
const char* p) {
SlowPathEncodedInfo result;
// Load the last eight bytes of the (at most ten byte) encoded Varint.
std::memcpy(&result.last8, p + 2, sizeof(result.last8));
uint64_t mask = ForceToRegister(0x8080808080808080);
// After the inversion, a bit is set only where a continuation bit was clear.
result.masked_cont_bits = ForceToRegister(mask & (~result.last8));
// The number of trailing zeroes is the bit index of the first cleared
// continuation bit. The result is undefined for an input of 0; we handle
// that case below.
result.valid_bits = absl::countr_zero(result.masked_cont_bits);
// Calculates the number of chunks in the encoded Varint. This value is low
// by three as neither the cleared continuation chunk nor the first two chunks
// are counted.
uint64_t set_continuation_bits = result.valid_bits >> 3;
// Update p to point past the encoded Varint.
result.p = p + set_continuation_bits + 3;
// Calculate number of valid data bits in the decoded value so invalid bits
// can be masked off. Value is too low by 14 but we account for that when
// calculating the mask.
result.valid_chunk_bits = result.valid_bits - set_continuation_bits;
return result;
}
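// Worked example (three-byte Varint): the byte at p + 2 has a clear
// continuation bit, so masked_cont_bits has bit 7 set, valid_bits = 7,
// set_continuation_bits = 0, p advances by 3, and valid_chunk_bits = 7,
// which (with the 14-bit adjustment baked into the mask below) keeps
// 21 data bits.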
constexpr uint64_t kResultMaskUnshifted = 0xffffffffffffc000ULL;
constexpr uint64_t kFirstResultBitChunk1 = 1 * 7;
constexpr uint64_t kFirstResultBitChunk2 = 2 * 7;
constexpr uint64_t kFirstResultBitChunk3 = 3 * 7;
constexpr uint64_t kFirstResultBitChunk4 = 4 * 7;
constexpr uint64_t kFirstResultBitChunk6 = 6 * 7;
constexpr uint64_t kFirstResultBitChunk8 = 8 * 7;
constexpr uint64_t kValidBitsForInvalidVarint = 0x60;
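// Precondition (assumed from the fast-path caller): the continuation bits of
// the first two bytes at p are set, and first8 holds the first eight bytes
// loaded from p.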
PROTOBUF_NOINLINE const char* VarintParseSlowArm64(const char* p, uint64_t* out,
uint64_t first8) {
SlowPathEncodedInfo info = ComputeLengthAndUpdateP(p);
// Extract data bits from the low six chunks. This includes chunks zero and
// one which we already know are valid.
uint64_t merged_01 = ExtractAndMergeTwoChunks(first8, /*first_byte=*/0);
uint64_t merged_23 = ExtractAndMergeTwoChunks(first8, /*first_byte=*/2);
uint64_t merged_45 = ExtractAndMergeTwoChunks(first8, /*first_byte=*/4);
// Low 42 bits of decoded value.
uint64_t result = merged_01 | merged_23 << kFirstResultBitChunk2 |
merged_45 << kFirstResultBitChunk4;
// This immediate ends in 14 zeroes since valid_chunk_bits is too low by 14.
uint64_t result_mask = kResultMaskUnshifted << info.valid_chunk_bits;
// masked_cont_bits is 0 iff the Varint is invalid. In that case, force
// valid_bits onto the long-Varint path below, where the error is detected.
info.valid_bits =
info.masked_cont_bits ? info.valid_bits : kValidBitsForInvalidVarint;
// Test for early exit if Varint does not exceed 6 chunks. Branching on one
// bit is faster on ARM than via a compare and branch.
if (PROTOBUF_PREDICT_FALSE((info.valid_bits & 0x20) != 0)) {
// Extract data bits from the high four chunks.
uint64_t merged_67 = ExtractAndMergeTwoChunks(first8, /*first_byte=*/6);
// The last two chunks come from the last two bytes of info.last8.
uint64_t merged_89 =
ExtractAndMergeTwoChunks(info.last8, /*first_byte=*/6);
result |= merged_67 << kFirstResultBitChunk6;
result |= merged_89 << kFirstResultBitChunk8;
// Handle an invalid Varint with all 10 continuation bits set.
info.masked_cont_bits = ValueBarrier(info.masked_cont_bits);
if (PROTOBUF_PREDICT_FALSE(info.masked_cont_bits == 0)) {
*out = 0;
return nullptr;
}
}
// Mask off invalid data bits.
result &= ~result_mask;
*out = result;
return info.p;
}
// See comments in VarintParseSlowArm64 for a description of the algorithm.
// Differences in the 32 bit version are noted below.
PROTOBUF_NOINLINE const char* VarintParseSlowArm32(const char* p, uint32_t* out,
uint64_t first8) {
// This also skips the slop bytes.
SlowPathEncodedInfo info = ComputeLengthAndUpdateP(p);
// Extract data bits from chunks 1-4. Chunk zero is merged in below.
uint64_t merged_12 = ExtractAndMergeTwoChunks(first8, /*first_byte=*/1);
uint64_t merged_34 = ExtractAndMergeTwoChunks(first8, /*first_byte=*/3);
first8 = ValueBarrier(first8, p);
uint64_t result = Ubfx7(first8, /*start=*/0);
result = ForceToRegister(result | merged_12 << kFirstResultBitChunk1);
result = ForceToRegister(result | merged_34 << kFirstResultBitChunk3);
uint64_t result_mask = kResultMaskUnshifted << info.valid_chunk_bits;
result &= ~result_mask;
// It is extremely unlikely that a Varint is invalid so checking that
// condition isn't on the critical path. Here we make sure that we don't do so
// until result has been computed.
info.masked_cont_bits = ValueBarrier(info.masked_cont_bits, result);
if (PROTOBUF_PREDICT_FALSE(info.masked_cont_bits == 0)) {
// Makes the compiler think out was modified here. This ensures it won't
// predicate this extremely predictable branch.
out = ValueBarrier(out);
*out = 0;
return nullptr;
}
*out = result;
return info.p;
}
#endif // __aarch64__
} // namespace internal
} // namespace protobuf
} // namespace google
#include "google/protobuf/port_undef.inc"