blob: 20a9f3a80b995fe08709173ba9bc9cfa2c326b4b [file] [log] [blame]
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
// File: internal/proto.h
// -----------------------------------------------------------------------------
//
// Declares functions for serializing and deserializing data to and from memory
// buffers in protocol buffer wire format. This library takes no steps to
// ensure that the encoded data matches with any message specification.
#ifndef ABSL_LOG_INTERNAL_PROTO_H_
#define ABSL_LOG_INTERNAL_PROTO_H_
#include <cstddef>
#include <cstdint>
#include <limits>
#include "absl/base/attributes.h"
#include "absl/base/casts.h"
#include "absl/base/config.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace log_internal {
// absl::Span<char> represents a view into the available space in a mutable
// buffer during encoding. Encoding functions shrink the span as they go so
// that the same view can be passed to a series of Encode functions. If the
// data do not fit, nothing is encoded, the view is set to size zero (so that
// all subsequent encode calls fail), and false is returned. Otherwise true is
// returned.
// In particular, attempting to encode a series of data into an insufficient
// buffer has consistent and efficient behavior without any caller-side error
// checking. Individual values will be encoded in their entirety or not at all
// (unless one of the `Truncate` functions is used). Once a value is omitted
// because it does not fit, no subsequent values will be encoded to preserve
// ordering; the decoded sequence will be a prefix of the original sequence.
// There are two ways to encode a message-typed field:
//
// * Construct its contents in a separate buffer and use `EncodeBytes` to copy
// it into the primary buffer with type, tag, and length.
// * Use `EncodeMessageStart` to write type and tag fields and reserve space for
// the length field, then encode the contents directly into the buffer, then
// use `EncodeMessageLength` to write the actual length into the reserved
// bytes. This works fine if the actual length takes fewer bytes to encode
// than were reserved, although you don't get your extra bytes back.
// This approach will always produce a valid encoding, but your protocol may
// require that the whole message field be omitted if the buffer is too small
// to contain all desired subfields. In this case, operate on a copy of the
// buffer view and assign back only if everything fit, i.e. if the last
// `Encode` call returned true.
// Encodes `value` as a varint field identified by `tag` into `*buf` and
// returns true if it fits.
// Used for int32_t, int64_t, uint32_t, uint64_t, bool, and enum field types.
// Consumes up to MaxVarintSize() * 2 bytes (20): at most one maximum-size
// varint for the tag/type header and one for the value.
bool EncodeVarint(uint64_t tag, uint64_t value, absl::Span<char> *buf);
// Signed 64-bit overload: converts `value` to `uint64_t` (modulo 2^64) and
// forwards to the `uint64_t` overload.
inline bool EncodeVarint(uint64_t tag, int64_t value, absl::Span<char> *buf) {
  const auto as_unsigned = static_cast<uint64_t>(value);
  return EncodeVarint(tag, as_unsigned, buf);
}
// Unsigned 32-bit overload: zero-extends `value` to 64 bits and forwards to
// the `uint64_t` overload.
inline bool EncodeVarint(uint64_t tag, uint32_t value, absl::Span<char> *buf) {
  const uint64_t widened{value};
  return EncodeVarint(tag, widened, buf);
}
// Signed 32-bit overload: sign-extends `value` to 64 bits, reinterprets the
// result as unsigned, and forwards to the `uint64_t` overload.  Negative
// inputs therefore become large 64-bit values.
inline bool EncodeVarint(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  const int64_t sign_extended{value};
  const auto as_unsigned = static_cast<uint64_t>(sign_extended);
  return EncodeVarint(tag, as_unsigned, buf);
}
// Encodes `value` as a varint field after applying the ZigZag mapping and
// returns true if it fits.
// Used for sint32 and sint64 field types.
// Consumes up to MaxVarintSize() * 2 bytes (20).
inline bool EncodeVarintZigZag(uint64_t tag, int64_t value,
                               absl::Span<char> *buf) {
  // Non-negative n maps to the even number 2n; negative n maps to the odd
  // number 2 * (-(n + 1)) + 1.  Adding one before negating keeps the
  // negation in range for the most negative int64_t.
  const uint64_t zigzag = value < 0
                              ? 2 * static_cast<uint64_t>(-(value + 1)) + 1
                              : 2 * static_cast<uint64_t>(value);
  return EncodeVarint(tag, zigzag, buf);
}
// Encodes `value` as a 64-bit field identified by `tag` into `*buf` and
// returns true if it fits.
// Used for fixed64 and sfixed64 field types.
// Consumes up to MaxVarintSize() + 8 bytes (18): at most one maximum-size
// varint for the tag/type header plus the 8-byte value.
bool Encode64Bit(uint64_t tag, uint64_t value, absl::Span<char> *buf);
// Signed 64-bit overload: converts `value` to `uint64_t` (modulo 2^64) and
// forwards to the `uint64_t` overload.
inline bool Encode64Bit(uint64_t tag, int64_t value, absl::Span<char> *buf) {
  const auto as_unsigned = static_cast<uint64_t>(value);
  return Encode64Bit(tag, as_unsigned, buf);
}
// Unsigned 32-bit overload: zero-extends `value` to 64 bits and forwards to
// the `uint64_t` overload.
inline bool Encode64Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf) {
  const uint64_t widened{value};
  return Encode64Bit(tag, widened, buf);
}
// Signed 32-bit overload: sign-extends `value` to 64 bits, reinterprets the
// result as unsigned, and forwards to the `uint64_t` overload.
inline bool Encode64Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  const int64_t sign_extended{value};
  const auto as_unsigned = static_cast<uint64_t>(sign_extended);
  return Encode64Bit(tag, as_unsigned, buf);
}
// Encodes `value` as a 64-bit field by emitting the object representation of
// the double, and returns true if it fits.
// Used for double field type.
// Consumes up to MaxVarintSize() + 8 bytes (18).
inline bool EncodeDouble(uint64_t tag, double value, absl::Span<char> *buf) {
  // Reinterpret the bits of the double; no numeric conversion takes place.
  const uint64_t raw_bits = absl::bit_cast<uint64_t>(value);
  return Encode64Bit(tag, raw_bits, buf);
}
// Encodes `value` as a 32-bit field identified by `tag` into `*buf` and
// returns true if it fits.
// Used for fixed32 and sfixed32 field types.
// Consumes up to MaxVarintSize() + 4 bytes (14): at most one maximum-size
// varint for the tag/type header plus the 4-byte value.
bool Encode32Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf);
// Signed 32-bit overload: converts `value` to `uint32_t` (modulo 2^32) and
// forwards to the `uint32_t` overload.
inline bool Encode32Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) {
  const auto as_unsigned = static_cast<uint32_t>(value);
  return Encode32Bit(tag, as_unsigned, buf);
}
// Encodes `value` as a 32-bit field by emitting the object representation of
// the float, and returns true if it fits.
// Used for float field type.
// Consumes up to MaxVarintSize() + 4 bytes (14).
inline bool EncodeFloat(uint64_t tag, float value, absl::Span<char> *buf) {
  // Reinterpret the bits of the float; no numeric conversion takes place.
  const uint32_t raw_bits = absl::bit_cast<uint32_t>(value);
  return Encode32Bit(tag, raw_bits, buf);
}
// Encodes the bytes in `value` as a length-delimited field identified by `tag`
// into `*buf` and returns true if they fit.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to MaxVarintSize() * 2 + value.size() bytes
// (20 + value.size()): the tag/type header, the length, and the payload.
bool EncodeBytes(uint64_t tag, absl::Span<const char> value,
absl::Span<char> *buf);
// Encodes as many of the bytes in `value` as will fit as a length-delimited
// field and returns true as long as the field header (`tag_type` and `length`)
// fits.  Unlike `EncodeBytes`, a payload that is too large is shortened
// rather than rejected.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to MaxVarintSize() * 2 + value.size() bytes
// (20 + value.size()).
bool EncodeBytesTruncate(uint64_t tag, absl::Span<const char> value,
absl::Span<char> *buf);
// Encodes the specified string as a length-delimited field and returns true if
// it fits.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
inline bool EncodeString(uint64_t tag, absl::string_view value,
absl::Span<char> *buf) {
return EncodeBytes(tag, value, buf);
}
// Encodes as much of the specified string as will fit as a length-delimited
// field and returns true as long as the field header (`tag_type` and `length`)
// fits.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()).
inline bool EncodeStringTruncate(uint64_t tag, absl::string_view value,
absl::Span<char> *buf) {
return EncodeBytesTruncate(tag, value, buf);
}
// Encodes the header for a length-delimited field containing up to `max_size`
// bytes or the number remaining in the buffer, whichever is less.  If the
// header fits, a non-nullptr `Span` is returned; this must be passed to
// `EncodeMessageLength` after all contents are encoded to finalize the length
// field.  If the header does not fit, a nullptr `Span` is returned which is
// safe to pass to `EncodeMessageLength` but need not be.
// The result is marked ABSL_MUST_USE_RESULT because it is the value
// `EncodeMessageLength` needs to finalize the length.
// Used for string, bytes, message, and packed-repeated field type.
// Consumes up to MaxVarintSize() * 2 bytes (20).
ABSL_MUST_USE_RESULT absl::Span<char> EncodeMessageStart(uint64_t tag,
uint64_t max_size,
absl::Span<char> *buf);
// Finalizes the length field in `msg` so that it encompasses all data encoded
// into `*buf` since the call to `EncodeMessageStart` which returned `msg`.
// Does nothing if `msg` is a `nullptr` `Span`.  `buf` is taken by pointer to
// const.
void EncodeMessageLength(absl::Span<char> msg, const absl::Span<char> *buf);
// Identifies how a field's payload is laid out on the wire.  The numeric value
// occupies the low three bits of the combined tag/type header (see
// `MakeTagType`).
enum class WireType : uint64_t {
// Payload is a single varint.
kVarint = 0,
// Payload is a fixed 8 bytes.
k64Bit = 1,
// Payload is a varint length followed by that many bytes.
kLengthDelimited = 2,
// Payload is a fixed 4 bytes.
k32Bit = 5,
};
// Returns the number of bytes needed to encode `value` as a varint: one byte
// per 7 bits of payload, with a minimum of one byte.
constexpr size_t VarintSize(uint64_t value) {
  size_t num_bytes = 1;
  // Each additional byte carries the next 7 bits of the value.
  while (value >= 128) {
    value >>= 7;
    ++num_bytes;
  }
  return num_bytes;
}
// Returns the size in bytes of the shortest possible varint, i.e. the encoding
// of the smallest `uint64_t` value.
constexpr size_t MinVarintSize() {
  constexpr uint64_t kSmallest = (std::numeric_limits<uint64_t>::min)();
  return VarintSize(kSmallest);
}
// Returns the size in bytes of the longest possible varint, i.e. the encoding
// of the largest `uint64_t` value.
constexpr size_t MaxVarintSize() {
  constexpr uint64_t kLargest = (std::numeric_limits<uint64_t>::max)();
  return VarintSize(kLargest);
}
// Returns the largest value whose varint encoding occupies at most `size`
// bytes, i.e. 2^(7 * size) - 1, saturating at the maximum `uint64_t` once
// `size` reaches 10.
constexpr uint64_t MaxVarintForSize(size_t size) {
  // Ten bytes already cover all 64 bits; shifting by 70 or more would be
  // undefined, so saturate instead.
  if (size >= 10) return (std::numeric_limits<uint64_t>::max)();
  return (uint64_t{1} << (size * 7)) - 1;
}
// Combines `tag` and `type` into a single header value: `tag` is shifted left
// by three bits and the numeric value of `type` is OR'd into the low bits.
constexpr uint64_t MakeTagType(uint64_t tag, WireType type) {
  return (tag << 3) | static_cast<uint64_t>(type);
}
// `BufferSizeFor` returns a number of bytes guaranteed to be sufficient to
// store encoded fields as `(tag, WireType)`, regardless of data values.  The
// result is the size of the tag/type header plus a per-type allowance: a
// maximum-size varint for `kVarint`, 8 bytes for `k64Bit`, 4 bytes for
// `k32Bit`, and a maximum-size varint (the length only) for
// `kLengthDelimited`.
//
// For `WireType::kLengthDelimited` the contents are not included; add them
// yourself, e.g. for string and bytes fields by adding the lengths of any
// encoded strings to the return value or for submessage fields by enumerating
// the fields you may encode into their contents.
constexpr size_t BufferSizeFor(uint64_t tag, WireType type) {
  const size_t header_size = VarintSize(MakeTagType(tag, type));
  switch (type) {
    case WireType::kVarint:
    case WireType::kLengthDelimited:
      // Value (or length prefix) is a varint of up to the maximum size.
      return header_size + MaxVarintSize();
    case WireType::k64Bit:
      return header_size + size_t{8};
    case WireType::k32Bit:
      return header_size + size_t{4};
  }
  // Values outside the enumerators contribute only the header size.
  return header_size;
}
// absl::Span<const char> represents a view into the un-processed space in a
// buffer during decoding.  Decoding functions shrink the span as they go so
// that the same view can be decoded iteratively until all data are processed.
// In general, if the buffer is exhausted but additional bytes are expected by
// the decoder, it will return values as if the additional bytes were zeros.
// Length-delimited fields are an exception - if the encoded length field
// indicates more data bytes than are available in the buffer, the `bytes_value`
// and `string_value` accessors will return truncated views.
//
// `ProtoField` holds a single decoded field: its tag, its wire type, and either
// a numeric value or a view of its bytes.  A `ProtoField` that has not decoded
// anything reports tag 0, `WireType::kVarint`, value 0, and an empty byte
// view.
class ProtoField final {
 public:
  // Consumes bytes from `data` and returns true if there were any bytes to
  // decode.
  bool DecodeFrom(absl::Span<const char> *data);

  uint64_t tag() const { return tag_; }
  WireType type() const { return type_; }

  // These value accessors will return nonsense if the data were not encoded in
  // the corresponding wiretype from the corresponding C++ (or other language)
  // type.
  double double_value() const { return absl::bit_cast<double>(value_); }
  float float_value() const {
    return absl::bit_cast<float>(static_cast<uint32_t>(value_));
  }
  int32_t int32_value() const { return static_cast<int32_t>(value_); }
  int64_t int64_value() const { return static_cast<int64_t>(value_); }
  // ZigZag decoding: odd encodings are negative, even encodings are
  // non-negative.  The arithmetic is done on the unsigned value and converted
  // to the signed result type at the end.
  int32_t sint32_value() const {
    if (value_ % 2) return static_cast<int32_t>(0 - ((value_ - 1) / 2) - 1);
    return static_cast<int32_t>(value_ / 2);
  }
  int64_t sint64_value() const {
    if (value_ % 2) return static_cast<int64_t>(0 - ((value_ - 1) / 2) - 1);
    return static_cast<int64_t>(value_ / 2);
  }
  uint32_t uint32_value() const { return static_cast<uint32_t>(value_); }
  uint64_t uint64_value() const { return value_; }
  bool bool_value() const { return value_ != 0; }
  // To decode an enum, call int32_value() and cast to the appropriate type.
  // Note that the official C++ proto compiler treats enum fields with values
  // that do not correspond to a defined enumerator as unknown fields.

  // To decode fields within a submessage field, take a copy of the view
  // returned by `bytes_value()` and pass its address to `DecodeFrom` on
  // another `ProtoField`.
  absl::Span<const char> bytes_value() const { return data_; }
  absl::string_view string_value() const {
    const auto data = bytes_value();
    return absl::string_view(data.data(), data.size());
  }

  // Returns the encoded length of a length-delimited field.  This equals
  // `bytes_value().size()` except when the latter has been truncated due to
  // buffer underrun.
  uint64_t encoded_length() const { return value_; }

 private:
  // Every member has a default so that accessors called before `DecodeFrom`
  // has assigned it read well-defined values instead of indeterminate memory.
  uint64_t tag_ = 0;
  WireType type_ = WireType::kVarint;
  // For `WireType::kVarint`, `WireType::k64Bit`, and `WireType::k32Bit`, holds
  // the decoded value.
  // For `WireType::kLengthDelimited`, holds the decoded length.
  uint64_t value_ = 0;
  absl::Span<const char> data_;
};
} // namespace log_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_LOG_INTERNAL_PROTO_H_