Add `internal_metadata_locator.h` to provide offset-based access to `InternalMetadata`.
This new header defines `InternalMetadataOffset` and `InternalMetadataResolver` to allow locating a message's `_internal_metadata_`, or any other class's `_internal_metadata_` member, from a field's address using a precomputed offset. This will allow us to implement `GetArena()` for message fields without needing to store a copy of the message's `Arena*` in each field that has a `GetArena()` method.
PiperOrigin-RevId: 814765386
diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel
index 1b8a3ec..5c0dec9 100644
--- a/src/google/protobuf/BUILD.bazel
+++ b/src/google/protobuf/BUILD.bazel
@@ -660,6 +660,7 @@
"has_bits.h",
"implicit_weak_message.h",
"inlined_string_field.h",
+ "internal_metadata_locator.h",
"map.h",
"map_field_lite.h",
"map_type_handler.h",
@@ -888,6 +889,36 @@
],
)
+proto_library(
+ name = "internal_metadata_locator_test_proto",
+ srcs = ["internal_metadata_locator_test.proto"],
+ strip_import_prefix = "/src",
+)
+
+cc_proto_library(
+ name = "internal_metadata_locator_test_cc_proto",
+ deps = [":internal_metadata_locator_test_proto"],
+)
+
+cc_test(
+ name = "internal_metadata_locator_test",
+ size = "small",
+ srcs = [
+ "internal_metadata_locator_test.cc",
+ ],
+ deps = [
+ ":arena",
+ ":internal_metadata_locator_test_cc_proto",
+ ":port",
+ ":protobuf",
+ ":protobuf_lite",
+ ":test_util2",
+ "//src/google/protobuf/io",
+ "@googletest//:gtest",
+ "@googletest//:gtest_main",
+ ],
+)
+
# This provides just the header files for use in projects that need to build
# shared libraries for dynamic loading. This target is available until Bazel
# adds native support for such use cases.
@@ -1001,6 +1032,7 @@
name = "test_proto_srcs",
srcs = [
"any_test.proto",
+ "internal_metadata_locator_test.proto",
"map_proto2_unittest.proto",
"map_unittest.proto",
"unittest.proto",
diff --git a/src/google/protobuf/internal_metadata_locator.h b/src/google/protobuf/internal_metadata_locator.h
new file mode 100644
index 0000000..3a859ac
--- /dev/null
+++ b/src/google/protobuf/internal_metadata_locator.h
@@ -0,0 +1,212 @@
+#ifndef GOOGLE_PROTOBUF_INTERNAL_METADATA_LOCATOR_H__
+#define GOOGLE_PROTOBUF_INTERNAL_METADATA_LOCATOR_H__
+
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "absl/log/absl_check.h"
+#include "google/protobuf/arena.h"
+#include "google/protobuf/metadata_lite.h"
+
+// Must be included last.
+#include "google/protobuf/port_def.inc"
+
+namespace google {
+namespace protobuf {
+namespace internal {
+
+// A wrapper around the offset to internal metadata from the address of another
+// field in the same class/struct. This is used to reduce the size of fields
+// that need access to an Arena which can be found in the containing object.
+class InternalMetadataOffset {
+  // Offset value reserved to mean "does not point to any metadata".
+  static constexpr int32_t kSentinelInternalMetadataOffset = 0;
+
+ public:
+  // A sentinel `InternalMetadataOffset`, which does not point to any metadata.
+  constexpr InternalMetadataOffset() = default;
+
+  // Constructs an `InternalMetadataOffset` which can recover the
+  // `InternalMetadata` from a containing type `T` given the starting address of
+  // the field at offset `kFieldOffset` within `T`.
+  //
+  // This method expects to find a field with name `_internal_metadata_` in `T`,
+  // and the type of that field should be `InternalMetadata`.
+  template <typename T, size_t kFieldOffset>
+  static constexpr InternalMetadataOffset Build() {
+    static_assert(
+        std::is_same_v<std::remove_const_t<decltype(T::_internal_metadata_)>,
+                       InternalMetadata>,
+        "Field `_internal_metadata_` is not of type `InternalMetadata`");
+
+    constexpr int64_t kInternalMetadataOffset =
+        static_cast<int64_t>(PROTOBUF_FIELD_OFFSET(T, _internal_metadata_));
+    // Signed 64-bit distance shared by the range checks and the result.
+    constexpr int64_t kDelta =
+        kInternalMetadataOffset - static_cast<int64_t>(kFieldOffset);
+
+    static_assert(
+        kDelta >= int64_t{INT32_MIN},
+        "Offset from `_internal_metadata_` is underflowing an int32_t, "
+        "likely meaning your message body is too large.");
+    static_assert(
+        kDelta <= int64_t{INT32_MAX},
+        "Offset from `_internal_metadata_` is overflowing an int32_t, "
+        "likely meaning your message body is too large.");
+
+    return InternalMetadataOffset(static_cast<int32_t>(kDelta));
+  }
+
+  // Builds an `InternalMetadataOffset` from a dynamic offset from the start of
+  // `T`. This is used by `DynamicMessage` to build an `InternalMetadataOffset`
+  // for a field at a given runtime-derived offset from the start of the
+  // message.
+  //
+  // In debug builds, this function checks at runtime that the offset from
+  // `_internal_metadata_` to the field is within the range of an int32_t. This
+  // is necessary to prevent integer overflow when calculating the offset.
+  template <typename T>
+  static InternalMetadataOffset BuildFromDynamicOffset(size_t field_offset) {
+    static_assert(
+        std::is_base_of_v<MessageLite, T>,
+        "BuildFromDynamicOffset can only be used for `DynamicMessage`");
+
+    constexpr int64_t kInternalMetadataOffset =
+        static_cast<int64_t>(PROTOBUF_FIELD_OFFSET(T, _internal_metadata_));
+    // Signed 64-bit distance shared by the range checks and the result.
+    const int64_t delta =
+        kInternalMetadataOffset - static_cast<int64_t>(field_offset);
+
+    ABSL_DCHECK_GE(delta, int64_t{INT32_MIN})
+        << "Offset from `_internal_metadata_` to the field at offset "
+        << field_offset
+        << " is underflowing an int32_t, likely meaning your message body is "
+           "too large.";
+    ABSL_DCHECK_LE(delta, int64_t{INT32_MAX})
+        << "Offset from `_internal_metadata_` to the field at offset "
+        << field_offset
+        << " is overflowing an int32_t, likely meaning your message body is "
+           "too large.";
+
+    return InternalMetadataOffset(static_cast<int32_t>(delta));
+  }
+
+  // If true, this `InternalMetadataOffset` does not point to any metadata.
+  constexpr bool IsSentinel() const {
+    return offset_ == kSentinelInternalMetadataOffset;
+  }
+
+  // The offset from the start of the field to the internal metadata of the
+  // containing type (either a `MessageLite` or some other internal class, like
+  // `RepeatedPtrFieldWithArena`).
+  //
+  // This should only be called if `IsSentinel()` is false.
+  constexpr int32_t Offset() const {
+    ABSL_DCHECK(!IsSentinel());
+    return offset_;
+  }
+
+ private:
+  // A private constructor for non-sentinel offsets which can only be called
+  // from the static build methods.
+  explicit constexpr InternalMetadataOffset(int32_t offset) : offset_(offset) {}
+
+  int32_t offset_ = kSentinelInternalMetadataOffset;
+};
+
+// A class which can recover the `InternalMetadata` field from a containing type
+// given a pointer to another field contained by that type.
+class InternalMetadataResolver {
+ public:
+  // Builds an `InternalMetadataResolver` which points to no metadata.
+  constexpr InternalMetadataResolver() = default;
+
+  constexpr explicit InternalMetadataResolver(InternalMetadataOffset offset)
+      : offset_(offset) {}
+
+ private:
+  // Declared without `inline`: a non-defining friend declaration may only be
+  // `inline` if the function was previously declared `inline`.
+  template <auto Resolver, typename T>
+  friend Arena* ResolveArena(const T* object);
+
+  // Finds the `Arena*` from the `InternalMetadata` of the containing type given
+  // the `this` pointer to the field contained by that type.
+  template <typename T, InternalMetadataResolver T::* Resolver>
+  static inline Arena* FindArena(const T* object) {
+    const InternalMetadataResolver& resolver = object->*Resolver;
+    if (resolver.offset_.IsSentinel()) return nullptr;
+    return resolver.FindInternalMetadata(object).arena();
+  }
+
+  // Finds the `InternalMetadata` by adding the offset to the address of the
+  // start of the field.
+  inline const InternalMetadata& FindInternalMetadata(
+      const void* object) const {
+    return *reinterpret_cast<const InternalMetadata*>(
+        reinterpret_cast<const char*>(object) + offset_.Offset());
+  }
+
+  InternalMetadataOffset offset_;
+};
+
+// Resolves an `Arena*` from the `InternalMetadata` of a containing type (which
+// has a member `InternalMetadata _internal_metadata_`) given a pointer to a
+// field of type `T` contained by that type (nullptr for a sentinel offset).
+//
+// The template parameter `Resolver` is a pointer-to-member to the
+// `InternalMetadataResolver` field of `object`.
+//
+// `object` must have been constructed by the containing type, which is
+// responsible for correctly constructing the `InternalMetadataOffset` for
+// `object`.
+//
+// This function exists as a standalone function and not a member of
+// `InternalMetadataResolver` because the offset must be computed relative to
+// the address of the field containing the resolver, not the resolver itself.
+// This pattern is easy to get wrong from the caller, so we force callers to
+// give a pointer-to-member to the resolver as a type argument, then require
+// that the pointer passed to `ResolveArena` is of the containing type of the
+// resolver field. With the pointer-to-member type, we can load the resolver
+// directly from the passed object, thereby ensuring we are using the correct
+// offset for the object.
+//
+// Example usage:
+//
+// ```cc
+// struct Bar {
+//   int some_value;
+//   InternalMetadataResolver resolver;
+//
+//   Bar(int value, InternalMetadataOffset offset)
+//       : some_value(value), resolver(offset) {}
+//
+//   Arena* GetArena() const {
+//     return ResolveArena<&Bar::resolver>(this);
+//   }
+// };
+//
+// struct Foo {
+//   InternalMetadata _internal_metadata_;
+//   Bar field1;
+//
+//   Foo(Arena* arena)
+//       : _internal_metadata_(arena),
+//         field1(123,
+//                InternalMetadataOffset::Build<Foo, PROTOBUF_FIELD_OFFSET(
+//                                                       Foo, field1)>()) {}
+// };
+// ```
+template <auto Resolver, typename T>
+inline Arena* ResolveArena(const T* object) {
+  return InternalMetadataResolver::FindArena<T, Resolver>(object);
+}
+
+} // namespace internal
+} // namespace protobuf
+} // namespace google
+
+#include "google/protobuf/port_undef.inc"
+
+#endif // GOOGLE_PROTOBUF_INTERNAL_METADATA_LOCATOR_H__
diff --git a/src/google/protobuf/internal_metadata_locator_test.cc b/src/google/protobuf/internal_metadata_locator_test.cc
new file mode 100644
index 0000000..a64359d
--- /dev/null
+++ b/src/google/protobuf/internal_metadata_locator_test.cc
@@ -0,0 +1,89 @@
+#include "google/protobuf/internal_metadata_locator.h"
+
+#include <cstddef>
+#include <cstdint>
+
+#include <gtest/gtest.h>
+#include "google/protobuf/arena.h"
+#include "google/protobuf/internal_metadata_locator_test.pb.h"
+#include "google/protobuf/io/coded_stream.h"
+#include "google/protobuf/message_lite.h"
+#include "google/protobuf/metadata_lite.h"
+
+// Must be included last.
+#include "google/protobuf/port_def.inc"
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace {
+
+// Hand-computed offset of the only field of `TestOneRepeatedField`: the
+// `MessageLite` base plus one pointer-sized slot (sizeof(void*)) for hasbits.
+static constexpr size_t kTestOneRepeatedFieldFieldOffset =
+    sizeof(MessageLite) + sizeof(void*);
+
+#ifdef PROTOBUF_CUSTOM_VTABLE
+static constexpr size_t kTestOneRepeatedFieldInternalMetadataOffset = 0;
+#else  // !PROTOBUF_CUSTOM_VTABLE
+static constexpr size_t kTestOneRepeatedFieldInternalMetadataOffset =
+    sizeof(void*);  // NOTE(review): presumably skips the vptr -- confirm.
+#endif  // PROTOBUF_CUSTOM_VTABLE
+
+struct FieldWithInternalMetadataOffset {
+  explicit FieldWithInternalMetadataOffset(InternalMetadataOffset offset)
+      : resolver(offset) {}  // `field` keeps its default initializer (0).
+
+  int field = 0;  // Precedes `resolver` in declaration order.
+  InternalMetadataResolver resolver;  // Built from the constructor's offset.
+};
+
+struct StructWithInternalMetadata {
+  explicit StructWithInternalMetadata(Arena* arena)
+      : _internal_metadata_(arena),  // Metadata is constructed from `arena`.
+        field(InternalMetadataOffset::Build<
+              StructWithInternalMetadata,
+              PROTOBUF_FIELD_OFFSET(StructWithInternalMetadata, field)>()) {}
+
+  InternalMetadata _internal_metadata_;  // Name `Build()` looks up.
+  FieldWithInternalMetadataOffset field;  // Declared after the metadata.
+};
+
+TEST(InternalMetadataLocatorTest, Sentinel) {
+  constexpr InternalMetadataOffset kDefaultOffset;  // Default-constructed.
+  EXPECT_TRUE(kDefaultOffset.IsSentinel());
+}
+
+TEST(InternalMetadataLocatorTest, BuildFromStaticOffset) {
+  using Owner = StructWithInternalMetadata;
+  constexpr size_t kFieldPos = PROTOBUF_FIELD_OFFSET(Owner, field);
+  constexpr auto kBuilt = InternalMetadataOffset::Build<Owner, kFieldPos>();
+  // The metadata is expected one pointer-width before `field`.
+  EXPECT_FALSE(kBuilt.IsSentinel());
+  EXPECT_EQ(kBuilt.Offset(), -static_cast<int32_t>(sizeof(void*)));
+}
+
+TEST(InternalMetadataLocatorTest, BuildFromStaticOffsetForProtoMessage) {
+  using Msg = proto2_unittest::TestOneRepeatedField;
+  constexpr auto kBuilt =
+      InternalMetadataOffset::Build<Msg, kTestOneRepeatedFieldFieldOffset>();
+  constexpr size_t kBackDistance = kTestOneRepeatedFieldFieldOffset -
+                                   kTestOneRepeatedFieldInternalMetadataOffset;
+  EXPECT_FALSE(kBuilt.IsSentinel());  // A non-sentinel offset was built.
+  EXPECT_EQ(kBuilt.Offset(), -static_cast<int32_t>(kBackDistance));
+}
+
+TEST(InternalMetadataLocatorTest, ReadArenaFromInternalMetadata) {
+  Arena arena;
+  StructWithInternalMetadata owner(&arena);
+  // Resolve starting from the embedded field, not from the owning struct.
+  constexpr auto kMember = &FieldWithInternalMetadataOffset::resolver;
+  EXPECT_EQ(ResolveArena<kMember>(&owner.field), &arena);
+}
+
+} // namespace
+} // namespace internal
+} // namespace protobuf
+} // namespace google
+
+#include "google/protobuf/port_undef.inc"
diff --git a/src/google/protobuf/internal_metadata_locator_test.proto b/src/google/protobuf/internal_metadata_locator_test.proto
new file mode 100644
index 0000000..d666ef2
--- /dev/null
+++ b/src/google/protobuf/internal_metadata_locator_test.proto
@@ -0,0 +1,8 @@
+edition = "2023";
+
+package proto2_unittest;
+
+// The test that uses this message requires that the message has only one field.
+message TestOneRepeatedField {
+ repeated int32 repeated_int32 = 1;
+}
diff --git a/src/google/protobuf/message_lite.h b/src/google/protobuf/message_lite.h
index 25264ef..9ecb1ef 100644
--- a/src/google/protobuf/message_lite.h
+++ b/src/google/protobuf/message_lite.h
@@ -309,6 +309,9 @@
class DescriptorPoolExtensionFinder;
class ExtensionSet;
class HasBitsTestPeer;
+class InternalMetadataOffset;
+template <typename T, size_t kFieldOffset>
+struct InternalMetadataOffsetHelper;
class LazyField;
class RepeatedPtrFieldBase;
class TcParser;
@@ -1109,6 +1112,9 @@
friend class internal::DescriptorPoolExtensionFinder;
friend class internal::ExtensionSet;
friend class internal::HasBitsTestPeer;
+ friend class internal::InternalMetadataOffset;
+ template <typename T, size_t kFieldOffset>
+ friend struct internal::InternalMetadataOffsetHelper;
friend class internal::LazyField;
friend class internal::SwapFieldHelper;
friend class internal::TcParser;