Add embedded cleanups

Allow small (<= 32 byte) objects with non-trivial destructors to be
allocated directly inside the arena cleanup list instead of being
tracked through a separate {pointer, destructor} pair. Cleanup nodes
now carry a 2-bit tag (kDynamic, kEmbedded, kString, kCord), and
embedded nodes record their own size, so destruction becomes a simple
linear walk and the manual prefetch pipeline in CleanupList() is no
longer needed.
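
A sketch of the resulting node layouts (cleanup nodes grow down from a
block's `limit_`; widths are illustrative):

  [DynamicNode{obj*, dtor}]                   kDynamic: pointer cleanup
  [DynamicNode{size|kEmbedded, dtor}][T ...]  kEmbedded: object in place
  [TaggedNode{obj*|kString}]                  kString/kCord: by pointer
  [TaggedNode{kString}][std::string ...]      kString/kCord: in place
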
PiperOrigin-RevId: 490480534
diff --git a/src/file_lists.cmake b/src/file_lists.cmake
index 1e326bc..35084d0 100644
--- a/src/file_lists.cmake
+++ b/src/file_lists.cmake
@@ -578,6 +578,7 @@
 set(protobuf_test_files
   ${protobuf_SOURCE_DIR}/src/google/protobuf/any_test.cc
   ${protobuf_SOURCE_DIR}/src/google/protobuf/arena_align_test.cc
+  ${protobuf_SOURCE_DIR}/src/google/protobuf/arena_cleanup_test.cc
   ${protobuf_SOURCE_DIR}/src/google/protobuf/arena_unittest.cc
   ${protobuf_SOURCE_DIR}/src/google/protobuf/arenastring_unittest.cc
   ${protobuf_SOURCE_DIR}/src/google/protobuf/arenaz_sampler_test.cc
diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel
index 859b953..c77a026 100644
--- a/src/google/protobuf/BUILD.bazel
+++ b/src/google/protobuf/BUILD.bazel
@@ -204,10 +204,27 @@
         "//src/google/protobuf:__subpackages__",
     ],
     deps = [
+        ":arena_align",
         "@com_google_absl//absl/base:core_headers",
     ],
 )
 
+cc_test(
+    name = "arena_cleanup_test",
+    srcs = ["arena_cleanup_test.cc"],
+    copts = COPTS + select({
+        "//build_defs:config_msvc": [],
+        "//conditions:default": [
+            "-Wno-error=sign-compare",
+        ],
+    }),
+    deps = [
+        ":arena_cleanup",
+        "@com_google_googletest//:gtest",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "arena_config",
     srcs = ["arena_config.cc"],
diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc
index 327c26e..68d98ea 100644
--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@@ -202,6 +202,23 @@
 }
 
 PROTOBUF_NOINLINE
+void* SerialArena::AllocateCleanupFallback(size_t size) {
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(size));
+  AllocateNewBlock(size);
+  return limit_ -= size;
+}
+
+PROTOBUF_NOINLINE
+SerialArena::Memory SerialArena::AllocateCleanupFallback(size_t size,
+                                                         size_t align) {
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(size));
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(align));
+  AllocateNewBlock(size + align - internal::ArenaAlignDefault::align);
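+  // Absorb the bytes below the fresh `limit_` that break `align` alignment
+  // into this allocation; the caller records the enlarged size in the node
+  // header so cleanup iteration skips the padding together with the node.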
+  size += reinterpret_cast<size_t>(limit_) & (align - 1);
+  return {limit_ -= size, size};
+}
+
+PROTOBUF_NOINLINE
 void SerialArena::AddCleanupFallback(void* elem, void (*destructor)(void*)) {
   size_t required = cleanup::Size(destructor);
   AllocateNewBlock(required);
@@ -277,40 +294,10 @@
     char* limit = b->Limit();
     char* it = reinterpret_cast<char*>(b->cleanup_nodes);
     GOOGLE_DCHECK(!b->IsSentry() || it == limit);
-    if (it < limit) {
-      // A prefetch distance of 8 here was chosen arbitrarily.  It makes the
-      // pending nodes fill a cacheline which seemed nice.
-      constexpr int kPrefetchDist = 8;
-      cleanup::Tag pending_type[kPrefetchDist];
-      char* pending_node[kPrefetchDist];
-
-      int pos = 0;
-      for (; pos < kPrefetchDist && it < limit; ++pos) {
-        pending_type[pos] = cleanup::Type(it);
-        pending_node[pos] = it;
-        it += cleanup::Size(pending_type[pos]);
-      }
-
-      if (pos < kPrefetchDist) {
-        for (int i = 0; i < pos; ++i) {
-          cleanup::DestroyNode(pending_type[i], pending_node[i]);
-        }
-      } else {
-        pos = 0;
-        while (it < limit) {
-          cleanup::PrefetchNode(it);
-          cleanup::DestroyNode(pending_type[pos], pending_node[pos]);
-          pending_type[pos] = cleanup::Type(it);
-          pending_node[pos] = it;
-          it += cleanup::Size(pending_type[pos]);
-          pos = (pos + 1) % kPrefetchDist;
-        }
-        for (int i = pos; i < pos + kPrefetchDist; ++i) {
-          cleanup::DestroyNode(pending_type[i % kPrefetchDist],
-                               pending_node[i % kPrefetchDist]);
-        }
-      }
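+    // Every node now records its own size; DestroyNodeAt returns it, so
+    // cleanup is a simple linear walk over the list.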
+    while (it < limit) {
+      it += cleanup::DestroyNodeAt(it);
     }
+    GOOGLE_DCHECK_EQ(it, limit);
     b = b->next;
   } while (b);
 }
@@ -749,6 +736,19 @@
   }
 }
 
+void* ThreadSafeArena::AllocateCleanupFallback(size_t size) {
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(size));
+  return GetSerialArenaFallback(size)->AllocateCleanup(size);
+}
+
+ThreadSafeArena::Memory ThreadSafeArena::AllocateCleanupFallback(size_t size,
+                                                                 size_t align) {
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(size));
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(align));
+  size_t required = size + align - ArenaAlignDefault::align;
+  return GetSerialArenaFallback(required)->AllocateCleanup(size, align);
+}
+
 void ThreadSafeArena::AddCleanup(void* elem, void (*cleanup)(void*)) {
   SerialArena* arena;
   if (PROTOBUF_PREDICT_FALSE(!GetSerialArenaFast(&arena))) {
@@ -895,6 +895,38 @@
   return impl_.AllocateAlignedWithCleanup(n, align, destructor);
 }
 
+template <typename T, internal::cleanup::Tag tag>
+void* Arena::AllocateWithCleanup() {
+  using internal::ArenaAlignDefault;
+  using internal::cleanup::TaggedNode;
+  static_assert(alignof(T) <= ArenaAlignDefault::align);
+  constexpr size_t n = ArenaAlignDefault::Ceil(sizeof(TaggedNode) + sizeof(T));
+  void* mem = impl_.AllocateCleanup(n);
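+  // A tag-only node (no object address) marks the instance as embedded
+  // directly after the node; see DestroyTaggedObject().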
+  internal::cleanup::CreateNode(mem, tag);
+  return static_cast<TaggedNode*>(mem) + 1;
+}
+
+void* Arena::AllocateWithCleanup(size_t size, void (*dtor)(void*)) {
+  using internal::ArenaAlignDefault;
+  using internal::cleanup::DynamicNode;
+  size_t required = ArenaAlignDefault::Ceil(size + sizeof(DynamicNode));
+  void* mem = impl_.AllocateCleanup(required);
+  internal::cleanup::CreateNode(mem, required, dtor);
+  return static_cast<DynamicNode*>(mem) + 1;
+}
+
+void* Arena::AllocateWithCleanup(size_t size, size_t align,
+                                 void (*dtor)(void*)) {
+  using internal::cleanup::DynamicNode;
+  size_t required = size + sizeof(DynamicNode);
+  auto mem = impl_.AllocateCleanup(required, align);
+  internal::cleanup::CreateNode(mem.ptr, mem.size, dtor);
+  return static_cast<DynamicNode*>(mem.ptr) + 1;
+}
+
+template void*
+Arena::AllocateWithCleanup<std::string, internal::cleanup::Tag::kString>();
+
 }  // namespace protobuf
 }  // namespace google
 
diff --git a/src/google/protobuf/arena.h b/src/google/protobuf/arena.h
index abce219..426a1dd 100644
--- a/src/google/protobuf/arena.h
+++ b/src/google/protobuf/arena.h
@@ -253,6 +253,12 @@
     return Arena::CreateMaybeMessage<T>(arena, static_cast<Args&&>(args)...);
   }
 
+  template <typename T>
+  struct ObjectCreator {
+    template <typename... Args>
+    static T* Create(Arena* arena, Args&&... args);
+  };
+
   // API to create any objects on the arena. Note that only the object will
   // be created on the arena; the underlying ptrs (in case of a proto2 message)
   // will be still heap allocated. Proto messages should usually be allocated
@@ -273,11 +279,7 @@
     if (arena == nullptr) {
       return new T(std::forward<Args>(args)...);
     }
-    auto destructor =
-        internal::ObjectDestructor<std::is_trivially_destructible<T>::value,
-                                   T>::destructor;
-    return new (arena->AllocateInternal(sizeof(T), alignof(T), destructor))
-        T(std::forward<Args>(args)...);
+    return ObjectCreator<T>::Create(arena, std::forward<Args>(args)...);
   }
 
   // API to delete any objects not on an arena.  This can be used to safely
@@ -690,6 +692,11 @@
     }
   }
 
+  template <typename T, internal::cleanup::Tag tag>
+  void* AllocateWithCleanup();
+  void* AllocateWithCleanup(size_t size, void (*dtor)(void*));
+  void* AllocateWithCleanup(size_t size, size_t align, void (*dtor)(void*));
+
   void* Allocate(size_t n);
   void* AllocateForArray(size_t n);
   void* AllocateAlignedWithCleanup(size_t n, size_t align,
@@ -710,6 +717,45 @@
   friend struct internal::ArenaTestPeer;
 };
 
+template <typename T>
+template <typename... Args>
+PROTOBUF_NDEBUG_INLINE T* Arena::ObjectCreator<T>::Create(Arena* arena,
+                                                          Args&&... args) {
+  constexpr auto dtor = &internal::cleanup::arena_destruct_object<T>;
+
+  void* mem;
+  if (std::is_trivially_destructible<T>::value) {
+    // Simple case: trivially destructible
+    mem = arena->AllocateAligned(sizeof(T), alignof(T));
+  } else if (sizeof(T) <= 32) {
+    // The size of this item is small, and locality is important. We allocate
+    // the object inside the cleanup list itself. 32 bytes here is chosen as a
+    // reasonable 'half cache line' size. If we were to embed large objects
+    // (one or more cache lines) into the cleanup list, we could actually hurt
+    // cleanup iteration, as we would then need to skip one or more cache
+    // lines of 'dead' data during cleanup. Custom dtors on large objects are
+    // more likely to address at most a handful of memory locations.
+    mem = (alignof(T) > internal::ArenaAlignDefault::align)
+              ? arena->AllocateWithCleanup(sizeof(T), alignof(T), dtor)
+              : arena->AllocateWithCleanup(sizeof(T), dtor);
+  } else {
+    // Fall back to the 'old school' internal allocation path.
+    mem = arena->AllocateInternal(sizeof(T), alignof(T), dtor);
+  }
+  return new (mem) T(std::forward<Args>(args)...);
+}
+
+template <>
+struct Arena::ObjectCreator<std::string> {
+  template <typename... Args>
+  PROTOBUF_NDEBUG_INLINE static std::string* Create(Arena* arena,
+                                                    Args&&... args) {
+    using internal::cleanup::Tag;
+    void* mem = arena->AllocateWithCleanup<std::string, Tag::kString>();
+    return new (mem) std::string(std::forward<Args>(args)...);
+  }
+};
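+
+// Note: with the specialization above, Arena::Create<std::string>(&arena)
+// places both the cleanup node and the string itself in the cleanup area, so
+// creation and destruction touch one contiguous region.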
+
 }  // namespace protobuf
 }  // namespace google
 
diff --git a/src/google/protobuf/arena_cleanup.h b/src/google/protobuf/arena_cleanup.h
index 0ca60c2..6492cc1 100644
--- a/src/google/protobuf/arena_cleanup.h
+++ b/src/google/protobuf/arena_cleanup.h
@@ -38,6 +38,7 @@
 #include "google/protobuf/stubs/logging.h"
 #include "google/protobuf/stubs/common.h"
 #include "absl/base/attributes.h"
+#include "google/protobuf/arena_align.h"
 
 
 // Must be included last.
@@ -58,10 +59,14 @@
 // lowest 2 bits of the `elem` value identifying the type of node. All node
 // types must start with a `uintptr_t` that stores `Tag` in its low two bits.
 enum class Tag : uintptr_t {
-  kDynamic = 0,  // DynamicNode
-  kString = 1,   // StringNode (std::string)
+  kDynamic = 0,   // DynamicNode
+  kEmbedded = 1,  // Embedded node
+  kString = 2,    // TaggedNode (std::string) (pointer or embedded)
+  kCord = 3,      // TaggedNode (absl::Cord)  (pointer or embedded)
 };
 
+enum { kTagMask = 0x3 };
+
 // DynamicNode contains the object (`elem`) that needs to be
 // destroyed, and the function to destroy it (`destructor`)
 // elem must be aligned at minimum on a 4 byte boundary.
@@ -70,18 +75,50 @@
   void (*destructor)(void*);
 };
 
-// StringNode contains a `std::string` object (`elem`) that needs to be
-// destroyed. The lowest 2 bits of `elem` contain the non-zero kString tag.
-struct StringNode {
+// TaggedNode describes a well known cleanup instance identified by the non-zero
+// tag value stored in the lowest 2 bits of `elem`.
+struct TaggedNode {
   uintptr_t elem;
 };
 
-
 // EnableSpecializedTags() returns true if the alignment of tagged objects
 // such as std::string allows us to poke tags in the 2 LSB bits.
 inline constexpr bool EnableSpecializedTags() {
   // For now we require 2 bits
-  return alignof(std::string) >= 8;
+  return alignof(std::string) > kTagMask;
+}
+
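+// Creates a kDynamic cleanup node at `pos` holding `object` and its
+// destructor `dtor`.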
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(void* pos,
+                                                    const void* object,
+                                                    void (*dtor)(void*)) {
+  auto elem = reinterpret_cast<uintptr_t>(object);
+  DynamicNode n = {elem, dtor};
+  memcpy(pos, &n, sizeof(n));
+}
+
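+// Creates a kEmbedded cleanup node at `pos`: the object lives directly after
+// the node, and `size` (node plus object, default-aligned) is stored in the
+// header so destruction knows how many bytes to skip.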
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(void* pos, size_t size,
+                                                    void (*dtor)(void*)) {
+  GOOGLE_DCHECK_GT(size, 0);
+  GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(size));
+  auto elem = static_cast<uintptr_t>(size);
+  DynamicNode n = {elem + static_cast<uintptr_t>(Tag::kEmbedded), dtor};
+  memcpy(pos, &n, sizeof(n));
+}
+
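+// Creates a TaggedNode at `pos` holding a pointer to a well known object
+// type identified by `tag`.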
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(void* pos,
+                                                    const void* object,
+                                                    Tag tag) {
+  GOOGLE_DCHECK(tag != Tag::kDynamic && tag != Tag::kEmbedded);
+  auto elem = reinterpret_cast<uintptr_t>(object);
+  GOOGLE_DCHECK_EQ(elem & 3, 0ULL);  // Must be aligned
+  TaggedNode n = {elem + static_cast<uintptr_t>(tag)};
+  memcpy(pos, &n, sizeof(n));
+}
+
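+// Creates a tag-only TaggedNode at `pos` for a well known object type that
+// is embedded directly after the node.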
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(void* pos, Tag tag) {
+  GOOGLE_DCHECK(tag != Tag::kDynamic && tag != Tag::kEmbedded);
+  TaggedNode n = {static_cast<uintptr_t>(tag)};
+  memcpy(pos, &n, sizeof(n));
 }
 
 // Adds a cleanup entry identified by `tag` at memory location `pos`.
@@ -93,7 +130,7 @@
     GOOGLE_DCHECK_EQ(elem & 3, 0ULL);  // Must be aligned
     switch (tag) {
       case Tag::kString: {
-        StringNode n = {elem | static_cast<uintptr_t>(Tag::kString)};
+        TaggedNode n = {elem | static_cast<uintptr_t>(Tag::kString)};
         memcpy(pos, &n, sizeof(n));
         return;
       }
@@ -105,35 +142,49 @@
   memcpy(pos, &n, sizeof(n));
 }
 
-// Optimization: performs a prefetch on `elem_address`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void PrefetchNode(
-    const void* elem_address) {
-  (void)elem_address;
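+// Destroys the well known object of type T described by the TaggedNode at
+// `pos`; `elem` is the node's head with the tag bits already cleared. Returns
+// the number of cleanup-area bytes occupied by the node (and an embedded T).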
+template <typename T>
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE size_t DestroyTaggedObject(void* pos,
+                                                               uintptr_t elem) {
+  // elem == 0 identifies an embedded object stored directly after the node;
+  // a non-zero elem holds the address of a separately allocated object.
+  if (elem) {
+    reinterpret_cast<T*>(elem)->~T();
+    return sizeof(TaggedNode);
+  } else {
+    reinterpret_cast<T*>(static_cast<TaggedNode*>(pos) + 1)->~T();
+    return ArenaAlignDefault::Ceil(sizeof(TaggedNode) + sizeof(T));
+  }
 }
 
 // Destroys the node identified by the tag at `pos` and returns its size.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void DestroyNode(Tag tag, const void* pos) {
-  if (EnableSpecializedTags()) {
-    switch (tag) {
-      case Tag::kString: {
-        StringNode n;
-        memcpy(&n, pos, sizeof(n));
-        auto* s = reinterpret_cast<std::string*>(n.elem & ~0x7ULL);
-        // Some compilers don't like fully qualified explicit dtor calls,
-        // so use an alias to avoid having to type `::`.
-        using string_type = std::string;
-        s->~string_type();
-        return;
-      }
-      default:
-        break;
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE size_t DestroyNodeAt(void* pos) {
+  uintptr_t head = *reinterpret_cast<uintptr_t*>(pos);
+  uintptr_t tag = head & kTagMask;
+  switch (static_cast<Tag>(tag)) {
+    case Tag::kDynamic: {
+      auto* node = static_cast<DynamicNode*>(pos);
+      GOOGLE_DCHECK(node->destructor != nullptr);
+      GOOGLE_DCHECK(head);
+      node->destructor(reinterpret_cast<void*>(head));
+      return sizeof(DynamicNode);
     }
+
+    case Tag::kEmbedded: {
+      auto* node = static_cast<DynamicNode*>(pos);
+      GOOGLE_DCHECK(node->destructor != nullptr);
+      GOOGLE_DCHECK_GT(head - tag, 0);
+      node->destructor(node + 1);
+      return head - tag;
+    }
+
+    case Tag::kString:
+      return DestroyTaggedObject<std::string>(pos, head - tag);
+
+    case Tag::kCord:
+      return DestroyTaggedObject<absl::Cord>(pos, head - tag);
   }
-  DynamicNode n;
-  memcpy(&n, pos, sizeof(n));
-  n.destructor(reinterpret_cast<void*>(n.elem));
 }
 
 // Returns the `tag` identifying the type of object for `destructor` or
 // kDynamic if `destructor` does not identify a well known object type.
 inline ABSL_ATTRIBUTE_ALWAYS_INLINE Tag Type(void (*destructor)(void*)) {
@@ -169,9 +220,11 @@
 
   switch (tag) {
     case Tag::kDynamic:
+    case Tag::kEmbedded:
       return sizeof(DynamicNode);
     case Tag::kString:
-      return sizeof(StringNode);
+    case Tag::kCord:
+      return sizeof(TaggedNode);
     default:
       GOOGLE_LOG(FATAL) << "Corrupted cleanup tag: " << static_cast<int>(tag);
       return sizeof(DynamicNode);
diff --git a/src/google/protobuf/arena_cleanup_test.cc b/src/google/protobuf/arena_cleanup_test.cc
new file mode 100644
index 0000000..f9b568d
--- /dev/null
+++ b/src/google/protobuf/arena_cleanup_test.cc
@@ -0,0 +1,106 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2022 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "google/protobuf/arena_cleanup.h"
+
+#include <string>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace cleanup {
+namespace {
+
+using testing::Eq;
+
+struct DtorTracker {
+  DtorTracker() {
+    count = 0;
+    object = nullptr;
+  }
+
+  static void dtor(void* obj) {
+    ++count;
+    object = obj;
+  }
+
+  static int count;
+  static void* object;
+};
+
+int DtorTracker::count;
+void* DtorTracker::object;
+
+TEST(CleanupTest, CreateDestroyDynamicNode) {
+  alignas(ArenaAlignDefault::align) char buffer[1024];
+  void* object = &object;
+  DtorTracker dtor_tracker;
+
+  CreateNode(buffer, object, &DtorTracker::dtor);
+  EXPECT_THAT(DestroyNodeAt(buffer), Eq(sizeof(DynamicNode)));
+  EXPECT_THAT(DtorTracker::count, Eq(1));
+  EXPECT_THAT(DtorTracker::object, Eq(object));
+}
+
+TEST(CleanupTest, CreateDestroyEmbeddedNode) {
+  alignas(ArenaAlignDefault::align) char buffer[1024];
+  void* object = buffer + sizeof(DynamicNode);
+  DtorTracker dtor_tracker;
+
+  CreateNode(buffer, sizeof(DynamicNode) + 128, &DtorTracker::dtor);
+  EXPECT_THAT(DestroyNodeAt(buffer), Eq(sizeof(DynamicNode) + 128));
+  EXPECT_THAT(DtorTracker::count, Eq(1));
+  EXPECT_THAT(DtorTracker::object, Eq(object));
+}
+
+TEST(CleanupTest, CreateDestroyStringNode) {
+  alignas(ArenaAlignDefault::align) char buffer[1024];
+  alignas(std::string) char instance[sizeof(std::string)];
+  std::string* s = new (instance) std::string(1000, 'x');
+  CreateNode(buffer, s, Tag::kString);
+  EXPECT_THAT(DestroyNodeAt(buffer), Eq(sizeof(TaggedNode)));
+}
+
+TEST(CleanupTest, CreateDestroyEmbeddedStringNode) {
+  alignas(ArenaAlignDefault::align) char buffer[1024];
+  new (buffer + sizeof(TaggedNode)) std::string(1000, 'x');
+  CreateNode(buffer, Tag::kString);
+  EXPECT_THAT(DestroyNodeAt(buffer),
+              Eq(sizeof(TaggedNode) + sizeof(std::string)));
+}
+
+}  // namespace
+}  // namespace cleanup
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google
diff --git a/src/google/protobuf/arena_impl.h b/src/google/protobuf/arena_impl.h
index 9d2de93..5b0df53 100644
--- a/src/google/protobuf/arena_impl.h
+++ b/src/google/protobuf/arena_impl.h
@@ -183,6 +183,10 @@
   }
   uint64_t SpaceUsed() const;
 
+  inline size_t SpaceAvailable() const {
+    return static_cast<size_t>(limit_ - ptr());
+  }
+
   bool HasSpace(size_t n) const {
     return n <= static_cast<size_t>(limit_ - ptr());
   }
@@ -287,6 +291,25 @@
   }
 
  public:
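+  // Allocates `size` bytes for a cleanup node by growing the cleanup area
+  // down from `limit_`, falling back to a new block when space runs out.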
+  PROTOBUF_NDEBUG_INLINE
+  void* AllocateCleanup(size_t size) {
+    GOOGLE_DCHECK(ArenaAlignDefault::IsAligned(size));
+    if (PROTOBUF_PREDICT_TRUE(size <= SpaceAvailable())) {
+      return limit_ -= size;
+    }
+    return AllocateCleanupFallback(size);
+  }
+
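+  // As above, but additionally absorbs the tail padding below `limit_`
+  // needed for `align` alignment; the padded size is returned in
+  // `Memory::size`.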
+  PROTOBUF_NDEBUG_INLINE
+  Memory AllocateCleanup(size_t size, size_t align) {
+    size_t tail_padding = reinterpret_cast<size_t>(limit_) & (align - 1);
+    if (PROTOBUF_PREDICT_TRUE(size + tail_padding <= SpaceAvailable())) {
+      size += tail_padding;
+      return {limit_ -= size, size};
+    }
+    return AllocateCleanupFallback(size, align);
+  }
+
   // Allocate space if the current region provides enough space.
   bool MaybeAllocateAligned(size_t n, void** out) {
     GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
@@ -421,6 +444,8 @@
   void* AllocateAlignedWithCleanupFallback(size_t n, size_t align,
                                            void (*destructor)(void*));
   void AddCleanupFallback(void* elem, void (*destructor)(void*));
+  void* AllocateCleanupFallback(size_t size);
+  Memory AllocateCleanupFallback(size_t size, size_t align);
   inline void AllocateNewBlock(size_t n);
   inline void Init(ArenaBlock* b, size_t offset);
 
@@ -443,6 +468,8 @@
 // use #ifdef to select the best implementation based on hardware / OS.
 class PROTOBUF_EXPORT ThreadSafeArena {
  public:
+  using Memory = SerialArena::Memory;
+
   ThreadSafeArena();
 
   // Constructor solely used by message-owned arena.
@@ -507,6 +534,23 @@
   // Add object pointer and cleanup function pointer to the list.
   void AddCleanup(void* elem, void (*cleanup)(void*));
 
+  // Allocate cleanup space.
+  PROTOBUF_NDEBUG_INLINE void* AllocateCleanup(size_t size) {
+    SerialArena* arena;
+    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
+      return arena->AllocateCleanup(size);
+    }
+    return AllocateCleanupFallback(size);
+  }
+
+  PROTOBUF_NDEBUG_INLINE Memory AllocateCleanup(size_t size, size_t align) {
+    SerialArena* arena;
+    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
+      return arena->AllocateCleanup(size, align);
+    }
+    return AllocateCleanupFallback(size, align);
+  }
+
   // Checks whether this arena is message-owned.
   PROTOBUF_ALWAYS_INLINE bool IsMessageOwned() const {
     return tag_and_id_ & kMessageOwnedArena;
@@ -566,6 +610,8 @@
   void InitializeWithPolicy(const AllocationPolicy& policy);
   void* AllocateAlignedWithCleanupFallback(size_t n, size_t align,
                                            void (*destructor)(void*));
+  void* AllocateCleanupFallback(size_t size);
+  Memory AllocateCleanupFallback(size_t size, size_t align);
 
   void Init();