Add StringBlock experiment ALT 1
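
Arena-allocated std::strings are now carved out of dedicated
TypedBlock<std::string> "StringBlocks" owned by each SerialArena, instead
of registering one cleanup node per string. CleanupList() destroys the
blocks' strings in bulk and returns the heap bytes released, and new
SpaceUsedAdd()/SpaceAllocatedAdd() helpers fold the blocks into the
space accounting.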

PiperOrigin-RevId: 495294072
diff --git a/src/file_lists.cmake b/src/file_lists.cmake
index 9faaf62..bb9728b 100644
--- a/src/file_lists.cmake
+++ b/src/file_lists.cmake
@@ -179,6 +179,7 @@
   ${protobuf_SOURCE_DIR}/src/google/protobuf/stubs/status_macros.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/text_format.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/thread_safe_arena.h
+  ${protobuf_SOURCE_DIR}/src/google/protobuf/typed_block.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/unknown_field_set.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/util/delimited_message_util.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/util/field_comparator.h
@@ -267,6 +268,7 @@
   ${protobuf_SOURCE_DIR}/src/google/protobuf/stubs/port.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/stubs/status_macros.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/thread_safe_arena.h
+  ${protobuf_SOURCE_DIR}/src/google/protobuf/typed_block.h
   ${protobuf_SOURCE_DIR}/src/google/protobuf/wire_format_lite.h
 )
 
diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel
index 9581b91..741cda0 100644
--- a/src/google/protobuf/BUILD.bazel
+++ b/src/google/protobuf/BUILD.bazel
@@ -237,6 +237,16 @@
 )
 
 cc_library(
+    name = "typed_block",
+    hdrs = ["typed_block.h"],
+    include_prefix = "google/protobuf",
+    visibility = [
+        "//:__subpackages__",
+        "//src/google/protobuf:__subpackages__",
+    ],
+)
+
+cc_library(
     name = "arena",
     srcs = [
         "arena.cc",
@@ -258,6 +268,7 @@
         ":arena_allocation_policy",
         ":arena_cleanup",
         ":arena_config",
+        ":typed_block",
         "//src/google/protobuf/stubs:lite",
         "@com_google_absl//absl/synchronization",
     ],
@@ -330,6 +341,7 @@
         ":arena",
         ":arena_align",
         ":arena_config",
+        ":typed_block",
         "//src/google/protobuf/io",
         "//src/google/protobuf/stubs:lite",
         "@com_google_absl//absl/cleanup",
diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc
index 25ddcc3..933190a 100644
--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@@ -35,6 +35,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <limits>
+#include <string>
 #include <typeinfo>
 
 #include "absl/base/attributes.h"
@@ -166,6 +167,7 @@
   space_allocated_.store(b->size, std::memory_order_relaxed);
   cached_block_length_ = 0;
   cached_blocks_ = nullptr;
+  string_block_ = StringBlock::sentinel();
 }
 
 SerialArena* SerialArena::New(Memory mem, ThreadSafeArena& parent) {
@@ -190,6 +192,20 @@
   return mem;
 }
 
+void* SerialArena::AllocateStringFallback() {
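+  // The active StringBlock is full: chain a new one, carved out of the
+  // current arena block when it fits and heap-allocated otherwise. The
+  // `space_used() - sz` adjustment is deliberately modular arithmetic: the
+  // emplaced block's bytes are counted again by the pointer math in
+  // SpaceUsed().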
+  const size_t sz = string_block_->next_size();
+  if (PROTOBUF_PREDICT_TRUE(HasSpace(sz))) {
+    void* mem = AllocateFromExisting(sz);
+    SpaceUsedAdd(string_block_->space_used() - sz);
+    string_block_ = string_block_->EmplaceNext(mem, sz);
+  } else {
+    SpaceUsedAdd(string_block_->space_used());
+    string_block_ = string_block_->CreateNext(sz);
+    SpaceAllocatedAdd(string_block_->space_allocated());
+  }
+  return string_block_->Allocate();
+}
+
 PROTOBUF_NOINLINE
 void* SerialArena::AllocateAlignedFallback(size_t n) {
   AllocateNewBlock(n);
@@ -222,8 +238,7 @@
     // Record how much used in this block.
     used = static_cast<size_t>(ptr() - old_head->Pointer(kBlockHeaderSize));
     wasted = old_head->size - used;
-    space_used_.store(space_used_.load(std::memory_order_relaxed) + used,
-                      std::memory_order_relaxed);
+    SpaceUsedAdd(used);
   }
 
   // TODO(sbenza): Evaluate if pushing unused space into the cached blocks is a
@@ -232,12 +247,7 @@
   // the microbenchmark.
 
   auto mem = AllocateMemory(parent_.AllocPolicy(), old_head->size, n);
-  // We don't want to emit an expensive RMW instruction that requires
-  // exclusive access to a cacheline. Hence we write it in terms of a
-  // regular add.
-  space_allocated_.store(
-      space_allocated_.load(std::memory_order_relaxed) + mem.size,
-      std::memory_order_relaxed);
+  SpaceAllocatedAdd(mem.size);
   ThreadSafeArenaStats::RecordAllocateStats(parent_.arena_stats_.MutableStats(),
                                             /*used=*/used,
                                             /*allocated=*/mem.size, wasted);
@@ -260,20 +270,32 @@
   // usage of the *current* block.
   // TODO(mkruskal) Consider eliminating this race in exchange for a possible
   // performance hit on ARM (see cl/455186837).
+  uint64_t current_space_used = string_block_->space_used();
   const ArenaBlock* h = head_.load(std::memory_order_acquire);
-  if (h->IsSentry()) return 0;
+  if (h->IsSentry()) {
+    // No arena blocks yet, but retired StringBlocks may already have been
+    // folded into space_used_.
+    return current_space_used + space_used_.load(std::memory_order_relaxed);
+  }
 
   const uint64_t current_block_size = h->size;
-  uint64_t current_space_used = std::min(
+  current_space_used += std::min(
       static_cast<uint64_t>(
           ptr() - const_cast<ArenaBlock*>(h)->Pointer(kBlockHeaderSize)),
       current_block_size);
   return current_space_used + space_used_.load(std::memory_order_relaxed);
 }
 
-void SerialArena::CleanupList() {
+size_t SerialArena::CleanupList() {
+  size_t deallocated = 0;
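+  // Destroy arena-owned strings by walking the StringBlock chain from the
+  // newest block back to the shared sentinel. Heap blocks are freed and their
+  // bytes reported to the caller; emplaced blocks are released along with the
+  // arena blocks that contain them.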
+  StringBlock* block = string_block_;
+  while (StringBlock* next = block->next()) {
+    block->DestroyAll();
+    if (block->is_heap_allocated()) {
+      deallocated += block->space_allocated();
+      StringBlock::Delete(block);
+    }
+    block = next;
+  }
+
   ArenaBlock* b = head();
-  if (b->IsSentry()) return;
+  if (b->IsSentry()) return deallocated;
 
   b->cleanup_nodes = limit_;
   do {
@@ -285,6 +307,7 @@
     }
     b = b->next;
   } while (b);
+  return deallocated;
 }
 
 // Stores arrays of void* and SerialArena* instead of linked list of
@@ -664,11 +687,10 @@
 uint64_t ThreadSafeArena::Reset() {
   // Have to do this in a first pass, because some of the destructors might
   // refer to memory in other blocks.
-  CleanupList();
+  size_t space_allocated = CleanupList();
 
   // Discard all blocks except the first one. Whether it is user-provided or
   // allocated, always reuse the first block for the first arena.
-  size_t space_allocated = 0;
   auto mem = Free(&space_allocated);
   space_allocated += mem.size;
 
@@ -784,8 +806,9 @@
 template void*
     ThreadSafeArena::AllocateAlignedFallback<AllocationClient::kArray>(size_t);
 
-void ThreadSafeArena::CleanupList() {
-  WalkSerialArenaChunk([](SerialArenaChunk* chunk) {
+size_t ThreadSafeArena::CleanupList() {
+  size_t deallocated = 0;
+  WalkSerialArenaChunk([&](SerialArenaChunk* chunk) {
     absl::Span<std::atomic<SerialArena*>> span = chunk->arenas();
     // Walks arenas backward to handle the first serial arena the last.
     // Destroying in reverse-order to the construction is often assumed by users
@@ -793,11 +816,13 @@
     for (auto it = span.rbegin(); it != span.rend(); ++it) {
       SerialArena* serial = it->load(std::memory_order_relaxed);
       GOOGLE_ABSL_DCHECK_NE(serial, nullptr);
-      serial->CleanupList();
+      deallocated += serial->CleanupList();
     }
   });
   // First arena must be cleaned up last. (b/247560530)
-  first_arena_.CleanupList();
+  deallocated += first_arena_.CleanupList();
+
+  return deallocated;
 }
 
 PROTOBUF_NOINLINE
@@ -848,6 +873,12 @@
   return impl_.AllocateAlignedWithCleanup(n, align, destructor);
 }
 
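+// std::string gets a dedicated allocation path: storage comes from the
+// SerialArena's StringBlocks and no per-object cleanup node is registered;
+// CleanupList() later destroys the strings in bulk.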
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE void*
+Arena::AllocateInternal<std::string, false>() {
+  return impl_.AllocateString();
+}
+
 }  // namespace protobuf
 }  // namespace google
 
diff --git a/src/google/protobuf/arena.h b/src/google/protobuf/arena.h
index 06acc3d..f2268eb 100644
--- a/src/google/protobuf/arena.h
+++ b/src/google/protobuf/arena.h
@@ -275,11 +275,7 @@
     if (PROTOBUF_PREDICT_FALSE(arena == nullptr)) {
       return new T(std::forward<Args>(args)...);
     }
-    auto destructor =
-        internal::ObjectDestructor<std::is_trivially_destructible<T>::value,
-                                   T>::destructor;
-    return new (arena->AllocateInternal(sizeof(T), alignof(T), destructor))
-        T(std::forward<Args>(args)...);
+    return new (arena->AllocateInternal<T>()) T(std::forward<Args>(args)...);
   }
 
   // API to delete any objects not on an arena.  This can be used to safely
@@ -566,13 +562,14 @@
     }
   }
 
-  PROTOBUF_NDEBUG_INLINE void* AllocateInternal(size_t size, size_t align,
-                                                void (*destructor)(void*)) {
-    // Monitor allocation if needed.
-    if (destructor == nullptr) {
-      return AllocateAligned(size, align);
+  template <typename T, bool trivial = std::is_trivially_destructible<T>::value>
+  PROTOBUF_NDEBUG_INLINE void* AllocateInternal() {
+    // `trivial` defaults to is_trivially_destructible<T>; callers such as
+    // DoCreateMessage() override it with is_destructor_skippable<T>.
+    if (trivial) {
+      return AllocateAligned(sizeof(T), alignof(T));
     } else {
-      return AllocateAlignedWithCleanup(size, align, destructor);
+      constexpr auto dtor = &internal::cleanup::arena_destruct_object<T>;
+      return AllocateAlignedWithCleanup(sizeof(T), alignof(T), dtor);
     }
   }
 
@@ -605,11 +602,8 @@
   template <typename T, typename... Args>
   PROTOBUF_NDEBUG_INLINE T* DoCreateMessage(Args&&... args) {
     return InternalHelper<T>::Construct(
-        AllocateInternal(sizeof(T), alignof(T),
-                         internal::ObjectDestructor<
-                             InternalHelper<T>::is_destructor_skippable::value,
-                             T>::destructor),
-        this, std::forward<Args>(args)...);
+        AllocateInternal<T, is_destructor_skippable<T>::value>(), this,
+        std::forward<Args>(args)...);
   }
 
   // CreateInArenaStorage is used to implement map field. Without it,
@@ -689,6 +683,9 @@
   friend struct internal::ArenaTestPeer;
 };
 
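+// Specialized out of line (see arena.cc): std::string is allocated from
+// dedicated StringBlocks rather than through the generic cleanup path.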
+template <>
+void* Arena::AllocateInternal<std::string, false>();
+
 }  // namespace protobuf
 }  // namespace google
 
diff --git a/src/google/protobuf/port.h b/src/google/protobuf/port.h
index 654379c..6b39283 100644
--- a/src/google/protobuf/port.h
+++ b/src/google/protobuf/port.h
@@ -51,6 +51,15 @@
 namespace internal {
 
 
+struct SizedPtr {
+  void* p;
+  size_t n;
+};
+
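+// Allocates `size` bytes and returns both the pointer and the size, so that
+// the result can later be released with SizedDelete() below.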
+inline SizedPtr SizedAllocate(size_t size) {
+  return {::operator new(size), size};
+}
+
 inline void SizedDelete(void* p, size_t size) {
 #if defined(__cpp_sized_deallocation)
   ::operator delete(p, size);
diff --git a/src/google/protobuf/serial_arena.h b/src/google/protobuf/serial_arena.h
index 171483b..04300f6 100644
--- a/src/google/protobuf/serial_arena.h
+++ b/src/google/protobuf/serial_arena.h
@@ -35,11 +35,13 @@
 
 #include <algorithm>
 #include <atomic>
+#include <string>
 #include <type_traits>
 #include <typeinfo>
 #include <utility>
 
 #include "google/protobuf/stubs/common.h"
+#include "absl/base/attributes.h"
 #include "google/protobuf/stubs/logging.h"
 #include "absl/numeric/bits.h"
 #include "google/protobuf/arena_align.h"
@@ -47,6 +49,7 @@
 #include "google/protobuf/arena_config.h"
 #include "google/protobuf/arenaz_sampler.h"
 #include "google/protobuf/port.h"
+#include "google/protobuf/typed_block.h"
 
 // Must be included last.
 #include "google/protobuf/port_def.inc"
@@ -109,11 +112,23 @@
     size_t size;
   };
 
-  void CleanupList();
+  size_t CleanupList();
   uint64_t SpaceAllocated() const {
     return space_allocated_.load(std::memory_order_relaxed);
   }
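+  // Implemented as relaxed load + store rather than an atomic RMW: only the
+  // owning thread writes these counters, and an RMW would require exclusive
+  // access to the cacheline.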
+  void SpaceAllocatedAdd(size_t n) {
+    space_allocated_.store(space_allocated_.load(std::memory_order_relaxed) + n,
+                           std::memory_order_relaxed);
+  }
   uint64_t SpaceUsed() const;
+  void SpaceUsedAdd(size_t n) {
+    space_used_.store(space_used_.load(std::memory_order_relaxed) + n,
+                      std::memory_order_relaxed);
+  }
+  void SpaceUsedSub(size_t n) {
+    space_used_.store(space_used_.load(std::memory_order_relaxed) - n,
+                      std::memory_order_relaxed);
+  }
 
   bool HasSpace(size_t n) const {
     return n <= static_cast<size_t>(limit_ - ptr());
@@ -284,6 +299,9 @@
     AddCleanupFromExisting(elem, destructor);
   }
 
+  void* AllocateString() ABSL_ATTRIBUTE_RETURNS_NONNULL;
+  void* TryAllocateString();
+
  private:
   void* AllocateFromExistingWithCleanupFallback(size_t n, size_t align,
                                                 void (*destructor)(void*)) {
@@ -307,7 +325,11 @@
     cleanup::CreateNode(tag, limit_, elem, destructor);
   }
 
+  void* AllocateStringFallback();
+
  private:
+  using StringBlock = TypedBlock<std::string>;
+
   friend class ThreadSafeArena;
 
   // Creates a new SerialArena inside mem using the remaining memory as for
@@ -330,7 +352,8 @@
   char* limit_ = nullptr;
 
   std::atomic<ArenaBlock*> head_{nullptr};  // Head of linked list of blocks.
-  std::atomic<size_t> space_used_{0};       // Necessary for metrics.
+  StringBlock* string_block_ = StringBlock::sentinel();  // Newest StringBlock.
+  std::atomic<size_t> space_used_{0};  // Necessary for metrics.
   std::atomic<size_t> space_allocated_{0};
   ThreadSafeArena& parent_;
 
@@ -377,6 +400,32 @@
       ArenaAlignDefault::Ceil(sizeof(ArenaBlock));
 };
 
+inline PROTOBUF_NDEBUG_INLINE void* SerialArena::AllocateString() {
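+  // Fast path: bump-allocate storage for one std::string from the active
+  // StringBlock; chain a new block via the fallback only when it is full.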
+  void* ptr = string_block_->TryAllocate();
+  if (PROTOBUF_PREDICT_TRUE(ptr != nullptr)) return ptr;
+  return AllocateStringFallback();
+}
+
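+// Like AllocateString(), but never grows the arena: it only uses memory the
+// arena already owns, and returns nullptr when none is available.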
+inline PROTOBUF_NDEBUG_INLINE void* SerialArena::TryAllocateString() {
+  void* p = string_block_->TryAllocate();
+  if (PROTOBUF_PREDICT_TRUE(p != nullptr)) return p;
+
+  if (PROTOBUF_PREDICT_TRUE(HasSpace(StringBlock::min_size()))) {
+    size_t available = static_cast<size_t>(limit_ - ptr());
+    size_t sz = std::min(available, string_block_->next_size());
+    SpaceUsedAdd(string_block_->space_used() - sz);
+    string_block_ = string_block_->EmplaceNext(AllocateFromExisting(sz), sz);
+    return string_block_->Allocate();
+  }
+  return nullptr;
+}
+
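+// Strings bypass the cleanup list: StringBlock::DestroyAll() destroys them in
+// bulk during SerialArena::CleanupList().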
+template <>
+inline PROTOBUF_ALWAYS_INLINE void*
+SerialArena::MaybeAllocateWithCleanup<std::string>() {
+  return TryAllocateString();
+}
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/thread_safe_arena.h b/src/google/protobuf/thread_safe_arena.h
index b30d85a..5978ed2 100644
--- a/src/google/protobuf/thread_safe_arena.h
+++ b/src/google/protobuf/thread_safe_arena.h
@@ -124,6 +124,14 @@
   // Add object pointer and cleanup function pointer to the list.
   void AddCleanup(void* elem, void (*cleanup)(void*));
 
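+  // Allocates uninitialized storage for one std::string from the calling
+  // thread's SerialArena, creating that arena on the slow path if needed.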
+  PROTOBUF_NDEBUG_INLINE void* AllocateString() {
+    SerialArena* arena;
+    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
+      return arena->AllocateString();
+    }
+    return GetSerialArenaFallback(0)->AllocateStringFallback();
+  }
+
  private:
   friend class ArenaBenchmark;
   friend class TcParser;
@@ -179,7 +187,7 @@
   void Init();
 
   // Delete or Destruct all objects owned by the arena.
-  void CleanupList();
+  size_t CleanupList();
 
   inline void CacheSerialArena(SerialArena* serial) {
     thread_cache().last_serial_arena = serial;
diff --git a/src/google/protobuf/typed_block.h b/src/google/protobuf/typed_block.h
new file mode 100644
index 0000000..ded7b64
--- /dev/null
+++ b/src/google/protobuf/typed_block.h
@@ -0,0 +1,164 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2022 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// This file defines the internal templated class TypedBlock<T>.
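+//
+// A TypedBlock<T> is a fixed-capacity block holding a small header followed
+// by an uninitialized array of T. Blocks form a singly linked list that
+// terminates at a shared constexpr sentinel, so allocation never needs a
+// null check.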
+
+#ifndef GOOGLE_PROTOBUF_TYPED_BLOCK_H__
+#define GOOGLE_PROTOBUF_TYPED_BLOCK_H__
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <new>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "google/protobuf/stubs/logging.h"
+#include "google/protobuf/port.h"
+
+
+// Must be included last.
+#include "google/protobuf/port_def.inc"
+
+namespace google {
+namespace protobuf {
+namespace internal {
+
+template <typename T>
+class alignas(T) TypedBlock {
+ public:
+  static constexpr size_t min_size();
+  static constexpr size_t max_size();
+
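+  // Constructs the next block in caller-provided memory (e.g. carved out of
+  // an arena block). The resulting block does not own its storage.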
+  TypedBlock* EmplaceNext(void* mem, size_t size) {
+    GOOGLE_ABSL_DCHECK_GE(size, sizeof(TypedBlock));
+    GOOGLE_ABSL_DCHECK_LE(size, max_size());
+    size_t next_sz = std::min(next_size() + size, max_size());
+    return new (mem) TypedBlock(0, size, next_sz, false, this);
+  }
+
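+  // Heap-allocates the next block. Requested sizes grow with each block until
+  // they are capped at max_size().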
+  TypedBlock* CreateNext(size_t size) {
+    GOOGLE_ABSL_DCHECK_GE(size, sizeof(TypedBlock));
+    GOOGLE_ABSL_DCHECK_LE(size, max_size());
+    SizedPtr res = internal::SizedAllocate(size);
+    size_t next_sz = std::min(next_size() + res.n, max_size());
+    return new (res.p) TypedBlock(res.n, res.n, next_sz, true, this);
+  }
+
+  static void Delete(TypedBlock* block) {
+    GOOGLE_ABSL_DCHECK(block != nullptr);
+    GOOGLE_ABSL_DCHECK(block->heap_allocated_);
+    internal::SizedDelete(block, block->allocated_size_);
+  }
+
+  void DestroyAll() {
+    for (T& t : *this) t.~T();
+  }
+
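+  // Shared immutable empty block: TryAllocate() on it always fails, which
+  // routes the first allocation through the fallback path with no null check.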
+  static TypedBlock* sentinel() ABSL_ATTRIBUTE_RETURNS_NONNULL;
+
+  TypedBlock* next() const { return next_; }
+  size_t next_size() const { return next_size_; }
+
+  size_t space_used() const { return count_ * sizeof(T); }
+  size_t space_allocated() const { return allocated_size_; }
+
+  bool is_heap_allocated() const { return heap_allocated_; }
+
+  inline T* TryAllocate() {
+    if (PROTOBUF_PREDICT_TRUE(count_ < capacity_)) {
+      return reinterpret_cast<T*>(this + 1) + count_++;
+    }
+    return nullptr;
+  }
+
+  inline T* Allocate() ABSL_ATTRIBUTE_RETURNS_NONNULL {
+    GOOGLE_ABSL_DCHECK_LT(count_, capacity_);
+    return reinterpret_cast<T*>(this + 1) + count_++;
+  }
+
+ private:
+  static constexpr size_t SizeToCount(size_t size) {
+    return (size - sizeof(TypedBlock)) / sizeof(T);
+  }
+
+  constexpr TypedBlock() = default;
+  constexpr TypedBlock(size_t allocated_size, size_t size, size_t next_size,
+                       bool heap_allocated, TypedBlock* next)
+      : allocated_size_(static_cast<uint32_t>(allocated_size)),
+        next_size_(static_cast<uint32_t>(next_size)),
+        capacity_(static_cast<uint16_t>(SizeToCount(size))),
+        heap_allocated_(heap_allocated),
+        next_(next) {}
+
+  T* begin() { return reinterpret_cast<T*>(this + 1); }
+  T* end() { return reinterpret_cast<T*>(this + 1) + count_; }
+
+  struct Sentinel;
+
+  // The array of T begins directly after this header (`this + 1`); alignas(T)
+  // on the class keeps that array properly aligned.
+  const uint32_t allocated_size_ = 0;
+  const uint32_t next_size_ = static_cast<uint32_t>(min_size());
+  const uint16_t capacity_ = 0;
+  uint16_t count_ = 0;
+  const bool heap_allocated_ = false;
+  TypedBlock* const next_ = nullptr;
+};
+
+template <typename T>
+inline constexpr size_t TypedBlock<T>::min_size() {
+  return std::max(size_t{512}, sizeof(TypedBlock<T>) + sizeof(T) * 8);
+}
+
+template <typename T>
+constexpr size_t TypedBlock<T>::max_size() {
+  return std::max(min_size(), size_t{4 << 10});
+}
+
+template <typename T>
+struct TypedBlock<T>::Sentinel {
+  static constexpr TypedBlock<T> kSentinel = {};
+};
+
+template <typename T>
+constexpr TypedBlock<T> TypedBlock<T>::Sentinel::kSentinel;
+
+template <typename T>
+inline TypedBlock<T>* TypedBlock<T>::sentinel() {
+  return const_cast<TypedBlock<T>*>(&Sentinel::kSentinel);
+}
+
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google
+
+#include "google/protobuf/port_undef.inc"
+
+#endif  // GOOGLE_PROTOBUF_TYPED_BLOCK_H__