Unify alignment logic

This change implements the existing 'Align*' logic in terms of `arena_align.h` enabling further cleanups and a long needed split of arena_impl.h into serial_arena.h and thread_safe_arena.h. The latter reduces the cost of adding new features or optimizations to arena.

PiperOrigin-RevId: 494259282
diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel
index 48a8115..4902ce2 100644
--- a/src/google/protobuf/BUILD.bazel
+++ b/src/google/protobuf/BUILD.bazel
@@ -253,6 +253,7 @@
         "//src/google/protobuf:__subpackages__",
     ],
     deps = [
+        ":arena_align",
         ":arena_allocation_policy",
         ":arena_cleanup",
         ":arena_config",
diff --git a/src/google/protobuf/arena.h b/src/google/protobuf/arena.h
index ee41320..42fca7a 100644
--- a/src/google/protobuf/arena.h
+++ b/src/google/protobuf/arena.h
@@ -49,6 +49,7 @@
 #endif
 
 #include <type_traits>
+#include "google/protobuf/arena_align.h"
 #include "google/protobuf/arena_config.h"
 #include "google/protobuf/arena_impl.h"
 #include "google/protobuf/port.h"
@@ -291,15 +292,16 @@
 
   // Allocates memory with the specific size and alignment.
   void* AllocateAligned(size_t size, size_t align = 8) {
-    if (align <= 8) {
-      return Allocate(internal::AlignUpTo8(size));
+    if (align <= internal::ArenaAlignDefault::align) {
+      return Allocate(internal::ArenaAlignDefault::Ceil(size));
     } else {
       // We are wasting space by over allocating align - 8 bytes. Compared
       // to a dedicated function that takes current alignment in consideration.
       // Such a scheme would only waste (align - 8)/2 bytes on average, but
       // requires a dedicated function in the outline arena allocation
       // functions. Possibly re-evaluate tradeoffs later.
-      return internal::AlignTo(Allocate(size + align - 8), align);
+      auto align_as = internal::ArenaAlignAs(align);
+      return align_as.Ceil(Allocate(align_as.Padded(size)));
     }
   }
 
@@ -678,15 +680,16 @@
   }
 
   void* AllocateAlignedForArray(size_t n, size_t align) {
-    if (align <= 8) {
-      return AllocateForArray(internal::AlignUpTo8(n));
+    if (align <= internal::ArenaAlignDefault::align) {
+      return AllocateForArray(internal::ArenaAlignDefault::Ceil(n));
     } else {
       // We are wasting space by over allocating align - 8 bytes. Compared
       // to a dedicated function that takes current alignment in consideration.
       // Such a scheme would only waste (align - 8)/2 bytes on average, but
       // requires a dedicated function in the outline arena allocation
       // functions. Possibly re-evaluate tradeoffs later.
-      return internal::AlignTo(AllocateForArray(n + align - 8), align);
+      auto align_as = internal::ArenaAlignAs(align);
+      return align_as.Ceil(AllocateForArray(align_as.Padded(n)));
     }
   }
 
diff --git a/src/google/protobuf/arena_align.h b/src/google/protobuf/arena_align.h
index 0603972..90cadfb 100644
--- a/src/google/protobuf/arena_align.h
+++ b/src/google/protobuf/arena_align.h
@@ -35,14 +35,21 @@
 //
 //   Ceil(size_t n)      - rounds `n` up to the nearest `align` boundary.
 //   Floor(size_t n)     - rounds `n` down to the nearest `align` boundary.
-//   Ceil(T* P)          - rounds `p` up to the nearest `align` boundary.
+//   Padded(size_t n)    - returns the unaligned size to align 'n' bytes. (1)
+
+//   Ceil(T* P)          - rounds `p` up to the nearest `align` boundary. (2)
 //   IsAligned(size_t n) - returns true if `n` is aligned to `align`
 //   IsAligned(T* p)     - returns true if `p` is aligned to `align`
 //   CheckAligned(T* p)  - returns `p`. Checks alignment of `p` in debug.
 //
-// Additionally there is an optimized `CeilDefaultAligned(T*)` method which is
-// equivalent to `Ceil(ArenaAlignDefault().CheckAlign(p))` but more efficiently
-// implemented as a 'check only' for ArenaAlignDefault.
+// 1) `Padded(n)` returns the minimum size needed to align an object of size 'n'
+//    into a memory area that is default aligned. For example, allocating 'n'
+//    bytes aligned at 32 bytes requires a size of 'n + 32 - 8' to align at 32
+//    bytes for any 8 byte boundary.
+//
+// 2) There is an optimized `CeilDefaultAligned(T*)` method which is equivalent
+//    to `Ceil(ArenaAlignDefault::CheckAlign(p))` but more efficiently
+//    implemented as a 'check only' for ArenaAlignDefault.
 //
 // These classes allow for generic arena logic using 'alignment policies'.
 //
@@ -50,10 +57,14 @@
 //
 //  template <Align>
 //  void* NaiveAlloc(size_t n, Align align) {
-//    align.CheckAligned(n);
-//    uint8_t* ptr = align.CeilDefaultAligned(ptr_);
-//    ptr_ += n;
-//    return ptr;
+//    ABSL_ASSERT(align.IsAligned(n));
+//    const size_t required = align.Padded(n);
+//    if (required <= static_cast<size_t>(ptr_ - limit_)) {
+//      uint8_t* ptr = align.CeilDefaultAligned(ptr_);
+//      ptr_ = ptr + n;
+//      return ptr;
+//    }
+//    return nullptr;
 //  }
 //
 //  void CallSites() {
@@ -83,28 +94,38 @@
   static constexpr bool IsAligned(size_t n) { return (n & (align - 1)) == 0; }
 
   template <typename T>
-  static bool IsAligned(T* ptr) {
+  static inline PROTOBUF_ALWAYS_INLINE bool IsAligned(T* ptr) {
     return (reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0;
   }
 
-  static constexpr size_t Ceil(size_t n) { return (n + align - 1) & -align; }
-  static constexpr size_t Floor(size_t n) { return (n & ~(align - 1)); }
+  static inline PROTOBUF_ALWAYS_INLINE constexpr size_t Ceil(size_t n) {
+    return (n + align - 1) & -align;
+  }
+  static inline PROTOBUF_ALWAYS_INLINE constexpr size_t Floor(size_t n) {
+    return (n & ~(align - 1));
+  }
+
+  static inline PROTOBUF_ALWAYS_INLINE constexpr size_t Padded(size_t n) {
+    ABSL_ASSERT(IsAligned(n));
+    return n;
+  }
 
   template <typename T>
-  T* Ceil(T* ptr) const {
+  static inline PROTOBUF_ALWAYS_INLINE T* Ceil(T* ptr) {
     uintptr_t intptr = reinterpret_cast<uintptr_t>(ptr);
     return reinterpret_cast<T*>((intptr + align - 1) & -align);
   }
 
   template <typename T>
-  T* CeilDefaultAligned(T* ptr) const {
-    return ArenaAlignDefault().CheckAligned(ptr);
+  static inline PROTOBUF_ALWAYS_INLINE T* CeilDefaultAligned(T* ptr) {
+    ABSL_ASSERT(IsAligned(ptr));
+    return ptr;
   }
 
   // Address sanitizer enabled alignment check
   template <typename T>
-  static T* CheckAligned(T* ptr) {
-    GOOGLE_DCHECK(IsAligned(ptr)) << static_cast<void*>(ptr);
+  static inline PROTOBUF_ALWAYS_INLINE T* CheckAligned(T* ptr) {
+    ABSL_ASSERT(IsAligned(ptr));
     return ptr;
   }
 };
@@ -124,6 +145,12 @@
   constexpr size_t Ceil(size_t n) const { return (n + align - 1) & -align; }
   constexpr size_t Floor(size_t n) const { return (n & ~(align - 1)); }
 
+  constexpr size_t Padded(size_t n) const {
+    ABSL_ASSERT(IsAligned(n));
+    ABSL_ASSERT(ArenaAlignDefault::IsAligned(align));
+    return n + align - ArenaAlignDefault::align;
+  }
+
   template <typename T>
   T* Ceil(T* ptr) const {
     uintptr_t intptr = reinterpret_cast<uintptr_t>(ptr);
@@ -132,13 +159,14 @@
 
   template <typename T>
   T* CeilDefaultAligned(T* ptr) const {
-    return Ceil(ArenaAlignDefault().CheckAligned(ptr));
+    ABSL_ASSERT(ArenaAlignDefault::IsAligned(ptr));
+    return Ceil(ptr);
   }
 
   // Address sanitizer enabled alignment check
   template <typename T>
   T* CheckAligned(T* ptr) const {
-    GOOGLE_DCHECK(IsAligned(ptr)) << static_cast<void*>(ptr);
+    ABSL_ASSERT(IsAligned(ptr));
     return ptr;
   }
 };
@@ -150,6 +178,36 @@
   return ArenaAlign{align};
 }
 
+template <bool, size_t align>
+struct AlignFactory {
+  static_assert(align > ArenaAlignDefault::align, "Not over-aligned");
+  static_assert((align & (align - 1)) == 0, "Not power of 2");
+  static constexpr ArenaAlign Create() { return ArenaAlign{align}; }
+};
+
+template <size_t align>
+struct AlignFactory<true, align> {
+  static_assert(align <= ArenaAlignDefault::align, "Over-aligned");
+  static_assert((align & (align - 1)) == 0, "Not power of 2");
+  static constexpr ArenaAlignDefault Create() { return ArenaAlignDefault{}; }
+};
+
+// Returns an `ArenaAlignDefault` instance for `align` less than or equal to the
+// default alignment, and `AlignAs(align)` for over-aligned values of `align`.
+// The purpose is to take advantage of invoking functions accepting a template
+// overloaded 'Align align` argument reducing the alignment operations on
+// `ArenaAlignDefault` implementations to no-ops.
+template <size_t align>
+inline constexpr auto ArenaAlignAs() {
+  return AlignFactory<align <= ArenaAlignDefault::align, align>::Create();
+}
+
+// Returns ArenaAlignAs<alignof(T)>
+template <typename T>
+inline constexpr auto ArenaAlignOf() {
+  return ArenaAlignAs<alignof(T)>();
+}
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/arena_align_test.cc b/src/google/protobuf/arena_align_test.cc
index e17cb33..466d1b6 100644
--- a/src/google/protobuf/arena_align_test.cc
+++ b/src/google/protobuf/arena_align_test.cc
@@ -67,6 +67,16 @@
   EXPECT_THAT(align_default.Ceil(16), Eq(16));
 }
 
+TEST(ArenaAlignDefault, Padded) {
+  auto align_default = ArenaAlignDefault();
+  EXPECT_THAT(align_default.Padded(0), Eq(0));
+  EXPECT_THAT(align_default.Padded(8), Eq(8));
+  EXPECT_THAT(align_default.Padded(64), Eq(64));
+#ifdef PROTOBUF_HAS_DEATH_TEST
+  EXPECT_DEBUG_DEATH(align_default.Padded(1), ".*");
+#endif  // PROTOBUF_HAS_DEATH_TEST
+}
+
 TEST(ArenaAlignDefault, CeilPtr) {
   alignas(8) char p[17] = {0};
   auto align_default = ArenaAlignDefault();
@@ -147,6 +157,16 @@
   EXPECT_THAT(align_64.Ceil(128), Eq(128));
 }
 
+TEST(ArenaAlign, Padded) {
+  auto align_64 = ArenaAlignAs(64);
+  EXPECT_THAT(align_64.Padded(64), Eq(64 + 64 - ArenaAlignDefault::align));
+  EXPECT_THAT(align_64.Padded(128), Eq(128 + 64 - ArenaAlignDefault::align));
+#ifdef PROTOBUF_HAS_DEATH_TEST
+  EXPECT_DEBUG_DEATH(align_64.Padded(16), ".*");
+  EXPECT_DEBUG_DEATH(ArenaAlignAs(2).Padded(8), ".*");
+#endif  // PROTOBUF_HAS_DEATH_TEST
+}
+
 TEST(ArenaAlign, CeilPtr) {
   alignas(64) char p[129] = {0};
   auto align_64 = ArenaAlignAs(64);
diff --git a/src/google/protobuf/arena_impl.h b/src/google/protobuf/arena_impl.h
index a9ca93b..4917f00 100644
--- a/src/google/protobuf/arena_impl.h
+++ b/src/google/protobuf/arena_impl.h
@@ -42,9 +42,10 @@
 
 #include "google/protobuf/stubs/common.h"
 #include "google/protobuf/stubs/logging.h"
-#include "absl/base/attributes.h"
 #include "absl/numeric/bits.h"
+#include "absl/strings/cord.h"
 #include "absl/synchronization/mutex.h"
+#include "google/protobuf/arena_align.h"
 #include "google/protobuf/arena_allocation_policy.h"
 #include "google/protobuf/arena_cleanup.h"
 #include "google/protobuf/arena_config.h"
@@ -59,29 +60,6 @@
 namespace protobuf {
 namespace internal {
 
-inline PROTOBUF_ALWAYS_INLINE constexpr size_t AlignUpTo8(size_t n) {
-  // Align n to next multiple of 8 (from Hacker's Delight, Chapter 3.)
-  return (n + 7) & static_cast<size_t>(-8);
-}
-
-inline PROTOBUF_ALWAYS_INLINE constexpr size_t AlignUpTo(size_t n, size_t a) {
-  // We are wasting space by over allocating align - 8 bytes. Compared to a
-  // dedicated function that takes current alignment in consideration.  Such a
-  // scheme would only waste (align - 8)/2 bytes on average, but requires a
-  // dedicated function in the outline arena allocation functions. Possibly
-  // re-evaluate tradeoffs later.
-  return a <= 8 ? AlignUpTo8(n) : n + a - 8;
-}
-
-inline PROTOBUF_ALWAYS_INLINE void* AlignTo(void* p, size_t a) {
-  if (a <= 8) {
-    return p;
-  } else {
-    auto u = reinterpret_cast<uintptr_t>(p);
-    return reinterpret_cast<void*>((u + a - 1) & (~a + 1));
-  }
-}
-
 // Arena blocks are variable length malloc-ed objects.  The following structure
 // describes the common header for all blocks.
 struct ArenaBlock {
@@ -172,7 +150,7 @@
   // from it.
   template <AllocationClient alloc_client = AllocationClient::kDefault>
   void* AllocateAligned(size_t n) {
-    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
+    GOOGLE_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
     GOOGLE_DCHECK_GE(limit_, ptr());
 
     if (alloc_client == AllocationClient::kArray) {
@@ -188,6 +166,22 @@
   }
 
  private:
+  static inline PROTOBUF_ALWAYS_INLINE constexpr size_t AlignUpTo(size_t n,
+                                                                  size_t a) {
+    // We are wasting space by over allocating align - 8 bytes. Compared to a
+    // dedicated function that takes current alignment in consideration.  Such a
+    // scheme would only waste (align - 8)/2 bytes on average, but requires a
+    // dedicated function in the outline arena allocation functions. Possibly
+    // re-evaluate tradeoffs later.
+    return a <= 8 ? ArenaAlignDefault::Ceil(n) : ArenaAlignAs(a).Padded(n);
+  }
+
+  static inline PROTOBUF_ALWAYS_INLINE void* AlignTo(void* p, size_t a) {
+    return (a <= ArenaAlignDefault::align)
+               ? ArenaAlignDefault::CeilDefaultAligned(p)
+               : ArenaAlignAs(a).CeilDefaultAligned(p);
+  }
+
   void* AllocateFromExisting(size_t n) {
     PROTOBUF_UNPOISON_MEMORY_REGION(ptr(), n);
     void* ret = ptr();
@@ -248,7 +242,7 @@
  public:
   // Allocate space if the current region provides enough space.
   bool MaybeAllocateAligned(size_t n, void** out) {
-    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
+    GOOGLE_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
     GOOGLE_DCHECK_GE(limit_, ptr());
     if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false;
     *out = AllocateFromExisting(n);
@@ -264,7 +258,7 @@
     static_assert(!std::is_trivially_destructible<T>::value,
                   "This function is only for non-trivial types.");
 
-    constexpr int aligned_size = AlignUpTo8(sizeof(T));
+    constexpr int aligned_size = ArenaAlignDefault::Ceil(sizeof(T));
     constexpr auto destructor = cleanup::arena_destruct_object<T>;
     size_t required = aligned_size + cleanup::Size(destructor);
     if (PROTOBUF_PREDICT_FALSE(!HasSpace(required))) {
@@ -300,7 +294,7 @@
                                                 void (*destructor)(void*)) {
     n = AlignUpTo(n, align);
     PROTOBUF_UNPOISON_MEMORY_REGION(ptr(), n);
-    void* ret = internal::AlignTo(ptr(), align);
+    void* ret = ArenaAlignAs(align).CeilDefaultAligned(ptr());
     set_ptr(ptr() + n);
     GOOGLE_DCHECK_GE(limit_, ptr());
     AddCleanupFromExisting(ret, destructor);
@@ -384,7 +378,8 @@
   inline void Init(ArenaBlock* b, size_t offset);
 
  public:
-  static constexpr size_t kBlockHeaderSize = AlignUpTo8(sizeof(ArenaBlock));
+  static constexpr size_t kBlockHeaderSize =
+      ArenaAlignDefault::Ceil(sizeof(ArenaBlock));
 };
 
 // Tag type used to invoke the constructor of message-owned arena.
@@ -623,7 +618,7 @@
   static constexpr size_t kSerialArenaSize =
       (sizeof(SerialArena) + 7) & static_cast<size_t>(-8);
   static constexpr size_t kAllocPolicySize =
-      AlignUpTo8(sizeof(AllocationPolicy));
+      ArenaAlignDefault::Ceil(sizeof(AllocationPolicy));
   static constexpr size_t kMaxCleanupNodeSize = 16;
   static_assert(kBlockHeaderSize % 8 == 0,
                 "kBlockHeaderSize must be a multiple of 8.");