Add StringBlock experiment (ALT 1)

PiperOrigin-RevId: 495294072
diff --git a/src/file_lists.cmake b/src/file_lists.cmake index 9faaf62..bb9728b 100644 --- a/src/file_lists.cmake +++ b/src/file_lists.cmake
@@ -179,6 +179,7 @@ ${protobuf_SOURCE_DIR}/src/google/protobuf/stubs/status_macros.h ${protobuf_SOURCE_DIR}/src/google/protobuf/text_format.h ${protobuf_SOURCE_DIR}/src/google/protobuf/thread_safe_arena.h + ${protobuf_SOURCE_DIR}/src/google/protobuf/typed_block.h ${protobuf_SOURCE_DIR}/src/google/protobuf/unknown_field_set.h ${protobuf_SOURCE_DIR}/src/google/protobuf/util/delimited_message_util.h ${protobuf_SOURCE_DIR}/src/google/protobuf/util/field_comparator.h @@ -267,6 +268,7 @@ ${protobuf_SOURCE_DIR}/src/google/protobuf/stubs/port.h ${protobuf_SOURCE_DIR}/src/google/protobuf/stubs/status_macros.h ${protobuf_SOURCE_DIR}/src/google/protobuf/thread_safe_arena.h + ${protobuf_SOURCE_DIR}/src/google/protobuf/typed_block.h ${protobuf_SOURCE_DIR}/src/google/protobuf/wire_format_lite.h )
diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel index 9581b91..741cda0 100644 --- a/src/google/protobuf/BUILD.bazel +++ b/src/google/protobuf/BUILD.bazel
@@ -237,6 +237,16 @@ ) cc_library( + name = "typed_block", + hdrs = ["typed_block.h"], + include_prefix = "google/protobuf", + visibility = [ + "//:__subpackages__", + "//src/google/protobuf:__subpackages__", + ], +) + +cc_library( name = "arena", srcs = [ "arena.cc", @@ -258,6 +268,7 @@ ":arena_allocation_policy", ":arena_cleanup", ":arena_config", + ":typed_block", "//src/google/protobuf/stubs:lite", "@com_google_absl//absl/synchronization", ], @@ -330,6 +341,7 @@ ":arena", ":arena_align", ":arena_config", + ":typed_block", "//src/google/protobuf/io", "//src/google/protobuf/stubs:lite", "@com_google_absl//absl/cleanup",
diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc index 25ddcc3..933190a 100644 --- a/src/google/protobuf/arena.cc +++ b/src/google/protobuf/arena.cc
@@ -35,6 +35,7 @@ #include <cstddef> #include <cstdint> #include <limits> +#include <string> #include <typeinfo> #include "absl/base/attributes.h" @@ -166,6 +167,7 @@ space_allocated_.store(b->size, std::memory_order_relaxed); cached_block_length_ = 0; cached_blocks_ = nullptr; + string_block_ = StringBlock::sentinel(); } SerialArena* SerialArena::New(Memory mem, ThreadSafeArena& parent) { @@ -190,6 +192,20 @@ return mem; } +void* SerialArena::AllocateStringFallback() { + const size_t sz = string_block_->next_size(); + if (PROTOBUF_PREDICT_TRUE(HasSpace(sz))) { + void* mem = AllocateFromExisting(sz); + SpaceUsedAdd(string_block_->space_used() - sz); + string_block_ = string_block_->EmplaceNext(mem, sz); + } else { + SpaceUsedAdd(string_block_->space_used()); + string_block_ = string_block_->CreateNext(sz); + SpaceAllocatedAdd(string_block_->space_allocated()); + } + return string_block_->Allocate(); +} + PROTOBUF_NOINLINE void* SerialArena::AllocateAlignedFallback(size_t n) { AllocateNewBlock(n); @@ -222,8 +238,7 @@ // Record how much used in this block. used = static_cast<size_t>(ptr() - old_head->Pointer(kBlockHeaderSize)); wasted = old_head->size - used; - space_used_.store(space_used_.load(std::memory_order_relaxed) + used, - std::memory_order_relaxed); + SpaceUsedAdd(used); } // TODO(sbenza): Evaluate if pushing unused space into the cached blocks is a @@ -232,12 +247,7 @@ // the microbenchmark. auto mem = AllocateMemory(parent_.AllocPolicy(), old_head->size, n); - // We don't want to emit an expensive RMW instruction that requires - // exclusive access to a cacheline. Hence we write it in terms of a - // regular add. - space_allocated_.store( - space_allocated_.load(std::memory_order_relaxed) + mem.size, - std::memory_order_relaxed); + SpaceAllocatedAdd(mem.size); ThreadSafeArenaStats::RecordAllocateStats(parent_.arena_stats_.MutableStats(), /*used=*/used, /*allocated=*/mem.size, wasted); @@ -260,20 +270,32 @@ // usage of the *current* block. 
// TODO(mkruskal) Consider eliminating this race in exchange for a possible // performance hit on ARM (see cl/455186837). + uint64_t current_space_used = string_block_->space_used(); const ArenaBlock* h = head_.load(std::memory_order_acquire); - if (h->IsSentry()) return 0; + if (h->IsSentry()) return current_space_used; const uint64_t current_block_size = h->size; - uint64_t current_space_used = std::min( + current_space_used += std::min( static_cast<uint64_t>( ptr() - const_cast<ArenaBlock*>(h)->Pointer(kBlockHeaderSize)), current_block_size); return current_space_used + space_used_.load(std::memory_order_relaxed); } -void SerialArena::CleanupList() { +size_t SerialArena::CleanupList() { + size_t deallocated = 0; + StringBlock* block = string_block_; + while (StringBlock* next = block->next()) { + block->DestroyAll(); + if (block->is_heap_allocated()) { + deallocated += block->space_allocated(); + StringBlock::Delete(block); + } + block = next; + } + ArenaBlock* b = head(); - if (b->IsSentry()) return; + if (b->IsSentry()) return deallocated; b->cleanup_nodes = limit_; do { @@ -285,6 +307,7 @@ } b = b->next; } while (b); + return deallocated; } // Stores arrays of void* and SerialArena* instead of linked list of @@ -664,11 +687,10 @@ uint64_t ThreadSafeArena::Reset() { // Have to do this in a first pass, because some of the destructors might // refer to memory in other blocks. - CleanupList(); + size_t space_allocated = CleanupList(); // Discard all blocks except the first one. Whether it is user-provided or // allocated, always reuse the first block for the first arena. 
- size_t space_allocated = 0; auto mem = Free(&space_allocated); space_allocated += mem.size; @@ -784,8 +806,9 @@ template void* ThreadSafeArena::AllocateAlignedFallback<AllocationClient::kArray>(size_t); -void ThreadSafeArena::CleanupList() { - WalkSerialArenaChunk([](SerialArenaChunk* chunk) { +size_t ThreadSafeArena::CleanupList() { + size_t deallocated = 0; + WalkSerialArenaChunk([&](SerialArenaChunk* chunk) { absl::Span<std::atomic<SerialArena*>> span = chunk->arenas(); // Walks arenas backward to handle the first serial arena the last. // Destroying in reverse-order to the construction is often assumed by users @@ -793,11 +816,13 @@ for (auto it = span.rbegin(); it != span.rend(); ++it) { SerialArena* serial = it->load(std::memory_order_relaxed); GOOGLE_ABSL_DCHECK_NE(serial, nullptr); - serial->CleanupList(); + deallocated += serial->CleanupList(); } }); // First arena must be cleaned up last. (b/247560530) - first_arena_.CleanupList(); + deallocated += first_arena_.CleanupList(); + + return deallocated; } PROTOBUF_NOINLINE @@ -848,6 +873,12 @@ return impl_.AllocateAlignedWithCleanup(n, align, destructor); } +template <> +PROTOBUF_EXPORT_TEMPLATE_DEFINE void* +Arena::AllocateInternal<std::string, false>() { + return impl_.AllocateString(); +} + } // namespace protobuf } // namespace google
diff --git a/src/google/protobuf/arena.h b/src/google/protobuf/arena.h index 06acc3d..f2268eb 100644 --- a/src/google/protobuf/arena.h +++ b/src/google/protobuf/arena.h
@@ -275,11 +275,7 @@ if (PROTOBUF_PREDICT_FALSE(arena == nullptr)) { return new T(std::forward<Args>(args)...); } - auto destructor = - internal::ObjectDestructor<std::is_trivially_destructible<T>::value, - T>::destructor; - return new (arena->AllocateInternal(sizeof(T), alignof(T), destructor)) - T(std::forward<Args>(args)...); + return new (arena->AllocateInternal<T>()) T(std::forward<Args>(args)...); } // API to delete any objects not on an arena. This can be used to safely @@ -566,13 +562,14 @@ } } - PROTOBUF_NDEBUG_INLINE void* AllocateInternal(size_t size, size_t align, - void (*destructor)(void*)) { - // Monitor allocation if needed. - if (destructor == nullptr) { - return AllocateAligned(size, align); + template <typename T, bool trivial = std::is_trivially_destructible<T>::value> + PROTOBUF_NDEBUG_INLINE void* AllocateInternal() { + // is_destructor_skippable<T>::value + if (trivial) { + return AllocateAligned(sizeof(T), alignof(T)); } else { - return AllocateAlignedWithCleanup(size, align, destructor); + constexpr auto dtor = &internal::cleanup::arena_destruct_object<T>; + return AllocateAlignedWithCleanup(sizeof(T), alignof(T), dtor); } } @@ -605,11 +602,8 @@ template <typename T, typename... Args> PROTOBUF_NDEBUG_INLINE T* DoCreateMessage(Args&&... args) { return InternalHelper<T>::Construct( - AllocateInternal(sizeof(T), alignof(T), - internal::ObjectDestructor< - InternalHelper<T>::is_destructor_skippable::value, - T>::destructor), - this, std::forward<Args>(args)...); + AllocateInternal<T, is_destructor_skippable<T>::value>(), this, + std::forward<Args>(args)...); } // CreateInArenaStorage is used to implement map field. Without it, @@ -689,6 +683,9 @@ friend struct internal::ArenaTestPeer; }; +template <> +void* Arena::AllocateInternal<std::string, false>(); + } // namespace protobuf } // namespace google
diff --git a/src/google/protobuf/port.h b/src/google/protobuf/port.h index 654379c..6b39283 100644 --- a/src/google/protobuf/port.h +++ b/src/google/protobuf/port.h
@@ -51,6 +51,15 @@ namespace internal { +struct SizedPtr { + void* p; + size_t n; +}; + +inline SizedPtr SizedAllocate(size_t size) { + return {::operator new(size), size}; +} + inline void SizedDelete(void* p, size_t size) { #if defined(__cpp_sized_deallocation) ::operator delete(p, size);
diff --git a/src/google/protobuf/serial_arena.h b/src/google/protobuf/serial_arena.h index 171483b..04300f6 100644 --- a/src/google/protobuf/serial_arena.h +++ b/src/google/protobuf/serial_arena.h
@@ -35,11 +35,13 @@ #include <algorithm> #include <atomic> +#include <string> #include <type_traits> #include <typeinfo> #include <utility> #include "google/protobuf/stubs/common.h" +#include "absl/base/attributes.h" #include "google/protobuf/stubs/logging.h" #include "absl/numeric/bits.h" #include "google/protobuf/arena_align.h" @@ -47,6 +49,7 @@ #include "google/protobuf/arena_config.h" #include "google/protobuf/arenaz_sampler.h" #include "google/protobuf/port.h" +#include "google/protobuf/typed_block.h" // Must be included last. #include "google/protobuf/port_def.inc" @@ -109,11 +112,23 @@ size_t size; }; - void CleanupList(); + size_t CleanupList(); uint64_t SpaceAllocated() const { return space_allocated_.load(std::memory_order_relaxed); } + void SpaceAllocatedAdd(size_t n) { + space_allocated_.store(space_allocated_.load(std::memory_order_relaxed) + n, + std::memory_order_relaxed); + } uint64_t SpaceUsed() const; + void SpaceUsedAdd(size_t n) { + space_used_.store(space_used_.load(std::memory_order_relaxed) + n, + std::memory_order_relaxed); + } + void SpaceUsedSub(size_t n) { + space_used_.store(space_used_.load(std::memory_order_relaxed) - n, + std::memory_order_relaxed); + } bool HasSpace(size_t n) const { return n <= static_cast<size_t>(limit_ - ptr()); @@ -284,6 +299,9 @@ AddCleanupFromExisting(elem, destructor); } + void* AllocateString() ABSL_ATTRIBUTE_RETURNS_NONNULL; + void* TryAllocateString(); + private: void* AllocateFromExistingWithCleanupFallback(size_t n, size_t align, void (*destructor)(void*)) { @@ -307,7 +325,11 @@ cleanup::CreateNode(tag, limit_, elem, destructor); } + void* AllocateStringFallback(); + private: + using StringBlock = TypedBlock<std::string>; + friend class ThreadSafeArena; // Creates a new SerialArena inside mem using the remaining memory as for @@ -330,7 +352,8 @@ char* limit_ = nullptr; std::atomic<ArenaBlock*> head_{nullptr}; // Head of linked list of blocks. - std::atomic<size_t> space_used_{0}; // Necessary for metrics. 
+ StringBlock* string_block_ = StringBlock::sentinel(); // String blocks + std::atomic<size_t> space_used_{0}; // Necessary for metrics. std::atomic<size_t> space_allocated_{0}; ThreadSafeArena& parent_; @@ -377,6 +400,32 @@ ArenaAlignDefault::Ceil(sizeof(ArenaBlock)); }; +inline PROTOBUF_NDEBUG_INLINE void* SerialArena::AllocateString() { + void* ptr = string_block_->TryAllocate(); + if (PROTOBUF_PREDICT_TRUE(ptr != nullptr)) return ptr; + return AllocateStringFallback(); +} + +inline PROTOBUF_NDEBUG_INLINE void* SerialArena::TryAllocateString() { + void* p = string_block_->TryAllocate(); + if (PROTOBUF_PREDICT_TRUE(p != nullptr)) return p; + + if (PROTOBUF_PREDICT_TRUE(HasSpace(StringBlock::min_size()))) { + size_t available = static_cast<size_t>(limit_ - ptr()); + size_t sz = std::min(available, string_block_->next_size()); + SpaceUsedAdd(string_block_->space_used() - sz); + string_block_ = string_block_->EmplaceNext(AllocateFromExisting(sz), sz); + return string_block_->Allocate(); + } + return nullptr; +} + +template <> +inline PROTOBUF_ALWAYS_INLINE void* +SerialArena::MaybeAllocateWithCleanup<std::string>() { + return TryAllocateString(); +} + } // namespace internal } // namespace protobuf } // namespace google
diff --git a/src/google/protobuf/thread_safe_arena.h b/src/google/protobuf/thread_safe_arena.h index b30d85a..5978ed2 100644 --- a/src/google/protobuf/thread_safe_arena.h +++ b/src/google/protobuf/thread_safe_arena.h
@@ -124,6 +124,14 @@ // Add object pointer and cleanup function pointer to the list. void AddCleanup(void* elem, void (*cleanup)(void*)); + PROTOBUF_NDEBUG_INLINE void* AllocateString() { + SerialArena* arena; + if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) { + return arena->AllocateString(); + } + return GetSerialArenaFallback(0)->AllocateStringFallback(); + } + private: friend class ArenaBenchmark; friend class TcParser; @@ -179,7 +187,7 @@ void Init(); // Delete or Destruct all objects owned by the arena. - void CleanupList(); + size_t CleanupList(); inline void CacheSerialArena(SerialArena* serial) { thread_cache().last_serial_arena = serial;
diff --git a/src/google/protobuf/typed_block.h b/src/google/protobuf/typed_block.h new file mode 100644 index 0000000..ded7b64 --- /dev/null +++ b/src/google/protobuf/typed_block.h
@@ -0,0 +1,164 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2022 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// This file defines the internal templated class TypedBlock<T> + +#ifndef GOOGLE_PROTOBUF_TYPED_BLOCK_H__ +#define GOOGLE_PROTOBUF_TYPED_BLOCK_H__ + +#include <algorithm> +#include <limits> +#include <memory> +#include <string> + +#include "absl/base/attributes.h" +#include "google/protobuf/stubs/logging.h" +#include "google/protobuf/port.h" + + +// Must be included last. +#include "google/protobuf/port_def.inc" + +namespace google { +namespace protobuf { +namespace internal { + +template <typename T> +class TypedBlock { + public: + static constexpr size_t min_size(); + static constexpr size_t max_size(); + + TypedBlock* EmplaceNext(void* mem, size_t size) { + GOOGLE_ABSL_DCHECK_GE(size, sizeof(TypedBlock)); + GOOGLE_ABSL_DCHECK_LE(size, max_size()); + size_t next_sz = std::min(next_size() + size, max_size()); + return new (mem) TypedBlock(0, size, next_sz, false, this); + } + + TypedBlock* CreateNext(size_t size) { + GOOGLE_ABSL_DCHECK_GE(size, sizeof(TypedBlock)); + GOOGLE_ABSL_DCHECK_LE(size, max_size()); + SizedPtr res = internal::SizedAllocate(size); + size_t next_sz = std::min(next_size() + res.n, max_size()); + return new (res.p) TypedBlock(res.n, res.n, next_sz, true, this); + } + + static void Delete(TypedBlock* block) { + GOOGLE_ABSL_DCHECK(block != nullptr); + GOOGLE_ABSL_DCHECK(block->heap_allocated_); + internal::SizedDelete(block, block->allocated_size_); + } + + void DestroyAll() { + for (T& t : *this) t.~T(); + } + + static TypedBlock* sentinel() ABSL_ATTRIBUTE_RETURNS_NONNULL; + + TypedBlock* next() const { return next_; } + size_t next_size() const { return next_size_; } + + size_t space_used() const { return count_ * sizeof(T); } + size_t space_allocated() const { return allocated_size_; } + + bool is_heap_allocated() const { return heap_allocated_; } + + inline std::string* TryAllocate() { + if (ABSL_PREDICT_TRUE(count_ < capacity_)) { + return reinterpret_cast<T*>(this + 1) + count_++; + } + return nullptr; + } + + inline std::string* 
Allocate() ABSL_ATTRIBUTE_RETURNS_NONNULL { + GOOGLE_ABSL_DCHECK_LT(count_, capacity_); + return reinterpret_cast<T*>(this + 1) + count_++; + } + + private: + static constexpr size_t SizeToCount(size_t size) { + return (size - sizeof(TypedBlock)) / sizeof(T); + } + + constexpr TypedBlock() = default; + constexpr TypedBlock(size_t allocated_size, size_t size, size_t next_size, + bool heap_allocated, TypedBlock* next) + : allocated_size_(static_cast<uint32_t>(allocated_size)), + next_size_(next_size), + capacity_(static_cast<uint16_t>(SizeToCount(size))), + heap_allocated_(heap_allocated), + next_(next) {} + + T* begin() { return reinterpret_cast<T*>(this + 1); } + T* end() { return reinterpret_cast<T*>(this + 1) + count_; } + + struct Sentinel; + + struct alignas(T) { + const uint32_t allocated_size_ = 0; + const uint32_t next_size_ = static_cast<uint32_t>(min_size()); + const uint16_t capacity_ = 0; + uint16_t count_ = 0; + const bool heap_allocated_ = false; + TypedBlock* const next_ = nullptr; + }; +}; + +template <typename T> +inline constexpr size_t TypedBlock<T>::min_size() { + return std::max(size_t{512}, sizeof(TypedBlock<T>) + sizeof(T) * 8); +} + +template <typename T> +constexpr size_t TypedBlock<T>::max_size() { + return std::max(min_size(), size_t{4 << 10}); +} + +template <typename T> +struct TypedBlock<T>::Sentinel { + static constexpr TypedBlock<T> kSentinel = {}; +}; + +template <typename T> +constexpr TypedBlock<T> TypedBlock<T>::Sentinel::kSentinel; + +template <typename T> +inline TypedBlock<T>* TypedBlock<T>::sentinel() { + return const_cast<TypedBlock<T>*>(&Sentinel::kSentinel); +} + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include "google/protobuf/port_undef.inc" + +#endif // GOOGLE_PROTOBUF_TYPED_BLOCK_H__