Optimize memory orders for weakly ordered architectures.
In `RefCount::Unref()`, `fetch_sub()` with `memory_order_release` rather than
`memory_order_acq_rel`. Then, only in the unexpected path when this was the last
decrement (due to `IsUnique()` called before, this can happen only if another
thread calls `Unref()` at the same time), `atomic_thread_fence()` with
`memory_order_acquire`.
This removes the unnecessary acquire semantics from the expected path.
In `StableDependencyDefault::EnsureAllocatedSlow()`, `compare_exchange_strong()`
with a separate `memory_order_release` for the expected success path and
`memory_order_acquire` for the unexpected failure path (which can happen only
another thread calls `EnsureAllocatd()` at the same time), instead of using
`memory_order_acq_rel` for both.
In the unexpected path there is no change because `compare_exchange_strong()`
implicitly demotes `memory_order_acq_rel` to `memory_order_acquire` there.
This removes the unnecessary acquire semantics from the expected path.
Cosmetics: in `RefCount::{Ref,Unref}()`, replace `if` with `if constexpr`,
and split a complex conditional into separate statements.
PiperOrigin-RevId: 915941386
diff --git a/riegeli/base/ref_count.h b/riegeli/base/ref_count.h
index 1551321..1d2b0d9 100644
--- a/riegeli/base/ref_count.h
+++ b/riegeli/base/ref_count.h
@@ -21,6 +21,7 @@
#include <type_traits>
#include "absl/base/nullability.h"
+#include "absl/base/optimization.h"
#include "riegeli/base/ownership.h"
ABSL_POINTERS_DEFAULT_NONNULL
@@ -41,7 +42,7 @@
template <typename Ownership = ShareOwnership,
std::enable_if_t<IsOwnership<Ownership>::value, int> = 0>
void Ref() const {
- if (std::is_same_v<Ownership, ShareOwnership>) {
+ if constexpr (std::is_same_v<Ownership, ShareOwnership>) {
ref_count_.fetch_add(1, std::memory_order_relaxed);
}
}
@@ -51,17 +52,32 @@
//
// Does nothing and returns `false` if `Ownership` is `ShareOwnership`.
//
- // When `Unref()` returns `true`, the decrement can be skipped and the actual
- // value of the reference count is unspecified. This avoids an expensive
+ // When `Unref()` returns `true`, the decrement might be skipped, leaving the
+ // actual value of the reference count unspecified. This avoids an expensive
// atomic read-modify-write operation, making the last `Unref()` much faster,
// at the cost of making a non-last `Unref()` a bit slower. This is in
// contrast to `std::shared_ptr` in libc++ and libstdc++.
template <typename Ownership = PassOwnership,
std::enable_if_t<IsOwnership<Ownership>::value, int> = 0>
bool Unref() const {
- return std::is_same_v<Ownership, PassOwnership> &&
- (HasUniqueOwner() ||
- ref_count_.fetch_sub(1, std::memory_order_acq_rel) == 1);
+ if constexpr (std::is_same_v<Ownership, PassOwnership>) {
+ if (HasUniqueOwner()) return true;
+ if (ABSL_PREDICT_FALSE(
+ ref_count_.fetch_sub(1, std::memory_order_release) == 1)) {
+ // Even though `HasUniqueOwner()` was `false` before, another thread
+ // has just decremented the reference count. This is the last reference
+ // after all.
+#ifdef THREAD_SANITIZER
+ // TSAN does not support `std::atomic_thread_fence()`. Using `load()`
+ // instead is less efficient but also correct.
+ (void)ref_count_.load(std::memory_order_acquire);
+#else
+ std::atomic_thread_fence(std::memory_order_acquire);
+#endif
+ return true;
+ }
+ }
+ return false;
}
// Returns `true` if there is only one owner of the object.
diff --git a/riegeli/base/stable_dependency.h b/riegeli/base/stable_dependency.h
index b0ae3b3..6add8a4 100644
--- a/riegeli/base/stable_dependency.h
+++ b/riegeli/base/stable_dependency.h
@@ -250,8 +250,10 @@
Dependency<Handle, Manager>* const dep = new Dependency<Handle, Manager>();
Dependency<Handle, Manager>* other_dep = nullptr;
if (ABSL_PREDICT_FALSE(!self.dep_.compare_exchange_strong(
- other_dep, dep, std::memory_order_acq_rel))) {
- // We lost the race.
+ other_dep, dep, std::memory_order_release,
+ std::memory_order_acquire))) {
+ // Another thread has just called `EnsureAllocated()` and won the race.
+ // `dep` is not needed after all.
delete dep;
return *other_dep;
}