| // Copyright 2017 The Abseil Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <cstdint> |
| #include <mutex> // NOLINT(build/c++11) |
| #include <vector> |
| |
| #include "absl/base/internal/cycleclock.h" |
| #include "absl/base/internal/spinlock.h" |
| #include "absl/synchronization/blocking_counter.h" |
| #include "absl/synchronization/internal/thread_pool.h" |
| #include "absl/synchronization/mutex.h" |
| #include "benchmark/benchmark.h" |
| |
| namespace { |
| |
| void BM_Mutex(benchmark::State& state) { |
| static absl::Mutex* mu = new absl::Mutex; |
| for (auto _ : state) { |
| absl::MutexLock lock(mu); |
| } |
| } |
| BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu(); |
| |
| static void DelayNs(int64_t ns, int* data) { |
| int64_t end = absl::base_internal::CycleClock::Now() + |
| ns * absl::base_internal::CycleClock::Frequency() / 1e9; |
| while (absl::base_internal::CycleClock::Now() < end) { |
| ++(*data); |
| benchmark::DoNotOptimize(*data); |
| } |
| } |
| |
| template <typename MutexType> |
| class RaiiLocker { |
| public: |
| explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); } |
| ~RaiiLocker() { mu_->Unlock(); } |
| private: |
| MutexType* mu_; |
| }; |
| |
| template <> |
| class RaiiLocker<std::mutex> { |
| public: |
| explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); } |
| ~RaiiLocker() { mu_->unlock(); } |
| private: |
| std::mutex* mu_; |
| }; |
| |
| template <typename MutexType> |
| void BM_Contended(benchmark::State& state) { |
| struct Shared { |
| MutexType mu; |
| int data = 0; |
| }; |
| static auto* shared = new Shared; |
| int local = 0; |
| for (auto _ : state) { |
| // Here we model both local work outside of the critical section as well as |
| // some work inside of the critical section. The idea is to capture some |
| // more or less realisitic contention levels. |
| // If contention is too low, the benchmark won't measure anything useful. |
| // If contention is unrealistically high, the benchmark will favor |
| // bad mutex implementations that block and otherwise distract threads |
| // from the mutex and shared state for as much as possible. |
| // To achieve this amount of local work is multiplied by number of threads |
| // to keep ratio between local work and critical section approximately |
| // equal regardless of number of threads. |
| DelayNs(100 * state.threads, &local); |
| RaiiLocker<MutexType> locker(&shared->mu); |
| DelayNs(state.range(0), &shared->data); |
| } |
| } |
| |
| BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex) |
| ->UseRealTime() |
| // ThreadPerCpu poorly handles non-power-of-two CPU counts. |
| ->Threads(1) |
| ->Threads(2) |
| ->Threads(4) |
| ->Threads(6) |
| ->Threads(8) |
| ->Threads(12) |
| ->Threads(16) |
| ->Threads(24) |
| ->Threads(32) |
| ->Threads(48) |
| ->Threads(64) |
| ->Threads(96) |
| ->Threads(128) |
| ->Threads(192) |
| ->Threads(256) |
| // Some empirically chosen amounts of work in critical section. |
| // 1 is low contention, 200 is high contention and few values in between. |
| ->Arg(1) |
| ->Arg(20) |
| ->Arg(50) |
| ->Arg(200); |
| |
| BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock) |
| ->UseRealTime() |
| // ThreadPerCpu poorly handles non-power-of-two CPU counts. |
| ->Threads(1) |
| ->Threads(2) |
| ->Threads(4) |
| ->Threads(6) |
| ->Threads(8) |
| ->Threads(12) |
| ->Threads(16) |
| ->Threads(24) |
| ->Threads(32) |
| ->Threads(48) |
| ->Threads(64) |
| ->Threads(96) |
| ->Threads(128) |
| ->Threads(192) |
| ->Threads(256) |
| // Some empirically chosen amounts of work in critical section. |
| // 1 is low contention, 200 is high contention and few values in between. |
| ->Arg(1) |
| ->Arg(20) |
| ->Arg(50) |
| ->Arg(200); |
| |
| BENCHMARK_TEMPLATE(BM_Contended, std::mutex) |
| ->UseRealTime() |
| // ThreadPerCpu poorly handles non-power-of-two CPU counts. |
| ->Threads(1) |
| ->Threads(2) |
| ->Threads(4) |
| ->Threads(6) |
| ->Threads(8) |
| ->Threads(12) |
| ->Threads(16) |
| ->Threads(24) |
| ->Threads(32) |
| ->Threads(48) |
| ->Threads(64) |
| ->Threads(96) |
| ->Threads(128) |
| ->Threads(192) |
| ->Threads(256) |
| // Some empirically chosen amounts of work in critical section. |
| // 1 is low contention, 200 is high contention and few values in between. |
| ->Arg(1) |
| ->Arg(20) |
| ->Arg(50) |
| ->Arg(200); |
| |
| // Measure the overhead of conditions on mutex release (when they must be |
| // evaluated). Mutex has (some) support for equivalence classes allowing |
| // Conditions with the same function/argument to potentially not be multiply |
| // evaluated. |
| // |
| // num_classes==0 is used for the special case of every waiter being distinct. |
| void BM_ConditionWaiters(benchmark::State& state) { |
| int num_classes = state.range(0); |
| int num_waiters = state.range(1); |
| |
| struct Helper { |
| static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) { |
| init->DecrementCount(); |
| m->LockWhen(absl::Condition( |
| static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p)); |
| m->Unlock(); |
| } |
| }; |
| |
| if (num_classes == 0) { |
| // No equivalence classes. |
| num_classes = num_waiters; |
| } |
| |
| absl::BlockingCounter init(num_waiters); |
| absl::Mutex mu; |
| std::vector<int> equivalence_classes(num_classes, 1); |
| |
| // Must be declared last to be destroyed first. |
| absl::synchronization_internal::ThreadPool pool(num_waiters); |
| |
| for (int i = 0; i < num_waiters; i++) { |
| // Mutex considers Conditions with the same function and argument |
| // to be equivalent. |
| pool.Schedule([&, i] { |
| Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]); |
| }); |
| } |
| init.Wait(); |
| |
| for (auto _ : state) { |
| mu.Lock(); |
| mu.Unlock(); // Each unlock requires Condition evaluation for our waiters. |
| } |
| |
| mu.Lock(); |
| for (int i = 0; i < num_classes; i++) { |
| equivalence_classes[i] = 0; |
| } |
| mu.Unlock(); |
| } |
| |
| // Some configurations have higher thread limits than others. |
| #if defined(__linux__) && !defined(THREAD_SANITIZER) |
| constexpr int kMaxConditionWaiters = 8192; |
| #else |
| constexpr int kMaxConditionWaiters = 1024; |
| #endif |
| BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters); |
| |
| } // namespace |