|  | // Copyright 2023 The Abseil Authors | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     https://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  | // | 
|  | // ----------------------------------------------------------------------------- | 
|  | // File: prefetch.h | 
|  | // ----------------------------------------------------------------------------- | 
|  | // | 
|  | // This header file defines prefetch functions to prefetch memory contents | 
|  | // into the first level cache (L1) for the current CPU. The prefetch logic | 
|  | // offered in this header is limited to prefetching first level cachelines | 
|  | // only, and is aimed at relatively 'simple' prefetching logic. | 
|  | // | 
|  | #ifndef ABSL_BASE_PREFETCH_H_ | 
|  | #define ABSL_BASE_PREFETCH_H_ | 
|  |  | 
|  | #include "absl/base/attributes.h" | 
|  | #include "absl/base/config.h" | 
|  |  | 
|  | #if defined(ABSL_INTERNAL_HAVE_SSE) | 
|  | #include <xmmintrin.h> | 
|  | #endif | 
|  |  | 
|  | #if defined(_MSC_VER) | 
|  | #include <intrin.h> | 
|  | #if defined(ABSL_INTERNAL_HAVE_SSE) | 
|  | #pragma intrinsic(_mm_prefetch) | 
|  | #endif | 
|  | #endif | 
|  |  | 
|  | namespace absl { | 
|  | ABSL_NAMESPACE_BEGIN | 
|  |  | 
// Moves data into the L1 cache before it is read, or "prefetches" it.
//
// The value of `addr` is the address of the memory to prefetch. If
// the target and compiler support it, data prefetch instructions are
// generated. If the prefetch is done some time before the memory is
// read, it may be in the cache by the time the read occurs.
//
// This method prefetches data with the highest degree of temporal locality;
// data is prefetched where possible into all levels of the cache.
//
// Incorrect or gratuitous use of this function can degrade performance.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
//  // Computes incremental checksum for `data`.
//  int ComputeChecksum(int sum, absl::string_view data);
//
//  // Computes cumulative checksum for all values in `data`.
//  int ComputeChecksum(absl::Span<const std::string> data) {
//    int sum = 0;
//    auto it = data.begin();
//    auto pit = data.begin();
//    auto end = data.end();
//    // Warm-up: prefetch up to the first 8 strings.
//    for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) {
//      absl::PrefetchToLocalCache(pit->data());
//    }
//    // Steady state: process one string while prefetching one further ahead.
//    for (; pit != end; ++pit, ++it) {
//      sum = ComputeChecksum(sum, *it);
//      absl::PrefetchToLocalCache(pit->data());
//    }
//    // Drain: process the strings that were already prefetched.
//    for (; it != end; ++it) {
//      sum = ComputeChecksum(sum, *it);
//    }
//    return sum;
//  }
//
void PrefetchToLocalCache(const void* addr);
|  |  | 
// Moves data into the L1 cache before it is read, or "prefetches" it.
//
// This function is identical to `PrefetchToLocalCache()` except that it has
// non-temporal locality: the fetched data should not be left in any of the
// cache tiers. This is useful for cases where the data is used only once /
// short term, for example, invoking a destructor on an object.
//
// Incorrect or gratuitous use of this function can degrade performance.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
//  template <typename Iterator>
//  void DestroyPointers(Iterator begin, Iterator end) {
//    int dist = 8;
//    auto prefetch_it = begin;
//    // Warm-up: run the prefetch iterator a few elements ahead of `begin`.
//    while (prefetch_it != end && --dist) {
//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
//    }
//    // Steady state: destroy one object while prefetching one further ahead.
//    while (prefetch_it != end) {
//      delete *begin++;
//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
//    }
//    // Drain: destroy the objects that were already prefetched.
//    while (begin != end) {
//      delete *begin++;
//    }
//  }
//
void PrefetchToLocalCacheNta(const void* addr);
|  |  | 
// Moves data into the L1 cache with the intent to modify it.
//
// This function is similar to `PrefetchToLocalCache()` except that it
// prefetches cachelines with an 'intent to modify'. This typically includes
// invalidating cache entries for this address in all other cache tiers, and an
// exclusive access intent.
//
// Incorrect or gratuitous use of this function can degrade performance. As this
// function can invalidate cached cachelines on other caches and computer cores,
// incorrect usage of this function can have an even greater negative impact
// than incorrect regular prefetches.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
//  void* Arena::Allocate(size_t size) {
//    void* ptr = AllocateBlock(size);
//    absl::PrefetchToLocalCacheForWrite(ptr);
//    return ptr;
//  }
//
void PrefetchToLocalCacheForWrite(const void* addr);
|  |  | 
|  | #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) | 
|  |  | 
|  | #define ABSL_HAVE_PREFETCH 1 | 
|  |  | 
|  | // See __builtin_prefetch: | 
|  | // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. | 
|  | // | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( | 
|  | const void* addr) { | 
|  | __builtin_prefetch(addr, 0, 3); | 
|  | } | 
|  |  | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( | 
|  | const void* addr) { | 
|  | __builtin_prefetch(addr, 0, 0); | 
|  | } | 
|  |  | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( | 
|  | const void* addr) { | 
|  | // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1) | 
|  | // unless -march=broadwell or newer; this is not generally the default, so we | 
|  | // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel | 
|  | // processors and has been present on AMD processors since the K6-2. | 
|  | #if defined(__x86_64__) && !defined(__PRFCHW__) | 
|  | asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr))); | 
|  | #else | 
|  | __builtin_prefetch(addr, 1, 3); | 
|  | #endif | 
|  | } | 
|  |  | 
|  | #elif defined(ABSL_INTERNAL_HAVE_SSE) | 
|  |  | 
|  | #define ABSL_HAVE_PREFETCH 1 | 
|  |  | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( | 
|  | const void* addr) { | 
|  | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0); | 
|  | } | 
|  |  | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( | 
|  | const void* addr) { | 
|  | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA); | 
|  | } | 
|  |  | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( | 
|  | const void* addr) { | 
|  | #if defined(_MM_HINT_ET0) | 
|  | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); | 
|  | #elif !defined(_MSC_VER) && defined(__x86_64__) | 
|  | // _MM_HINT_ET0 is not universally supported. As we commented further | 
|  | // up, PREFETCHW is recognized as a no-op on older Intel processors | 
|  | // and has been present on AMD processors since the K6-2. We have this | 
|  | // disabled for MSVC compilers as this miscompiles on older MSVC compilers. | 
|  | asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr))); | 
|  | #endif | 
|  | } | 
|  |  | 
|  | #else | 
|  |  | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( | 
|  | const void* addr) {} | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( | 
|  | const void* addr) {} | 
|  | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( | 
|  | const void* addr) {} | 
|  |  | 
|  | #endif | 
|  |  | 
|  | ABSL_NAMESPACE_END | 
|  | }  // namespace absl | 
|  |  | 
|  | #endif  // ABSL_BASE_PREFETCH_H_ |