Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 1 | // Copyright 2023 The Abseil Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | // |
| 15 | // ----------------------------------------------------------------------------- |
| 16 | // File: prefetch.h |
| 17 | // ----------------------------------------------------------------------------- |
| 18 | // |
| 19 | // This header file defines prefetch functions to prefetch memory contents |
| 20 | // into the first level cache (L1) for the current CPU. The prefetch logic |
| 21 | // offered in this header is limited to prefetching first level cachelines |
| 22 | // only, and is aimed at relatively 'simple' prefetching logic. |
| 23 | // |
| 24 | #ifndef ABSL_BASE_PREFETCH_H_ |
| 25 | #define ABSL_BASE_PREFETCH_H_ |
| 26 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 27 | #include "absl/base/attributes.h" |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 28 | #include "absl/base/config.h" |
| 29 | |
| 30 | #if defined(ABSL_INTERNAL_HAVE_SSE) |
| 31 | #include <xmmintrin.h> |
| 32 | #endif |
| 33 | |
Derek Mauro | 8028a87 | 2023-11-28 09:36:00 -0800 | [diff] [blame] | 34 | #if defined(_MSC_VER) |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 35 | #include <intrin.h> |
Derek Mauro | 8028a87 | 2023-11-28 09:36:00 -0800 | [diff] [blame] | 36 | #if defined(ABSL_INTERNAL_HAVE_SSE) |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 37 | #pragma intrinsic(_mm_prefetch) |
| 38 | #endif |
Derek Mauro | 8028a87 | 2023-11-28 09:36:00 -0800 | [diff] [blame] | 39 | #endif |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 40 | |
| 41 | namespace absl { |
| 42 | ABSL_NAMESPACE_BEGIN |
| 43 | |
| 44 | // Moves data into the L1 cache before it is read, or "prefetches" it. |
| 45 | // |
| 46 | // The value of `addr` is the address of the memory to prefetch. If |
| 47 | // the target and compiler support it, data prefetch instructions are |
| 48 | // generated. If the prefetch is done some time before the memory is |
| 49 | // read, it may be in the cache by the time the read occurs. |
| 50 | // |
| 51 | // This method prefetches data with the highest degree of temporal locality; |
| 52 | // data is prefetched where possible into all levels of the cache. |
| 53 | // |
| 54 | // Incorrect or gratuitous use of this function can degrade performance. |
| 55 | // Use this function only when representative benchmarks show an improvement. |
| 56 | // |
| 57 | // Example: |
| 58 | // |
| 59 | // // Computes incremental checksum for `data`. |
| 60 | // int ComputeChecksum(int sum, absl::string_view data); |
| 61 | // |
| 62 | // // Computes cumulative checksum for all values in `data` |
| 63 | // int ComputeChecksum(absl::Span<const std::string> data) { |
| 64 | // int sum = 0; |
| 65 | // auto it = data.begin(); |
| 66 | // auto pit = data.begin(); |
| 67 | // auto end = data.end(); |
| 68 | // for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) { |
| 69 | // absl::PrefetchToLocalCache(pit->data()); |
| 70 | // } |
| 71 | // for (; pit != end; ++pit, ++it) { |
| 72 | // sum = ComputeChecksum(sum, *it); |
| 73 | // absl::PrefetchToLocalCache(pit->data()); |
| 74 | // } |
| 75 | // for (; it != end; ++it) { |
| 76 | // sum = ComputeChecksum(sum, *it); |
| 77 | // } |
| 78 | // return sum; |
| 79 | // } |
| 80 | // |
| 81 | void PrefetchToLocalCache(const void* addr); |
| 82 | |
| 83 | // Moves data into the L1 cache before it is read, or "prefetches" it. |
| 84 | // |
| 85 | // This function is identical to `PrefetchToLocalCache()` except that it has |
| 86 | // non-temporal locality: the fetched data should not be left in any of the |
| 87 | // cache tiers. This is useful for cases where the data is used only once / |
| 88 | // short term, for example, invoking a destructor on an object. |
| 89 | // |
| 90 | // Incorrect or gratuitous use of this function can degrade performance. |
| 91 | // Use this function only when representative benchmarks show an improvement. |
| 92 | // |
| 93 | // Example: |
| 94 | // |
| 95 | // template <typename Iterator> |
| 96 | // void DestroyPointers(Iterator begin, Iterator end) { |
| 97 | // // Warm up: prefetch the first few pointers ahead of the deletes. |
| 98 | // |
| 99 | // int dist = 8; |
| 100 | // auto prefetch_it = begin; |
| 101 | // while (prefetch_it != end && --dist) { |
| 102 | // absl::PrefetchToLocalCacheNta(*prefetch_it++); |
| 103 | // } |
| 104 | // while (prefetch_it != end) { |
| 105 | // delete *begin++; |
| 106 | // absl::PrefetchToLocalCacheNta(*prefetch_it++); |
| 107 | // } |
| 108 | // while (begin != end) { |
| 109 | // delete *begin++; |
| 110 | // } |
| 111 | // } |
| 112 | // |
| 113 | void PrefetchToLocalCacheNta(const void* addr); |
| 114 | |
| 115 | // Moves data into the L1 cache with the intent to modify it. |
| 116 | // |
| 117 | // This function is similar to `PrefetchToLocalCache()` except that it |
| 118 | // prefetches cachelines with an 'intent to modify'. This typically includes |
| 119 | // invalidating cache entries for this address in all other cache tiers, and an |
| 120 | // exclusive access intent. |
| 121 | // |
| 122 | // Incorrect or gratuitous use of this function can degrade performance. As this |
| 123 | // function can invalidate cached cachelines on other caches and computer cores, |
| 124 | // incorrect usage of this function can have an even greater negative impact |
| 125 | // than incorrect regular prefetches. |
| 126 | // Use this function only when representative benchmarks show an improvement. |
| 127 | // |
| 128 | // Example: |
| 129 | // |
| 130 | // void* Arena::Allocate(size_t size) { |
| 131 | // void* ptr = AllocateBlock(size); |
Abseil Team | 90ebb6f | 2024-02-27 01:36:12 -0800 | [diff] [blame] | 132 | // absl::PrefetchToLocalCacheForWrite(ptr); |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 133 | // return ptr; |
| 134 | // } |
| 135 | // |
Abseil Team | cdad8cd | 2023-02-03 16:00:19 -0800 | [diff] [blame] | 136 | void PrefetchToLocalCacheForWrite(const void* addr); |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 137 | |
| 138 | #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) |
| 139 | |
| 140 | #define ABSL_HAVE_PREFETCH 1 |
| 141 | |
| 142 | // See __builtin_prefetch: |
| 143 | // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. |
| 144 | // |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 145 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( |
| 146 | const void* addr) { |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 147 | __builtin_prefetch(addr, 0, 3); |
| 148 | } |
| 149 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 150 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( |
| 151 | const void* addr) { |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 152 | __builtin_prefetch(addr, 0, 0); |
| 153 | } |
| 154 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 155 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( |
| 156 | const void* addr) { |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 157 | // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1) |
| 158 | // unless -march=broadwell or newer; this is not generally the default, so we |
| 159 | // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel |
| 160 | // processors and has been present on AMD processors since the K6-2. |
Chris Kennelly | d59eabb | 2023-10-26 08:43:56 -0700 | [diff] [blame] | 161 | #if defined(__x86_64__) && !defined(__PRFCHW__) |
Dmitry Vyukov | 0378614 | 2023-10-18 22:19:06 -0700 | [diff] [blame] | 162 | asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr))); |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 163 | #else |
Abseil Team | cdad8cd | 2023-02-03 16:00:19 -0800 | [diff] [blame] | 164 | __builtin_prefetch(addr, 1, 3); |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 165 | #endif |
| 166 | } |
| 167 | |
| 168 | #elif defined(ABSL_INTERNAL_HAVE_SSE) |
| 169 | |
| 170 | #define ABSL_HAVE_PREFETCH 1 |
| 171 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 172 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( |
| 173 | const void* addr) { |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 174 | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0); |
| 175 | } |
| 176 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 177 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( |
| 178 | const void* addr) { |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 179 | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA); |
| 180 | } |
| 181 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 182 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( |
| 183 | const void* addr) { |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 184 | #if defined(_MM_HINT_ET0) |
| 185 | _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); |
Martijn Vels | 75d2525 | 2023-01-27 12:36:55 -0800 | [diff] [blame] | 186 | #elif !defined(_MSC_VER) && defined(__x86_64__) |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 187 | // _MM_HINT_ET0 is not universally supported. As we commented further |
| 188 | // up, PREFETCHW is recognized as a no-op on older Intel processors |
Martijn Vels | 75d2525 | 2023-01-27 12:36:55 -0800 | [diff] [blame] | 189 | // and has been present on AMD processors since the K6-2. We have this |
| 190 | // disabled for MSVC compilers as this miscompiles on older MSVC compilers. |
Dmitry Vyukov | 0378614 | 2023-10-18 22:19:06 -0700 | [diff] [blame] | 191 | asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr))); |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 192 | #endif |
| 193 | } |
| 194 | |
| 195 | #else |
| 196 | |
Hannah Lin | 7191065 | 2023-08-14 14:34:51 -0700 | [diff] [blame] | 197 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache( |
| 198 | const void* addr) {} |
| 199 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta( |
| 200 | const void* addr) {} |
| 201 | ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite( |
| 202 | const void* addr) {} |
Martijn Vels | db51f68 | 2023-01-26 14:22:33 -0800 | [diff] [blame] | 203 | |
| 204 | #endif |
| 205 | |
| 206 | ABSL_NAMESPACE_END |
| 207 | } // namespace absl |
| 208 | |
| 209 | #endif // ABSL_BASE_PREFETCH_H_ |