// Source blob: 482cde3a0166ce4db7f09a029d71ffc7a0635b73
// Copyright 2023 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: prefetch.h
// -----------------------------------------------------------------------------
//
// This header file defines prefetch functions to prefetch memory contents
// into the first level cache (L1) for the current CPU. The prefetch logic
// offered in this header is limited to prefetching first level cachelines
// only, and is aimed at relatively 'simple' prefetching logic.
//
24#ifndef ABSL_BASE_PREFETCH_H_
25#define ABSL_BASE_PREFETCH_H_
26
Hannah Lin71910652023-08-14 14:34:51 -070027#include "absl/base/attributes.h"
Martijn Velsdb51f682023-01-26 14:22:33 -080028#include "absl/base/config.h"
29
30#if defined(ABSL_INTERNAL_HAVE_SSE)
31#include <xmmintrin.h>
32#endif
33
Derek Mauro8028a872023-11-28 09:36:00 -080034#if defined(_MSC_VER)
Martijn Velsdb51f682023-01-26 14:22:33 -080035#include <intrin.h>
Derek Mauro8028a872023-11-28 09:36:00 -080036#if defined(ABSL_INTERNAL_HAVE_SSE)
Martijn Velsdb51f682023-01-26 14:22:33 -080037#pragma intrinsic(_mm_prefetch)
38#endif
Derek Mauro8028a872023-11-28 09:36:00 -080039#endif
Martijn Velsdb51f682023-01-26 14:22:33 -080040
41namespace absl {
42ABSL_NAMESPACE_BEGIN
43
// Moves data into the L1 cache before it is read, or "prefetches" it.
//
// The value of `addr` is the address of the memory to prefetch. If
// the target and compiler support it, data prefetch instructions are
// generated. If the prefetch is done some time before the memory is
// read, it may be in the cache by the time the read occurs.
//
// This method prefetches data with the highest degree of temporal locality;
// data is prefetched where possible into all levels of the cache.
//
// Incorrect or gratuitous use of this function can degrade performance.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
//  // Computes incremental checksum for `data`.
//  int ComputeChecksum(int sum, absl::string_view data);
//
//  // Computes cumulative checksum for all values in `data`.
//  int ComputeChecksum(absl::Span<const std::string> data) {
//    int sum = 0;
//    auto it = data.begin();
//    auto pit = data.begin();
//    auto end = data.end();
//    // Warm-up: prefetch up to the first 8 elements before any is consumed.
//    for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) {
//      absl::PrefetchToLocalCache(pit->data());
//    }
//    // Steady state: consume one element while prefetching one further ahead.
//    for (; pit != end; ++pit, ++it) {
//      sum = ComputeChecksum(sum, *it);
//      absl::PrefetchToLocalCache(pit->data());
//    }
//    // Drain: consume the elements that have already been prefetched.
//    for (; it != end; ++it) {
//      sum = ComputeChecksum(sum, *it);
//    }
//    return sum;
//  }
//
void PrefetchToLocalCache(const void* addr);

// Moves data into the L1 cache before it is read, or "prefetches" it.
//
// This function is identical to `PrefetchToLocalCache()` except that it has
// non-temporal locality: the fetched data should not be left in any of the
// cache tiers. This is useful for cases where the data is used only once /
// short term, for example, invoking a destructor on an object.
//
// Incorrect or gratuitous use of this function can degrade performance.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
//  template <typename Iterator>
//  void DestroyPointers(Iterator begin, Iterator end) {
//    int dist = 8;
//    auto prefetch_it = begin;
//    // Warm-up: run the prefetch iterator ahead of `begin`.
//    while (prefetch_it != end && --dist) {
//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
//    }
//    // Steady state: destroy one object while prefetching one further ahead.
//    while (prefetch_it != end) {
//      delete *begin++;
//      absl::PrefetchToLocalCacheNta(*prefetch_it++);
//    }
//    // Drain: destroy the objects that have already been prefetched.
//    while (begin != end) {
//      delete *begin++;
//    }
//  }
//
void PrefetchToLocalCacheNta(const void* addr);

// Moves data into the L1 cache with the intent to modify it.
//
// This function is similar to `PrefetchToLocalCache()` except that it
// prefetches cachelines with an 'intent to modify'. This typically includes
// invalidating cache entries for this address in all other cache tiers, and an
// exclusive access intent.
//
// Incorrect or gratuitous use of this function can degrade performance. As this
// function can invalidate cached cachelines on other caches and computer cores,
// incorrect usage of this function can have an even greater negative impact
// than incorrect regular prefetches.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
//  void* Arena::Allocate(size_t size) {
//    void* ptr = AllocateBlock(size);
//    absl::PrefetchToLocalCacheForWrite(ptr);
//    return ptr;
//  }
//
void PrefetchToLocalCacheForWrite(const void* addr);

138#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
139
140#define ABSL_HAVE_PREFETCH 1
141
142// See __builtin_prefetch:
143// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
144//
Hannah Lin71910652023-08-14 14:34:51 -0700145ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
146 const void* addr) {
Martijn Velsdb51f682023-01-26 14:22:33 -0800147 __builtin_prefetch(addr, 0, 3);
148}
149
Hannah Lin71910652023-08-14 14:34:51 -0700150ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
151 const void* addr) {
Martijn Velsdb51f682023-01-26 14:22:33 -0800152 __builtin_prefetch(addr, 0, 0);
153}
154
Hannah Lin71910652023-08-14 14:34:51 -0700155ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
156 const void* addr) {
Martijn Velsdb51f682023-01-26 14:22:33 -0800157 // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
158 // unless -march=broadwell or newer; this is not generally the default, so we
159 // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
160 // processors and has been present on AMD processors since the K6-2.
Chris Kennellyd59eabb2023-10-26 08:43:56 -0700161#if defined(__x86_64__) && !defined(__PRFCHW__)
Dmitry Vyukov03786142023-10-18 22:19:06 -0700162 asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
Martijn Velsdb51f682023-01-26 14:22:33 -0800163#else
Abseil Teamcdad8cd2023-02-03 16:00:19 -0800164 __builtin_prefetch(addr, 1, 3);
Martijn Velsdb51f682023-01-26 14:22:33 -0800165#endif
166}
167
168#elif defined(ABSL_INTERNAL_HAVE_SSE)
169
170#define ABSL_HAVE_PREFETCH 1
171
Hannah Lin71910652023-08-14 14:34:51 -0700172ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
173 const void* addr) {
Martijn Velsdb51f682023-01-26 14:22:33 -0800174 _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
175}
176
Hannah Lin71910652023-08-14 14:34:51 -0700177ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
178 const void* addr) {
Martijn Velsdb51f682023-01-26 14:22:33 -0800179 _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
180}
181
Hannah Lin71910652023-08-14 14:34:51 -0700182ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
183 const void* addr) {
Martijn Velsdb51f682023-01-26 14:22:33 -0800184#if defined(_MM_HINT_ET0)
185 _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
Martijn Vels75d25252023-01-27 12:36:55 -0800186#elif !defined(_MSC_VER) && defined(__x86_64__)
Martijn Velsdb51f682023-01-26 14:22:33 -0800187 // _MM_HINT_ET0 is not universally supported. As we commented further
188 // up, PREFETCHW is recognized as a no-op on older Intel processors
Martijn Vels75d25252023-01-27 12:36:55 -0800189 // and has been present on AMD processors since the K6-2. We have this
190 // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
Dmitry Vyukov03786142023-10-18 22:19:06 -0700191 asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
Martijn Velsdb51f682023-01-26 14:22:33 -0800192#endif
193}
194
195#else
196
Hannah Lin71910652023-08-14 14:34:51 -0700197ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
198 const void* addr) {}
199ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
200 const void* addr) {}
201ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
202 const void* addr) {}
Martijn Velsdb51f682023-01-26 14:22:33 -0800203
204#endif
205
206ABSL_NAMESPACE_END
207} // namespace absl
208
209#endif // ABSL_BASE_PREFETCH_H_