Replace absl::base_internal::Prefetch* calls with absl::Prefetch* calls

PiperOrigin-RevId: 505184961
Change-Id: I64482558a76abda6896bec4b2d323833b6cd7edf
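
Illustrative migration sketch (not part of the patch): the deprecated
base_internal shims map onto the new public API as shown below; the function
and pointer names are made up for the example.

    #include "absl/base/internal/prefetch.h"  // old, deprecated shims
    #include "absl/base/prefetch.h"           // new public API

    void OldStyle(const void* p) {
      absl::base_internal::PrefetchT0(p);   // prefetch into all cache levels
      absl::base_internal::PrefetchNta(p);  // non-temporal prefetch
    }

    void NewStyle(const void* p) {
      absl::PrefetchToLocalCache(p);      // replaces PrefetchT0
      absl::PrefetchToLocalCacheNta(p);   // replaces PrefetchNta
    }

PrefetchT1 and PrefetchT2 have no one-to-one replacement; per the deprecation
notes below, callers that need those hints should call __builtin_prefetch()
directly.
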
diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel
index b4d1c21..dd29daf 100644
--- a/absl/base/BUILD.bazel
+++ b/absl/base/BUILD.bazel
@@ -738,7 +738,10 @@
     ],
     copts = ABSL_DEFAULT_COPTS,
     linkopts = ABSL_DEFAULT_LINKOPTS,
-    deps = [":config"],
+    deps = [
+        ":config",
+        ":core_headers",  # TODO(b/265984188): remove
+    ],
 )
 
 cc_test(
diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt
index 74495d0..71b9379 100644
--- a/absl/base/CMakeLists.txt
+++ b/absl/base/CMakeLists.txt
@@ -657,6 +657,7 @@
     ${ABSL_DEFAULT_LINKOPTS}
   DEPS
     absl::config
+    absl::core_headers  # TODO(b/265984188): remove
 )
 
 absl_cc_test(
diff --git a/absl/base/internal/prefetch.h b/absl/base/internal/prefetch.h
index 0641928..aecfd87 100644
--- a/absl/base/internal/prefetch.h
+++ b/absl/base/internal/prefetch.h
@@ -12,10 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// TODO(b/265984188): remove all uses and delete this header.
+
 #ifndef ABSL_BASE_INTERNAL_PREFETCH_H_
 #define ABSL_BASE_INTERNAL_PREFETCH_H_
 
+#include "absl/base/attributes.h"
 #include "absl/base/config.h"
+#include "absl/base/prefetch.h"
 
 #ifdef __SSE__
 #include <xmmintrin.h>
@@ -72,10 +76,21 @@
 ABSL_NAMESPACE_BEGIN
 namespace base_internal {
 
-void PrefetchT0(const void* addr);
+ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead")
+inline void PrefetchT0(const void* address) {
+  absl::PrefetchToLocalCache(address);
+}
+
+ABSL_DEPRECATED("Use absl::PrefetchToLocalCache() instead")
+inline void PrefetchNta(const void* address) {
+  absl::PrefetchToLocalCacheNta(address);
+}
+
+ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead")
 void PrefetchT1(const void* addr);
+
+ABSL_DEPRECATED("Use __builtin_prefetch() for advanced prefetch logic instead")
 void PrefetchT2(const void* addr);
-void PrefetchNta(const void* addr);
 
 // Implementation details follow.
 
@@ -90,10 +105,6 @@
 // safe for all currently supported platforms. However, prefetch for
 // store may have problems depending on the target platform.
 //
-inline void PrefetchT0(const void* addr) {
-  // Note: this uses prefetcht0 on Intel.
-  __builtin_prefetch(addr, 0, 3);
-}
 inline void PrefetchT1(const void* addr) {
   // Note: this uses prefetcht1 on Intel.
   __builtin_prefetch(addr, 0, 2);
@@ -102,33 +113,21 @@
   // Note: this uses prefetcht2 on Intel.
   __builtin_prefetch(addr, 0, 1);
 }
-inline void PrefetchNta(const void* addr) {
-  // Note: this uses prefetchtnta on Intel.
-  __builtin_prefetch(addr, 0, 0);
-}
 
 #elif defined(ABSL_INTERNAL_HAVE_SSE)
 
 #define ABSL_INTERNAL_HAVE_PREFETCH 1
 
-inline void PrefetchT0(const void* addr) {
-  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
-}
 inline void PrefetchT1(const void* addr) {
   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T1);
 }
 inline void PrefetchT2(const void* addr) {
   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T2);
 }
-inline void PrefetchNta(const void* addr) {
-  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
-}
 
 #else
-inline void PrefetchT0(const void*) {}
 inline void PrefetchT1(const void*) {}
 inline void PrefetchT2(const void*) {}
-inline void PrefetchNta(const void*) {}
 #endif
 
 }  // namespace base_internal
diff --git a/absl/base/prefetch.h b/absl/base/prefetch.h
index 4d42846..6bc9863 100644
--- a/absl/base/prefetch.h
+++ b/absl/base/prefetch.h
@@ -30,9 +30,11 @@
 #include <xmmintrin.h>
 #endif
 
-#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE)
+#if defined(_MSC_VER) && _MSC_VER >= 1900 && \
+    (defined(_M_X64) || defined(_M_IX86))
 #include <intrin.h>
 #pragma intrinsic(_mm_prefetch)
+#pragma intrinsic(_m_prefetchw)
 #endif
 
 namespace absl {
@@ -174,10 +176,15 @@
 inline void PrefetchToLocalCacheForWrite(const void* addr) {
 #if defined(_MM_HINT_ET0)
   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
-#elif defined(__x86_64__)
+#elif defined(_MSC_VER) && _MSC_VER >= 1900 && \
+    (defined(_M_X64) || defined(_M_IX86))
+  // MSVC 2015 and up on x86/x64 supports prefetchw (feature listed as 3DNOW)
+  _m_prefetchw(const_cast<void*>(addr));
+#elif !defined(_MSC_VER) && defined(__x86_64__)
   // _MM_HINT_ET0 is not universally supported. As we commented further
   // up, PREFETCHW is recognized as a no-op on older Intel processors
-  // and has been present on AMD processors since the K6-2
+  // and has been present on AMD processors since the K6-2. This path is
+  // disabled for MSVC because older MSVC versions miscompile it.
   asm("prefetchw (%0)" : : "r"(addr));
 #endif
 }
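
A sketch of how the write-prefetch path above might be used; the helper name,
buffer arguments, and the 64-byte cache-line assumption are hypothetical, not
part of this patch:

    #include <cstddef>

    #include "absl/base/prefetch.h"

    // Warm destination lines for write ahead of a store-heavy loop.
    void WarmForWrite(char* dst, size_t n) {
      for (size_t i = 0; i < n; i += 64) {  // assumes 64-byte cache lines
        absl::PrefetchToLocalCacheForWrite(dst + i);
      }
      // ... stores into dst[0, n) would follow ...
    }
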
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 61ef196..09b55f6 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -185,10 +185,10 @@
 
 #include "absl/base/config.h"
 #include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
 #include "absl/base/internal/raw_logging.h"
 #include "absl/base/optimization.h"
 #include "absl/base/port.h"
+#include "absl/base/prefetch.h"
 #include "absl/container/internal/common.h"
 #include "absl/container/internal/compressed_tuple.h"
 #include "absl/container/internal/container_memory.h"
@@ -2117,12 +2117,12 @@
   void prefetch(const key_arg<K>& key) const {
     (void)key;
     // Avoid probing if we won't be able to prefetch the addresses received.
-#ifdef ABSL_INTERNAL_HAVE_PREFETCH
+#ifdef ABSL_HAVE_PREFETCH
     prefetch_heap_block();
     auto seq = probe(common(), hash_ref()(key));
-    base_internal::PrefetchT0(control() + seq.offset());
-    base_internal::PrefetchT0(slot_array() + seq.offset());
-#endif  // ABSL_INTERNAL_HAVE_PREFETCH
+    PrefetchToLocalCache(control() + seq.offset());
+    PrefetchToLocalCache(slot_array() + seq.offset());
+#endif  // ABSL_HAVE_PREFETCH
   }
 
   // The API of find() has two extensions.
@@ -2529,10 +2529,14 @@
   // See `CapacityToGrowth()`.
   size_t& growth_left() { return common().growth_left(); }
 
-  // Prefetch the heap-allocated memory region to resolve potential TLB misses.
-  // This is intended to overlap with execution of calculating the hash for a
-  // key.
-  void prefetch_heap_block() const { base_internal::PrefetchT2(control()); }
+  // Prefetch the heap-allocated memory region to resolve potential TLB and
+  // cache misses. This is intended to overlap with the computation of the
+  // hash for a key.
+  void prefetch_heap_block() const {
+#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+    __builtin_prefetch(control(), 0, 1);
+#endif
+  }
 
   CommonFields& common() { return settings_.template get<0>(); }
   const CommonFields& common() const { return settings_.template get<0>(); }
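
A sketch of the intent behind prefetch(key): issue the prefetch for the next
lookup while the current one is being processed. The container type, key list,
and helper below are hypothetical, and this assumes the prefetch() extension
is exposed by the hash containers built on raw_hash_set:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    #include "absl/container/flat_hash_map.h"

    int64_t SumBatch(const absl::flat_hash_map<int, int64_t>& m,
                     const std::vector<int>& keys) {
      int64_t sum = 0;
      for (size_t i = 0; i < keys.size(); ++i) {
        // Start pulling the next key's control/slot lines into cache.
        if (i + 1 < keys.size()) m.prefetch(keys[i + 1]);
        auto it = m.find(keys[i]);
        if (it != m.end()) sum += it->second;
      }
      return sum;
    }
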
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 3d3b089..bdffb81 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -40,8 +40,8 @@
 #include "absl/base/attributes.h"
 #include "absl/base/config.h"
 #include "absl/base/internal/cycleclock.h"
-#include "absl/base/internal/prefetch.h"
 #include "absl/base/internal/raw_logging.h"
+#include "absl/base/prefetch.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/container/internal/container_memory.h"
diff --git a/absl/crc/internal/crc.cc b/absl/crc/internal/crc.cc
index bb8936e..337a173 100644
--- a/absl/crc/internal/crc.cc
+++ b/absl/crc/internal/crc.cc
@@ -44,8 +44,8 @@
 #include <cstdint>
 
 #include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
 #include "absl/base/internal/raw_logging.h"
+#include "absl/base/prefetch.h"
 #include "absl/crc/internal/crc_internal.h"
 
 namespace absl {
@@ -309,7 +309,7 @@
 
     // Process kStride interleaved swaths through the data in parallel.
     while ((e - p) > kPrefetchHorizon) {
-      base_internal::PrefetchNta(
+      PrefetchToLocalCacheNta(
           reinterpret_cast<const void*>(p + kPrefetchHorizon));
       // Process 64 bytes at a time
       step_stride();
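
The loop above prefetches a fixed distance ahead with the non-temporal hint
because the buffer is streamed through once. A standalone sketch of that
pattern; kHorizon, the 64-byte step, and the pointer names are illustrative,
not from this patch:

    #include <cstddef>

    #include "absl/base/prefetch.h"

    void Consume(const char* p, const char* e) {
      constexpr ptrdiff_t kHorizon = 256;  // illustrative prefetch distance
      while (e - p > kHorizon) {
        absl::PrefetchToLocalCacheNta(p + kHorizon);  // streamed-once data
        // ... process the 64 bytes at p here ...
        p += 64;
      }
      // Tail (<= kHorizon bytes) is handled without prefetching.
    }
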
diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc
index 66f784d..0078f0e 100644
--- a/absl/crc/internal/crc_memcpy_x86_64.cc
+++ b/absl/crc/internal/crc_memcpy_x86_64.cc
@@ -52,8 +52,8 @@
 #include <type_traits>
 
 #include "absl/base/dynamic_annotations.h"
-#include "absl/base/internal/prefetch.h"
 #include "absl/base/optimization.h"
+#include "absl/base/prefetch.h"
 #include "absl/crc/crc32c.h"
 #include "absl/crc/internal/cpu_detect.h"
 #include "absl/crc/internal/crc_memcpy.h"
@@ -242,10 +242,8 @@
   while (copy_rounds > kBlocksPerCacheLine) {
     // Prefetch kPrefetchAhead bytes ahead of each pointer.
     for (size_t i = 0; i < kRegions; i++) {
-      absl::base_internal::PrefetchT0(src_bytes + kPrefetchAhead +
-                                      region_size * i);
-      absl::base_internal::PrefetchT0(dst_bytes + kPrefetchAhead +
-                                      region_size * i);
+      absl::PrefetchToLocalCache(src_bytes + kPrefetchAhead + region_size * i);
+      absl::PrefetchToLocalCache(dst_bytes + kPrefetchAhead + region_size * i);
     }
 
     // Load and store data, computing CRC on the way.
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index d71191e..e482b37 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -21,7 +21,7 @@
 #include "absl/base/config.h"
 #include "absl/base/dynamic_annotations.h"
 #include "absl/base/internal/endian.h"
-#include "absl/base/internal/prefetch.h"
+#include "absl/base/prefetch.h"
 #include "absl/crc/internal/cpu_detect.h"
 #include "absl/crc/internal/crc.h"
 #include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
@@ -429,11 +429,11 @@
           ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
           ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
           ABSL_INTERNAL_STEP8BY3(l64, l641, l642, p, p1, p2);
-          base_internal::PrefetchT0(
+          PrefetchToLocalCache(
               reinterpret_cast<const char*>(p + kPrefetchHorizonMedium));
-          base_internal::PrefetchT0(
+          PrefetchToLocalCache(
               reinterpret_cast<const char*>(p1 + kPrefetchHorizonMedium));
-          base_internal::PrefetchT0(
+          PrefetchToLocalCache(
               reinterpret_cast<const char*>(p2 + kPrefetchHorizonMedium));
         }
         // Don't run crc on last 8 bytes.
@@ -517,12 +517,12 @@
       for (size_t i = 1; i < bs; i++) {
        // Prefetch data for next iterations.
         for (size_t j = 0; j < num_crc_streams; j++) {
-          base_internal::PrefetchT0(
+          PrefetchToLocalCache(
               reinterpret_cast<const char*>(crc_streams[j] + kPrefetchHorizon));
         }
         for (size_t j = 0; j < num_pclmul_streams; j++) {
-          base_internal::PrefetchT0(reinterpret_cast<const char*>(
-              pclmul_streams[j] + kPrefetchHorizon));
+          PrefetchToLocalCache(reinterpret_cast<const char*>(pclmul_streams[j] +
+                                                             kPrefetchHorizon));
         }
 
         // We process each stream in 64 byte blocks. This can be written as