Optimize ConvertSpecialToEmptyAndFullToDeleted on Arm BM_DropDeletes 73.4µs ± 0% 68.9µs ± 1% -6.22% (p=0.008 n=5+5) PiperOrigin-RevId: 511813266 Change-Id: Id28cece454d583e2dfe060e27cfc4720f987f009
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index e5306a4..f4de2d6 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h
@@ -675,9 +675,10 @@ void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0); constexpr uint64_t msbs = 0x8080808080808080ULL; - constexpr uint64_t lsbs = 0x0101010101010101ULL; - auto x = mask & msbs; - auto res = (~x + (x >> 7)) & ~lsbs; + constexpr uint64_t slsbs = 0x0202020202020202ULL; + constexpr uint64_t midbs = 0x7e7e7e7e7e7e7e7eULL; + auto x = slsbs & (mask >> 6); + auto res = (x + midbs) | msbs; little_endian::Store64(dst, res); }