[K/N] Don't read out of bounds during SSE hashCode calculation
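Pass a pointer to u16Load and, for SSE, use _mm_loadl_epi64 so that only the 64 bits of a u16x4 vector are read instead of dereferencing a full 128-bit vector.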
diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp
index 31de4c0..34402a6 100644
--- a/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp
+++ b/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp
@@ -34,7 +34,7 @@
ALWAYS_INLINE static VecType initVec() { return vdupq_n_u32(0); }
ALWAYS_INLINE static Vec128Type initVec128() { return vdupq_n_u32(0); }
ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return vgetq_lane_u32(x, 0); }
- ALWAYS_INLINE static VecType u16Load(U16VecType x) { return vmovl_u16(x); }
+ ALWAYS_INLINE static VecType u16Load(U16VecType const* x) { return vmovl_u16(*x); }
ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return vmulq_u32(x, y); }
ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return vaddq_u32(x, y); }
ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return vmulq_u32(x, y); }
diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc b/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc
index 7524cae..9eb6329 100644
--- a/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc
+++ b/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc
@@ -9,7 +9,7 @@
const int vecLength = sizeof(VecType) / 4;
if (n < vecLength / 4) return;
- VecType x = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
+ VecType x = Traits::u16Load(reinterpret_cast<U16VecType const*>(str));
res = Traits::vec128Mul(res, *reinterpret_cast<Vec128Type const*>(b));
VecType z = Traits::vecMul(x, *reinterpret_cast<VecType const*>(p));
res = Traits::vec128Add(res, Traits::squash1(z));
@@ -33,8 +33,8 @@
VecType res1 = Traits::initVec();
do {
- VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
- VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength));
+ VecType x0 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str));
+ VecType x1 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength));
res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b));
res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b));
VecType z0 = Traits::vecMul(x0, *reinterpret_cast<VecType const*>(p));
@@ -66,10 +66,10 @@
VecType res3 = Traits::initVec();
do {
- VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
- VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength));
- VecType x2 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 2));
- VecType x3 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 3));
+ VecType x0 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str));
+ VecType x1 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength));
+ VecType x2 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 2));
+ VecType x3 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 3));
res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b));
res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b));
res2 = Traits::vecMul(res2, *reinterpret_cast<VecType const*>(b));
@@ -109,14 +109,14 @@
VecType res7 = Traits::initVec();
do {
- VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
- VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength));
- VecType x2 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 2));
- VecType x3 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 3));
- VecType x4 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 4));
- VecType x5 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 5));
- VecType x6 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 6));
- VecType x7 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 7));
+ VecType x0 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str));
+ VecType x1 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength));
+ VecType x2 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 2));
+ VecType x3 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 3));
+ VecType x4 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 4));
+ VecType x5 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 5));
+ VecType x6 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 6));
+ VecType x7 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 7));
res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b));
res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b));
res2 = Traits::vecMul(res2, *reinterpret_cast<VecType const*>(b));
@@ -149,4 +149,4 @@
Vec128Type sum1 = Traits::vec128Add(Traits::squash2(res0, res1), Traits::squash2(res2, res3));
Vec128Type sum2 = Traits::vec128Add(Traits::squash2(res4, res5), Traits::squash2(res6, res7));
res = Traits::vec128Add(res, Traits::vec128Add(sum1, sum2));
-}
\ No newline at end of file
+}
diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp
index d53ef38..9838580 100644
--- a/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp
+++ b/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp
@@ -32,7 +32,7 @@
static VecType initVec() { return _mm_setzero_si128(); }
static Vec128Type initVec128() { return _mm_setzero_si128(); }
static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
- static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); }
+ static VecType u16Load(U16VecType const* x) { return _mm_cvtepu16_epi32(_mm_loadl_epi64(x)); }
static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); }
@@ -80,7 +80,7 @@
static VecType initVec() { return _mm256_setzero_si256(); }
static Vec128Type initVec128() { return _mm_setzero_si128(); }
static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
- static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); }
+ static VecType u16Load(U16VecType const* x) { return _mm256_cvtepu16_epi32(*x); }
static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }