[K/N] Don't read out of bounds during SSE hashCode calculation. Use _mm_loadl_epi64 (a 64-bit load) to read vectors of u16x4.
diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp index 31de4c0..34402a6 100644 --- a/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp +++ b/kotlin-native/runtime/src/main/cpp/polyhash/arm.cpp
@@ -34,7 +34,7 @@ ALWAYS_INLINE static VecType initVec() { return vdupq_n_u32(0); } ALWAYS_INLINE static Vec128Type initVec128() { return vdupq_n_u32(0); } ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return vgetq_lane_u32(x, 0); } - ALWAYS_INLINE static VecType u16Load(U16VecType x) { return vmovl_u16(x); } + ALWAYS_INLINE static VecType u16Load(U16VecType const* x) { return vmovl_u16(*x); } ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return vmulq_u32(x, y); } ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return vaddq_u32(x, y); } ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return vmulq_u32(x, y); }
diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc b/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc index 7524cae..9eb6329 100644 --- a/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc +++ b/kotlin-native/runtime/src/main/cpp/polyhash/attributeSensitiveFunctions.inc
@@ -9,7 +9,7 @@ const int vecLength = sizeof(VecType) / 4; if (n < vecLength / 4) return; - VecType x = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str)); + VecType x = Traits::u16Load(reinterpret_cast<U16VecType const*>(str)); res = Traits::vec128Mul(res, *reinterpret_cast<Vec128Type const*>(b)); VecType z = Traits::vecMul(x, *reinterpret_cast<VecType const*>(p)); res = Traits::vec128Add(res, Traits::squash1(z)); @@ -33,8 +33,8 @@ VecType res1 = Traits::initVec(); do { - VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str)); - VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength)); + VecType x0 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str)); + VecType x1 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength)); res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b)); res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b)); VecType z0 = Traits::vecMul(x0, *reinterpret_cast<VecType const*>(p)); @@ -66,10 +66,10 @@ VecType res3 = Traits::initVec(); do { - VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str)); - VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength)); - VecType x2 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 2)); - VecType x3 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 3)); + VecType x0 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str)); + VecType x1 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength)); + VecType x2 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 2)); + VecType x3 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 3)); res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b)); res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b)); res2 = Traits::vecMul(res2, *reinterpret_cast<VecType const*>(b)); @@ -109,14 +109,14 @@ VecType res7 = 
Traits::initVec(); do { - VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str)); - VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength)); - VecType x2 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 2)); - VecType x3 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 3)); - VecType x4 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 4)); - VecType x5 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 5)); - VecType x6 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 6)); - VecType x7 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 7)); + VecType x0 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str)); + VecType x1 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength)); + VecType x2 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 2)); + VecType x3 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 3)); + VecType x4 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 4)); + VecType x5 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 5)); + VecType x6 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 6)); + VecType x7 = Traits::u16Load(reinterpret_cast<U16VecType const*>(str + vecLength * 7)); res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b)); res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b)); res2 = Traits::vecMul(res2, *reinterpret_cast<VecType const*>(b)); @@ -149,4 +149,4 @@ Vec128Type sum1 = Traits::vec128Add(Traits::squash2(res0, res1), Traits::squash2(res2, res3)); Vec128Type sum2 = Traits::vec128Add(Traits::squash2(res4, res5), Traits::squash2(res6, res7)); res = Traits::vec128Add(res, Traits::vec128Add(sum1, sum2)); -} \ No newline at end of file +}
diff --git a/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp b/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp index d53ef38..9838580 100644 --- a/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp +++ b/kotlin-native/runtime/src/main/cpp/polyhash/x86.cpp
@@ -32,7 +32,7 @@ static VecType initVec() { return _mm_setzero_si128(); } static Vec128Type initVec128() { return _mm_setzero_si128(); } static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); } - static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); } + static VecType u16Load(U16VecType const* x) { return _mm_cvtepu16_epi32(_mm_loadl_epi64(x)); } static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); } static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); } static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); } @@ -80,7 +80,7 @@ static VecType initVec() { return _mm256_setzero_si256(); } static Vec128Type initVec128() { return _mm_setzero_si128(); } static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); } - static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); } + static VecType u16Load(U16VecType const* x) { return _mm256_cvtepu16_epi32(*x); } static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); } static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); } static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }