Deduplicate a few more load/store implementations.

Change-Id: I521922842b3586ab7c6e242281188745a879f1df
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/53094
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c
index 83f1efe..0730508 100644
--- a/crypto/poly1305/poly1305_vec.c
+++ b/crypto/poly1305/poly1305_vec.c
@@ -27,22 +27,6 @@
 
 #include <emmintrin.h>
 
-static uint32_t load_u32_le(const uint8_t in[4]) {
-  uint32_t ret;
-  OPENSSL_memcpy(&ret, in, 4);
-  return ret;
-}
-
-static uint64_t load_u64_le(const uint8_t in[8]) {
-  uint64_t ret;
-  OPENSSL_memcpy(&ret, in, 8);
-  return ret;
-}
-
-static void store_u64_le(uint8_t out[8], uint64_t v) {
-  OPENSSL_memcpy(out, &v, 8);
-}
-
 typedef __m128i xmmi;
 
 static const alignas(16) uint32_t poly1305_x64_sse2_message_mask[4] = {
@@ -112,8 +96,8 @@
   uint64_t t0, t1;
 
   // clamp key
-  t0 = load_u64_le(key + 0);
-  t1 = load_u64_le(key + 8);
+  t0 = CRYPTO_load_u64_le(key + 0);
+  t1 = CRYPTO_load_u64_le(key + 8);
   r0 = t0 & 0xffc0fffffff;
   t0 >>= 44;
   t0 |= t1 << 20;
@@ -131,10 +115,10 @@
   p->R22.d[3] = (uint32_t)(r2 >> 32);
 
   // store pad
-  p->R23.d[1] = load_u32_le(key + 16);
-  p->R23.d[3] = load_u32_le(key + 20);
-  p->R24.d[1] = load_u32_le(key + 24);
-  p->R24.d[3] = load_u32_le(key + 28);
+  p->R23.d[1] = CRYPTO_load_u32_le(key + 16);
+  p->R23.d[3] = CRYPTO_load_u32_le(key + 20);
+  p->R24.d[1] = CRYPTO_load_u32_le(key + 24);
+  p->R24.d[3] = CRYPTO_load_u32_le(key + 28);
 
   // H = 0
   st->H[0] = _mm_setzero_si128();
@@ -766,8 +750,8 @@
   }
 
 poly1305_donna_atleast16bytes:
-  t0 = load_u64_le(m + 0);
-  t1 = load_u64_le(m + 8);
+  t0 = CRYPTO_load_u64_le(m + 0);
+  t1 = CRYPTO_load_u64_le(m + 8);
   h0 += t0 & 0xfffffffffff;
   t0 = shr128_pair(t1, t0, 44);
   h1 += t0 & 0xfffffffffff;
@@ -806,8 +790,8 @@
   OPENSSL_memset(m + leftover, 0, 16 - leftover);
   leftover = 16;
 
-  t0 = load_u64_le(m + 0);
-  t1 = load_u64_le(m + 8);
+  t0 = CRYPTO_load_u64_le(m + 0);
+  t1 = CRYPTO_load_u64_le(m + 8);
   h0 += t0 & 0xfffffffffff;
   t0 = shr128_pair(t1, t0, 44);
   h1 += t0 & 0xfffffffffff;
@@ -853,8 +837,8 @@
   t1 = (t1 >> 24);
   h2 += (t1)+c;
 
-  store_u64_le(mac + 0, ((h0) | (h1 << 44)));
-  store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24)));
+  CRYPTO_store_u64_le(mac + 0, ((h0) | (h1 << 44)));
+  CRYPTO_store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24)));
 }
 
 #endif  // BORINGSSL_HAS_UINT128 && OPENSSL_X86_64