blob: 31d8339d2a51e05b4232e6d90a1e15acbc693dd0 [file] [log] [blame]
/*
* This source file is licensed under the Apache License 2.0 *and* the MIT
* License. Please agree to *both* of the licensing terms!
*
*
* `transformH` function is a derivative work of OpenSSL. The original work
* is covered by the following license:
*
* Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*
*
* All other work, including modifications to the `transformH` function is
* covered by the following MIT license:
*
* Copyright (c) 2020 Fastly, Kazuho Oku
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdint.h>
#include <string.h>
#include <tmmintrin.h>
#include <wmmintrin.h>
#include "picotls.h"
#include "picotls/fusion.h"
/* Reduction constant used by transformH and by the GHASH reduction steps below (the OpenSSL assembly this was derived from
 * calls it the "0x1c2 polynomial"). Stored 16-byte aligned so that it can be read directly as an __m128i. */
static const uint64_t poly_[2] __attribute__((aligned(16))) = {1, 0xc200000000000000};
#define poly (*(__m128i *)poly_)
/* Control mask for _mm_shuffle_epi8 that reverses the order of all 16 bytes of a vector. */
static const uint8_t bswap8_[16] __attribute__((aligned(16))) = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
#define bswap8 (*(__m128i *)bswap8_)
// This function is covered by the Apache License and the MIT License. See Above.
static __m128i transformH(__m128i H)
{
    /* Follows the "<<1 twist" + "magic reduction" sequence of the OpenSSL assembly, expressed with intrinsics:
     * shift the 128-bit value left by one bit and, if a bit fell off the top, xor in the reduction constant. */

    /* all-ones when the most significant bit of H is set (signed compare of the top dword against zero) */
    const __m128i top_dword = _mm_shuffle_epi32(H, 0xff);
    const __m128i carry_mask = _mm_cmplt_epi32(top_dword, _mm_setzero_si128());

    /* the bit shifted out of the low qword has to enter the high qword at bit 0 */
    const __m128i cross_bit = _mm_slli_si128(_mm_srli_epi64(H, 63), 8);
    const __m128i doubled = _mm_or_si128(cross_bit, _mm_slli_epi64(H, 1));

    /* if (carry) H ^= poly */
    return _mm_xor_si128(_mm_and_si128(carry_mask, poly), doubled);
}
// end of Apache License code
/* Multiplies `x` by `y` with carry-less multiplication and reduces the 256-bit product back to 128 bits using `poly`.
 * Used by ptls_fusion_aesgcm_init to derive successive powers of the hash key. */
static __m128i gfmul(__m128i x, __m128i y)
{
    /* Karatsuba: low*low, high*high, and (low^high)*(low^high) */
    __m128i lo = _mm_clmulepi64_si128(x, y, 0x00);
    __m128i hi = _mm_clmulepi64_si128(x, y, 0x11);
    const __m128i x_fold = _mm_xor_si128(_mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)), x);
    const __m128i y_fold = _mm_xor_si128(_mm_shuffle_epi32(y, _MM_SHUFFLE(1, 0, 3, 2)), y);
    __m128i mid = _mm_clmulepi64_si128(x_fold, y_fold, 0x00);
    mid = _mm_xor_si128(mid, lo);
    mid = _mm_xor_si128(mid, hi);

    /* spread the middle term over the two halves of the 256-bit product */
    lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
    hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));

    /* two identical folding steps; from https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf */
    for (int step = 0; step < 2; ++step) {
        const __m128i t = _mm_clmulepi64_si128(lo, poly, 0x10);
        lo = _mm_xor_si128(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1, 0, 3, 2)), t);
    }

    return _mm_xor_si128(hi, lo);
}
/* Encrypts the six blocks `data[0]` .. `data[5]` in place, using the round keys `ctx->keys[0]` .. `ctx->keys[10]`.
 * `ctx` and `data` are not macro parameters; they are picked up from the scope in which the macro is expanded.
 *
 * The arguments `b1` .. `b9` are code fragments that get spliced in after the rounds using keys[1] .. keys[9]
 * respectively (i.e. `b9` runs right before the final round). Passing `{}` for each of them gives plain six-way
 * encryption; GHASH6 passes its own steps so that they are interleaved with the AES rounds. */
#define AESECB6(b1, b2, b3, b4, b5, b6, b7, b8, b9) \
do { \
__m128i aesk = ctx->keys[0]; \
__m128i aes1 = _mm_xor_si128(data[0], aesk); \
__m128i aes2 = _mm_xor_si128(data[1], aesk); \
__m128i aes3 = _mm_xor_si128(data[2], aesk); \
__m128i aes4 = _mm_xor_si128(data[3], aesk); \
__m128i aes5 = _mm_xor_si128(data[4], aesk); \
__m128i aes6 = _mm_xor_si128(data[5], aesk); \
aesk = ctx->keys[1]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b1} aesk = ctx->keys[2]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b2} aesk = ctx->keys[3]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b3} aesk = ctx->keys[4]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b4} aesk = ctx->keys[5]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b5} aesk = ctx->keys[6]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b6} aesk = ctx->keys[7]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b7} aesk = ctx->keys[8]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b8} aesk = ctx->keys[9]; \
aes1 = _mm_aesenc_si128(aes1, aesk); \
aes2 = _mm_aesenc_si128(aes2, aesk); \
aes3 = _mm_aesenc_si128(aes3, aesk); \
aes4 = _mm_aesenc_si128(aes4, aesk); \
aes5 = _mm_aesenc_si128(aes5, aesk); \
aes6 = _mm_aesenc_si128(aes6, aesk); \
{b9} aesk = ctx->keys[10]; \
data[0] = _mm_aesenclast_si128(aes1, aesk); \
data[1] = _mm_aesenclast_si128(aes2, aesk); \
data[2] = _mm_aesenclast_si128(aes3, aesk); \
data[3] = _mm_aesenclast_si128(aes4, aesk); \
data[4] = _mm_aesenclast_si128(aes5, aesk); \
data[5] = _mm_aesenclast_si128(aes6, aesk); \
} while (0)
/* Encrypts the six blocks `data[0..5]` in place with the round keys held in `ctx`; this is AESECB6 with nothing
 * interleaved between the rounds. */
static inline void aesecb6(ptls_fusion_aesgcm_context_t *ctx, __m128i *data)
{
AESECB6({}, {}, {}, {}, {}, {}, {}, {}, {});
}
/* Folds six 16-byte blocks at `gdata[0..5]` into the running hash `ghash` and then executes `return ghash;`, so the
 * expansion must be the tail of a function returning __m128i. `ctx`, `gdata` and `ghash` are taken from the scope in
 * which the macro is expanded.
 *
 * The work is cut into nine brace-enclosed fragments that are handed to `FUNC(b1, ..., b9)`; FUNC decides how they are
 * laid out (ghash6 simply runs them back to back, aesecb6ghash6 passes AESECB6 so that they are interleaved with AES
 * rounds). The fragments must run in order, since later ones consume `lo`/`hi`/`mid`/`t`/`r` produced by earlier ones:
 *   - fragments 1-5 multiply gdata[5] .. gdata[1] by ctx->ghash[0] .. ctx->ghash[4] and accumulate lo/hi/mid,
 *   - fragments 6-7 do the same for gdata[0], after xoring the incoming `ghash` into it, using ctx->ghash[5],
 *   - fragments 8-9 merge the middle term and reduce the result with `poly`.
 * Each block is byte-reversed (bswap8) after being loaded with an unaligned load. */
#define GHASH6(FUNC) \
do { \
__m128i X, lo, hi, mid, r, t; \
FUNC( \
{ \
X = _mm_loadu_si128(gdata + 5); \
X = _mm_shuffle_epi8(X, bswap8); \
lo = _mm_clmulepi64_si128(ctx->ghash[0].H, X, 0x00); \
hi = _mm_clmulepi64_si128(ctx->ghash[0].H, X, 0x11); \
mid = _mm_shuffle_epi32(X, 78); \
mid = _mm_xor_si128(mid, X); \
mid = _mm_clmulepi64_si128(ctx->ghash[0].r, mid, 0x00); \
}, \
{ \
X = _mm_loadu_si128(gdata + 4); \
X = _mm_shuffle_epi8(X, bswap8); \
t = _mm_clmulepi64_si128(ctx->ghash[1].H, X, 0x00); \
lo = _mm_xor_si128(lo, t); \
t = _mm_clmulepi64_si128(ctx->ghash[1].H, X, 0x11); \
hi = _mm_xor_si128(hi, t); \
t = _mm_shuffle_epi32(X, 78); \
t = _mm_xor_si128(t, X); \
t = _mm_clmulepi64_si128(ctx->ghash[1].r, t, 0x00); \
mid = _mm_xor_si128(mid, t); \
}, \
{ \
X = _mm_loadu_si128(gdata + 3); \
X = _mm_shuffle_epi8(X, bswap8); \
t = _mm_clmulepi64_si128(ctx->ghash[2].H, X, 0x00); \
lo = _mm_xor_si128(lo, t); \
t = _mm_clmulepi64_si128(ctx->ghash[2].H, X, 0x11); \
hi = _mm_xor_si128(hi, t); \
t = _mm_shuffle_epi32(X, 78); \
t = _mm_xor_si128(t, X); \
t = _mm_clmulepi64_si128(ctx->ghash[2].r, t, 0x00); \
mid = _mm_xor_si128(mid, t); \
}, \
{ \
X = _mm_loadu_si128(gdata + 2); \
X = _mm_shuffle_epi8(X, bswap8); \
t = _mm_clmulepi64_si128(ctx->ghash[3].H, X, 0x00); \
lo = _mm_xor_si128(lo, t); \
t = _mm_clmulepi64_si128(ctx->ghash[3].H, X, 0x11); \
hi = _mm_xor_si128(hi, t); \
t = _mm_shuffle_epi32(X, 78); \
t = _mm_xor_si128(t, X); \
t = _mm_clmulepi64_si128(ctx->ghash[3].r, t, 0x00); \
mid = _mm_xor_si128(mid, t); \
}, \
{ \
X = _mm_loadu_si128(gdata + 1); \
X = _mm_shuffle_epi8(X, bswap8); \
t = _mm_clmulepi64_si128(ctx->ghash[4].H, X, 0x00); \
lo = _mm_xor_si128(lo, t); \
t = _mm_clmulepi64_si128(ctx->ghash[4].H, X, 0x11); \
hi = _mm_xor_si128(hi, t); \
t = _mm_shuffle_epi32(X, 78); \
t = _mm_xor_si128(t, X); \
t = _mm_clmulepi64_si128(ctx->ghash[4].r, t, 0x00); \
mid = _mm_xor_si128(mid, t); \
}, \
{ \
X = _mm_loadu_si128(gdata + 0); \
X = _mm_shuffle_epi8(X, bswap8); \
X = _mm_xor_si128(X, ghash); \
t = _mm_clmulepi64_si128(ctx->ghash[5].H, X, 0x00); \
lo = _mm_xor_si128(lo, t); \
t = _mm_clmulepi64_si128(ctx->ghash[5].H, X, 0x11); \
}, \
{ \
hi = _mm_xor_si128(hi, t); \
t = _mm_shuffle_epi32(X, 78); \
t = _mm_xor_si128(t, X); \
t = _mm_clmulepi64_si128(ctx->ghash[5].r, t, 0x00); \
mid = _mm_xor_si128(mid, t); \
}, \
{ \
mid = _mm_xor_si128(mid, hi); \
mid = _mm_xor_si128(mid, lo); \
lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8)); \
hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8)); \
\
/* from https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf */ \
r = _mm_clmulepi64_si128(lo, poly, 0x10); \
}, \
{ \
lo = _mm_shuffle_epi32(lo, 78); \
lo = _mm_xor_si128(lo, r); \
r = _mm_clmulepi64_si128(lo, poly, 0x10); \
lo = _mm_shuffle_epi32(lo, 78); \
lo = _mm_xor_si128(lo, r); \
ghash = _mm_xor_si128(hi, lo); \
}); \
return ghash; \
} while (0)
/* Folds the six blocks at `gdata[0..5]` into `ghash` and returns the updated hash. The local FUNC macro just runs the
 * nine GHASH6 fragments one after another; the `return` is supplied by the GHASH6 expansion. */
static inline __m128i ghash6(ptls_fusion_aesgcm_context_t *ctx, const __m128i *gdata, __m128i ghash)
{
#define FUNC(b1, b2, b3, b4, b5, b6, b7, b8, b9) {b1} {b2} {b3} {b4} {b5} {b6} {b7} {b8} {b9}
GHASH6(FUNC);
#undef FUNC
}
/* Folds `cnt` (at most six) blocks starting at `src` into `ghash` and returns the updated hash. This is the
 * variable-length counterpart of ghash6: the last block is paired with ctx->ghash[0], the one before it with
 * ctx->ghash[1], and so on; the incoming `ghash` is xored into the first block. */
static __m128i ghashn(ptls_fusion_aesgcm_context_t *ctx, const __m128i *src, size_t cnt, __m128i ghash)
{
    __m128i sum_lo = _mm_setzero_si128();
    __m128i sum_hi = _mm_setzero_si128();
    __m128i sum_mid = _mm_setzero_si128();

    assert(cnt <= 6);

    /* walk the input backwards; `pw` selects which precomputed entry of ctx->ghash the block is multiplied with */
    const __m128i *blk = src + cnt;
    for (size_t pw = 0; pw != cnt; ++pw) {
        __m128i X = _mm_shuffle_epi8(_mm_loadu_si128(--blk), bswap8);
        if (pw + 1 == cnt)
            X = _mm_xor_si128(X, ghash); /* reached the first block: mix in the running hash */
        sum_lo = _mm_xor_si128(sum_lo, _mm_clmulepi64_si128(ctx->ghash[pw].H, X, 0x00));
        sum_hi = _mm_xor_si128(sum_hi, _mm_clmulepi64_si128(ctx->ghash[pw].H, X, 0x11));
        const __m128i x_fold = _mm_xor_si128(_mm_shuffle_epi32(X, 78), X);
        sum_mid = _mm_xor_si128(sum_mid, _mm_clmulepi64_si128(ctx->ghash[pw].r, x_fold, 0x00));
    }

    /* recover the middle term and spread it over the two halves */
    sum_mid = _mm_xor_si128(sum_mid, sum_hi);
    sum_mid = _mm_xor_si128(sum_mid, sum_lo);
    sum_lo = _mm_xor_si128(sum_lo, _mm_slli_si128(sum_mid, 8));
    sum_hi = _mm_xor_si128(sum_hi, _mm_srli_si128(sum_mid, 8));

    /* two folding steps; from https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf */
    for (int step = 0; step < 2; ++step) {
        const __m128i r = _mm_clmulepi64_si128(sum_lo, poly, 0x10);
        sum_lo = _mm_xor_si128(_mm_shuffle_epi32(sum_lo, 78), r);
    }

    return _mm_xor_si128(sum_hi, sum_lo);
}
/* Encrypts the six blocks `data[0..5]` in place while folding the six blocks at `gdata[0..5]` into `ghash`; the GHASH
 * fragments are interleaved with the AES rounds by passing AESECB6 as the layout macro. Returns the updated hash (the
 * `return` is supplied by the GHASH6 expansion). */
static inline __m128i aesecb6ghash6(ptls_fusion_aesgcm_context_t *ctx, __m128i *data, const __m128i *gdata, __m128i ghash)
{
GHASH6(AESECB6);
}
/* Loads the first `l` bytes at `_p` into the low bytes of a vector and zero-fills the rest.
 * `l` must not exceed 16; `_p` may have any alignment. */
static inline __m128i loadn(const void *_p, size_t l)
{
    uint8_t buf[16] = {0};

    if (l != 0)
        memcpy(buf, _p, l);
    /* `buf` carries no 16-byte alignment guarantee, hence the unaligned load rather than a pointer dereference */
    return _mm_loadu_si128((const __m128i *)buf);
}
/* Writes the low `l` bytes of `v` to `_p`; bytes beyond `l` are left untouched.
 * `l` must not exceed 16; `_p` may have any alignment. */
static inline void storen(void *_p, size_t l, __m128i v)
{
    uint8_t buf[16];

    /* `buf` carries no 16-byte alignment guarantee, hence the unaligned store rather than a pointer dereference */
    _mm_storeu_si128((__m128i *)buf, v);
    if (l != 0)
        memcpy(_p, buf, l);
}
/* Completes the GHASH computation and writes the 16-byte tag.
 *
 * @param ctx        context holding the precomputed GHASH multipliers
 * @param dst        position right after the last ciphertext byte; the tag is stored here (unaligned store)
 * @param dst_ghash  first ciphertext byte that has not been hashed yet; everything in [dst_ghash, dst) is hashed
 * @param aad        AAD bytes that have not been hashed yet
 * @param aadlen     number of bytes at `aad`
 * @param ghash      running hash over everything consumed so far
 * @param ac         bit lengths (low qword: AAD, high qword: text) that form the closing block
 * @param ek0        value that is xored onto the final hash to produce the tag
 *
 * Remaining AAD is hashed first, then the remaining ciphertext, then the length block; a trailing partial block of
 * either input is zero-padded. `aad` and `dst_ghash` point into caller buffers of arbitrary alignment, so all reads
 * go through unaligned loads. */
static inline void finish_gcm(ptls_fusion_aesgcm_context_t *ctx, __m128i *dst, const __m128i *dst_ghash, const __m128i *aad, size_t aadlen,
                              __m128i ghash, __m128i ac, __m128i ek0)
{
    const __m128i bswap64 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
    const __m128i *enc = dst_ghash;
    size_t enclen = (size_t)((const uint8_t *)dst - (const uint8_t *)enc);
    __m128i gdata[6];
    int gdata_index;

    for (;;) {
        gdata_index = 0;

        /* full AAD blocks, then the zero-padded AAD tail */
        while (gdata_index < 6 && aadlen >= 16) {
            gdata[gdata_index++] = _mm_loadu_si128(aad++);
            aadlen -= 16;
        }
        if (gdata_index < 6 && aadlen != 0) {
            gdata[gdata_index++] = loadn(aad, aadlen);
            aadlen = 0;
        }

        /* a free slot at this point implies that the AAD has been used up; continue with the ciphertext */
        while (gdata_index < 6 && enclen >= 16) {
            gdata[gdata_index++] = _mm_loadu_si128(enc++);
            enclen -= 16;
        }
        if (gdata_index < 6 && enclen != 0) {
            gdata[gdata_index++] = loadn(enc, enclen);
            enclen = 0;
        }

        /* a free slot now means all input is consumed and there is room for the length block */
        if (gdata_index < 6)
            break;

        ghash = ghash6(ctx, gdata, ghash);
    }

    /* closing block: both bit lengths converted to big-endian */
    gdata[gdata_index++] = _mm_shuffle_epi8(ac, bswap64);

    /* final */
    ghash = ghashn(ctx, gdata, (size_t)gdata_index, ghash);

    __m128i tag = _mm_shuffle_epi8(ghash, bswap8);
    tag = _mm_xor_si128(tag, ek0);
    _mm_storeu_si128(dst, tag);
}
/**
 * Encrypts `srclen` bytes at `_src` with AES-GCM and writes the ciphertext, immediately followed by the 16-byte
 * authentication tag, to `_dst`.
 *
 * @param ctx     context prepared by ptls_fusion_aesgcm_init
 * @param iv      nonce; PTLS_AESGCM_IV_SIZE bytes are read
 * @param _aad    additional authenticated data (any alignment)
 * @param aadlen  number of bytes at `_aad`
 * @param _dst    output buffer (any alignment); needs room for `srclen` bytes plus the 16-byte tag
 * @param _src    plaintext (any alignment)
 * @param srclen  number of bytes at `_src`
 *
 * Six counter blocks are encrypted at a time. While a batch of key stream is being generated, the previous batch of
 * ciphertext (preceded by the AAD, as long as some is left) is hashed in the same pass; whatever has not been hashed
 * when the input ends is handled by finish_gcm.
 */
void ptls_fusion_aesgcm_encrypt(ptls_fusion_aesgcm_context_t *ctx, const void *iv, const void *_aad, size_t aadlen, void *_dst,
                                const void *_src, size_t srclen)
{
    /* `ctr` holds the counter block with its bytes reversed, so that the 32-bit block counter occupies the lowest dword
     * and can be bumped with a plain add; reversing the bytes once more (bswap8) yields the block `IV || counter`.
     * NOTE(review): this relies on PTLS_AESGCM_IV_SIZE being 12 - confirm against picotls.h. */
    const __m128i one = _mm_set_epi32(0, 0, 0, 1);
    __m128i ctr, ek0, bits[6], gdatabuf[6], ghash = _mm_setzero_si128();
    int ek0_encrypted = 0;
    /* bit lengths for the closing GHASH block (low qword: AAD, high qword: text); computed in 64 bits so that large
     * inputs neither truncate nor overflow */
    const __m128i ac = _mm_set_epi64x((long long)((uint64_t)srclen * 8), (long long)((uint64_t)aadlen * 8));

    // src and dst are updated after the chunk is processed
    const __m128i *src = _src;
    __m128i *dst = _dst;
    // aad and dst_ghash are updated before the chunk is processed (i.e., when the pointers are fed into the processor)
    const __m128i *aad = _aad, *dst_ghash = dst;

    /* build counter; the block with counter value 1 is reserved for masking the tag */
    ctr = loadn(iv, PTLS_AESGCM_IV_SIZE);
    ctr = _mm_shuffle_epi8(ctr, bswap8);
    ctr = _mm_add_epi64(ctr, one);
    ek0 = _mm_shuffle_epi8(ctr, bswap8);

/* setup the counters (we can always run in full), but use the last slot for calculating ek0, if possible */
#define SETUP_BITS()                                                                                                               \
    do {                                                                                                                           \
        for (int i = 0; i < 5; ++i) {                                                                                              \
            ctr = _mm_add_epi64(ctr, one);                                                                                         \
            bits[i] = _mm_shuffle_epi8(ctr, bswap8);                                                                               \
        }                                                                                                                          \
        if (PTLS_LIKELY(srclen > 16 * 5)) {                                                                                        \
            ctr = _mm_add_epi64(ctr, one);                                                                                         \
            bits[5] = _mm_shuffle_epi8(ctr, bswap8);                                                                               \
        } else {                                                                                                                   \
            assert(!ek0_encrypted);                                                                                                \
            bits[5] = ek0;                                                                                                         \
            ek0_encrypted = 1;                                                                                                     \
        }                                                                                                                          \
    } while (0)

    /* build the first AES bits */
    SETUP_BITS();
    aesecb6(ctx, bits);

    /* the main loop */
    while (PTLS_LIKELY(srclen >= 6 * 16)) {
        /* apply the bits */
        for (int i = 0; i < 6; ++i)
            _mm_storeu_si128(dst++, _mm_xor_si128(_mm_loadu_si128(src++), bits[i]));
        srclen -= 6 * 16;

        /* setup bits */
        SETUP_BITS();

        /* setup gdata: six blocks to hash, taken from the AAD first and from the ciphertext written so far after that */
        const __m128i *gdata;
        if (PTLS_UNLIKELY(aadlen != 0)) {
            int filled = 0;
            while (filled < 6 && aadlen >= 16) {
                gdatabuf[filled++] = _mm_loadu_si128(aad++);
                aadlen -= 16;
            }
            if (filled < 6 && aadlen != 0) {
                /* zero-padded tail of the AAD */
                gdatabuf[filled++] = loadn(aad, aadlen);
                aadlen = 0;
            }
            while (filled < 6)
                gdatabuf[filled++] = _mm_loadu_si128(dst_ghash++);
            gdata = gdatabuf;
        } else {
            gdata = dst_ghash;
            dst_ghash += 6;
        }

        /* doit */
        ghash = aesecb6ghash6(ctx, bits, gdata, ghash);
    }

    /* apply the bit stream to the remainder */
    for (int i = 0; i < 6 && srclen != 0; ++i) {
        if (srclen < 16) {
            storen(dst, srclen, _mm_xor_si128(loadn(src, srclen), bits[i]));
            dst = (__m128i *)((uint8_t *)dst + srclen);
            srclen = 0;
            break;
        }
        _mm_storeu_si128(dst++, _mm_xor_si128(_mm_loadu_si128(src++), bits[i]));
        srclen -= 16;
    }

    if (ek0_encrypted) {
        ek0 = bits[5];
    } else {
        /* All six slots of the last batch were needed for data (the tail was longer than five blocks), so the tag
         * mask has not been encrypted yet; run it through the cipher on its own. */
        ek0 = _mm_xor_si128(ek0, ctx->keys[0]);
        for (size_t round = 1; round < PTLS_FUSION_AESGCM_ROUNDS; ++round)
            ek0 = _mm_aesenc_si128(ek0, ctx->keys[round]);
        ek0 = _mm_aesenclast_si128(ek0, ctx->keys[PTLS_FUSION_AESGCM_ROUNDS]);
    }

    finish_gcm(ctx, dst, dst_ghash, aad, aadlen, ghash, ac, ek0);
}
/* One step of the key schedule: every dword of the result is the xor of the corresponding dword of `key`, all lower
 * dwords of `key`, and the top dword of `t` (the output of _mm_aeskeygenassist_si128 at the call site). */
static __m128i expand_key(__m128i key, __m128i t)
{
    /* three shift-by-one-dword-and-xor passes turn `key` into its running (prefix) xor */
    for (int pass = 0; pass < 3; ++pass)
        key = _mm_xor_si128(key, _mm_slli_si128(key, 4));

    /* 0xff replicates the top dword of `t` into all four lanes */
    return _mm_xor_si128(key, _mm_shuffle_epi32(t, 0xff));
}
/**
 * Prepares `ctx` for use with the 16-byte key at `_userkey` (read with an unaligned load): fills in the eleven round
 * keys and the six GHASH multiplier entries that ptls_fusion_aesgcm_encrypt relies on.
 */
void ptls_fusion_aesgcm_init(ptls_fusion_aesgcm_context_t *ctx, const void *_userkey)
{
    /* round keys; the second argument of _mm_aeskeygenassist_si128 has to be a compile-time constant, hence the macro
     * rather than a loop over a table */
    ctx->keys[0] = _mm_loadu_si128((__m128i *)_userkey);
#define EXPAND(N, R)                                                                                                               \
    do {                                                                                                                           \
        ctx->keys[N] = expand_key(ctx->keys[(N)-1], _mm_aeskeygenassist_si128(ctx->keys[(N)-1], R));                               \
    } while (0)
    EXPAND(1, 0x1);
    EXPAND(2, 0x2);
    EXPAND(3, 0x4);
    EXPAND(4, 0x8);
    EXPAND(5, 0x10);
    EXPAND(6, 0x20);
    EXPAND(7, 0x40);
    EXPAND(8, 0x80);
    EXPAND(9, 0x1b);
    EXPAND(10, 0x36);
#undef EXPAND

    /* hash key = encryption of the all-zero block (zero xor keys[0] is keys[0], so start from there) */
    __m128i hkey = ctx->keys[0];
    for (size_t round = 1; round < PTLS_FUSION_AESGCM_ROUNDS; ++round)
        hkey = _mm_aesenc_si128(hkey, ctx->keys[round]);
    hkey = _mm_aesenclast_si128(hkey, ctx->keys[PTLS_FUSION_AESGCM_ROUNDS]);
    ctx->ghash[0].H = transformH(_mm_shuffle_epi8(hkey, bswap8));

    /* entry n holds entry n-1 multiplied by entry 0, plus `r` = (high half xor low half) of H in both halves, which
     * is the operand of the middle Karatsuba multiplication */
    for (size_t n = 0; n < 6; ++n) {
        if (n != 0)
            ctx->ghash[n].H = gfmul(ctx->ghash[n - 1].H, ctx->ghash[0].H);
        const __m128i swapped = _mm_shuffle_epi32(ctx->ghash[n].H, 78);
        ctx->ghash[n].r = _mm_xor_si128(swapped, ctx->ghash[n].H);
    }
}
/* Wipes the entire context (round keys and GHASH entries) via ptls_clear_memory. */
void ptls_fusion_aesgcm_dispose(ptls_fusion_aesgcm_context_t *ctx)
{
    ptls_clear_memory(ctx, sizeof *ctx);
}
/* AEAD context used by the callbacks below. `super` must stay the first member: the callbacks receive a
 * `ptls_aead_context_t *` and cast it to `struct aesgcm_context *`. */
struct aesgcm_context {
ptls_aead_context_t super;
/* key-dependent state set up by ptls_fusion_aesgcm_init */
ptls_fusion_aesgcm_context_t aesgcm;
};
/* `dispose_crypto` callback: wipes the fusion state embedded in the AEAD context. */
static void aesgcm_dispose_crypto(ptls_aead_context_t *_ctx)
{
    ptls_fusion_aesgcm_dispose(&((struct aesgcm_context *)_ctx)->aesgcm);
}
/* Placeholder for the `do_encrypt_init` callback: not implemented yet. Trips an assertion when called; when assertions
 * are compiled out it does nothing. */
static void aead_do_encrypt_init(ptls_aead_context_t *_ctx, const void *iv, const void *aad, size_t aadlen)
{
assert(!"FIXME");
}
/* Placeholder for the `do_encrypt_update` callback: not implemented yet. Trips an assertion when called; when
 * assertions are compiled out it returns SIZE_MAX without touching `output`. */
static size_t aead_do_encrypt_update(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen)
{
assert(!"FIXME");
return SIZE_MAX;
}
/* Placeholder for the `do_encrypt_final` callback: not implemented yet. Trips an assertion when called; when
 * assertions are compiled out it returns SIZE_MAX without touching `_output`. */
static size_t aead_do_encrypt_final(ptls_aead_context_t *_ctx, void *_output)
{
assert(!"FIXME");
return SIZE_MAX;
}
/* Placeholder for the `do_decrypt` callback: decryption is not implemented yet. Trips an assertion when called; when
 * assertions are compiled out it returns SIZE_MAX without touching `_output`. */
static size_t aead_do_decrypt(ptls_aead_context_t *_ctx, void *_output, const void *input, size_t inlen, const void *iv,
const void *aad, size_t aadlen)
{
assert(!"FIXME");
return SIZE_MAX;
}
/* `setup_crypto` callback of ptls_fusion_aes128gcm: installs the callbacks that match the direction and derives the
 * key-dependent state from `key`. Only the encrypting direction is supported at the moment; asking for a decrypting
 * context trips the assertion. Always returns 0. */
static int aes128gcm_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void *key)
{
    struct aesgcm_context *self = (struct aesgcm_context *)_ctx;

    self->super.dispose_crypto = aesgcm_dispose_crypto;
    /* callbacks of the unused direction are left as NULL */
    self->super.do_encrypt_init = is_enc ? aead_do_encrypt_init : NULL;
    self->super.do_encrypt_update = is_enc ? aead_do_encrypt_update : NULL;
    self->super.do_encrypt_final = is_enc ? aead_do_encrypt_final : NULL;
    self->super.do_decrypt = is_enc ? NULL : aead_do_decrypt;

    assert(is_enc);
    ptls_fusion_aesgcm_init(&self->aesgcm, key);

    return 0;
}
/* Algorithm descriptor exposing this implementation under the name "AES128-GCM". The initializer is positional, so
 * the order of the values has to follow the member order of ptls_aead_algorithm_t (declared outside this file). The
 * two NULL slots are reserved for companion cipher descriptors that do not exist yet (see the commented-out names). */
ptls_aead_algorithm_t ptls_fusion_aes128gcm = {"AES128-GCM",
NULL, // &ptls_fusion_aes128ctr,
NULL, // &ptls_fusion_aes128ecb,
PTLS_AES128_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
sizeof(struct aesgcm_context),
aes128gcm_setup_crypto};