blob: 228447b0ad069e0dfd666da47cf3a46584c57781 [file] [log] [blame] [edit]
/*
* Copyright (c) 2025 Fastly, Kazuho Oku
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef picotls_quiclb_h
#define picotls_quiclb_h
#if defined(__x86_64__) || defined(_M_X64)
#include <emmintrin.h>
#define PICOTLS_QUICLB_HAVE_SSE2 1
#endif
union picotls_quiclb_block {
uint8_t bytes[PTLS_AES_BLOCK_SIZE];
uint64_t u64[PTLS_AES_BLOCK_SIZE / sizeof(uint64_t)];
#if PICOTLS_QUICLB_HAVE_SSE2
__m128i m128;
#endif
};
/**
* encrypts one block of AES, assuming the context is `ptls_cipher_context_t` backed by ptls_foo_aes128ecb
*/
static inline void picotls_quiclb_cipher_aes(void *aesecb, union picotls_quiclb_block *block)
{
ptls_cipher_encrypt(aesecb, block->bytes, block->bytes, PTLS_AES_BLOCK_SIZE);
}
/**
* calculates X ^ AES(mask_and_expand(Y))
*/
static inline void picotls_quiclb_one_round(void (*aesecb_func)(void *aesecb, union picotls_quiclb_block *), void *aesecb_ctx,
union picotls_quiclb_block *dest, const union picotls_quiclb_block *x,
const union picotls_quiclb_block *y, const union picotls_quiclb_block *mask,
const union picotls_quiclb_block *len_pass)
{
#if PICOTLS_QUICLB_HAVE_SSE2
dest->m128 = _mm_or_si128(_mm_and_si128(y->m128, mask->m128), len_pass->m128);
#else
for (size_t i = 0; i < PTLS_ELEMENTSOF(dest->u64); ++i)
dest->u64[i] = (y->u64[i] & mask->u64[i]) | len_pass->u64[i];
#endif
aesecb_func(aesecb_ctx, dest);
#if PICOTLS_QUICLB_HAVE_SSE2
dest->m128 = _mm_xor_si128(dest->m128, x->m128);
#else
for (size_t i = 0; i < PTLS_ELEMENTSOF(dest->u64); ++i)
dest->u64[i] ^= x->u64[i];
#endif
}
static inline void picotls_quiclb_split_input(union picotls_quiclb_block *l, union picotls_quiclb_block *r, const uint8_t *input,
size_t len)
{
size_t i;
for (i = 0; i < (len + 1) / 2; ++i)
l->bytes[i] = input[i];
for (; i < PTLS_ELEMENTSOF(l->bytes); ++i)
l->bytes[i] = 0;
for (i = 0; i < (len + 1) / 2; ++i)
r->bytes[i] = input[i + len / 2];
for (; i < PTLS_ELEMENTSOF(r->bytes); ++i)
r->bytes[i] = 0;
}
static inline void picotls_quiclb_merge_output(uint8_t *output, size_t len, const union picotls_quiclb_block *l,
const union picotls_quiclb_block *r)
{
uint8_t *outp = output;
for (size_t i = 0; i < len / 2; ++i)
*outp++ = l->bytes[i];
if (len % 2 == 0) {
for (size_t i = 0; i < len / 2; ++i)
*outp++ = r->bytes[i];
} else {
*outp++ = (l->bytes[len / 2] & 0xf0) | (r->bytes[0] & 0x0f);
for (size_t i = 0; i < len / 2; ++i)
*outp++ = r->bytes[i + 1];
}
}
static inline void picotls_quiclb_do_init(ptls_cipher_context_t *ctx, const void *iv)
{
/* no-op */
}
static inline void picotls_quiclb_transform(void (*aesecb_func)(void *aesecb, union picotls_quiclb_block *), void *aesecb_ctx,
void *output, const void *input, size_t len, int encrypt)
{
static const struct quiclb_mask_t {
union picotls_quiclb_block l, r;
} masks[] = {
{{{0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff}}}, /* 7 (MIN_LEN) */
{{{0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff}}}, /* 8 */
{{{0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff}}}, /* 9 */
{{{0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 10 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 11 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 12 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 13 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 14 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, {{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 15 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 16 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}},
{{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 17 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}},
{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}, /* 18 */
{{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}},
{{0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}} /* 19 */
};
assert(PTLS_QUICLB_MIN_BLOCK_SIZE <= len && len <= PTLS_QUICLB_MAX_BLOCK_SIZE);
PTLS_BUILD_ASSERT(PTLS_QUICLB_MAX_BLOCK_SIZE == PTLS_QUICLB_MIN_BLOCK_SIZE + PTLS_ELEMENTSOF(masks) - 1);
const struct quiclb_mask_t *mask = &masks[len - PTLS_QUICLB_MIN_BLOCK_SIZE];
union picotls_quiclb_block l0, r0, r1, l1, r2, l2, len_pass = {{0}};
len_pass.bytes[14] = (uint8_t)len;
#define ROUND(rnd, dest, x, y, mask_side) \
do { \
len_pass.bytes[15] = (rnd); \
picotls_quiclb_one_round(aesecb_func, aesecb_ctx, &dest, &x, &y, &mask->mask_side, &len_pass); \
} while (0)
if (encrypt) {
picotls_quiclb_split_input(&l0, &r0, input, len);
ROUND(1, r1, r0, l0, l);
ROUND(2, l1, l0, r1, r);
ROUND(3, r2, r1, l1, l);
ROUND(4, l2, l1, r2, r);
picotls_quiclb_merge_output(output, len, &l2, &r2);
} else {
picotls_quiclb_split_input(&l2, &r2, input, len);
ROUND(4, l1, l2, r2, r);
ROUND(3, r1, r2, l1, l);
ROUND(2, l0, l1, r1, r);
ROUND(1, r0, r1, l0, l);
picotls_quiclb_merge_output(output, len, &l0, &r0);
}
#undef ROUND
}
#endif