blob: b0f525b8f8491ef37165d7ea5cae8f0c0a6193c4 [file] [log] [blame]
/*
* Copyright (c) 2016 DeNA Co., Ltd., Kazuho Oku
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifdef _WINDOWS
#include "wincompat.h"
#else
#include <arpa/inet.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <time.h>
#endif
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "picotls.h"
#include "picotls/ffx.h"
#include "picotls/minicrypto.h"
#include "picotls/openssl.h"
#ifndef _WINDOWS
#ifdef PTLS_HAVE_FUSION
#include "picotls/fusion.h"
#endif
#endif
#include <openssl/opensslv.h>
#ifdef _WINDOWS
#include <bcrypt.h>
#include "picotls/ptlsbcrypt.h"
#ifdef _DEBUG
#define BENCH_MODE "check"
#else
#define BENCH_MODE "release"
#endif
#include "../lib/ptlsbcrypt.c"
#else
#ifdef PTLS_DEBUG
#define BENCH_MODE "debug"
#else
#define BENCH_MODE "release"
#endif
#endif
#ifdef PTLS_HAVE_MBEDTLS
#include "picotls/mbedtls.h"
#endif
/* Returns a timestamp in microseconds.
 * When the platform defines CLOCK_PROCESS_CPUTIME_ID and the clock can be
 * read, the value is the CPU time consumed by the process; otherwise the
 * value comes from gettimeofday().
 */
static uint64_t bench_time()
{
#ifdef CLOCK_PROCESS_CPUTIME_ID
    struct timespec cpu_ts;

    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu_ts) == 0) {
        /* whole seconds scaled to microseconds, plus the truncated sub-second part */
        return (1000000ull) * ((uint64_t)cpu_ts.tv_sec) + ((uint64_t)cpu_ts.tv_nsec) / 1000;
    }
#endif
    struct timeval wall;

    gettimeofday(&wall, NULL);
    return (uint64_t)wall.tv_sec * 1000000 + wall.tv_usec;
}
/* Single measurement.
 *
 * Encrypts and then decrypts n messages of l bytes each, in batches of at
 * most BENCH_BATCH messages, and accumulates the time spent in each phase.
 *
 *   e, d   - AEAD contexts used for encryption and decryption respectively
 *   n      - total number of messages to process
 *   l      - plaintext length of each message, in bytes
 *   t_enc  - out: total time spent encrypting, in bench_time() units
 *   t_dec  - out: total time spent decrypting, in bench_time() units
 *   s      - out: reset to 0, then receives a sum of one byte of every
 *            ciphertext and one byte of every decrypted message, so that the
 *            results are observably used
 *
 * Returns 0 on success, PTLS_ERROR_NO_MEMORY if a buffer cannot be allocated,
 * or PTLS_ALERT_DECRYPT_ERROR if a decrypted length differs from l. The
 * measurement stops at the first decryption failure.
 */
#define BENCH_BATCH 1000
static int bench_run_one(ptls_aead_context_t *e, ptls_aead_context_t *d, size_t n, size_t l, uint64_t *t_enc, uint64_t *t_dec,
                         uint64_t *s)
{
    int ret = 0;
    uint8_t *v_in = NULL;
    uint8_t *v_enc[BENCH_BATCH];
    uint8_t *v_dec = NULL;
    uint64_t h[4];

    *t_enc = 0;
    *t_dec = 0;
    *s = 0;

    /* Clear the pointer table first so that the cleanup code can free every slot unconditionally. */
    memset(v_enc, 0, sizeof(v_enc));
    memset(h, 0, sizeof(h));

    v_in = (uint8_t *)malloc(l);
    v_dec = (uint8_t *)malloc(l);
    if (v_in == NULL || v_dec == NULL) {
        ret = PTLS_ERROR_NO_MEMORY;
    }

    /* One output buffer per message of a batch, with room for the data appended by the encryption. */
    for (size_t i = 0; ret == 0 && i < BENCH_BATCH; i++) {
        v_enc[i] = (uint8_t *)malloc(l + PTLS_MAX_DIGEST_SIZE);
        if (v_enc[i] == NULL) {
            ret = PTLS_ERROR_NO_MEMORY;
        }
    }

    if (ret == 0) {
        memset(v_in, 0, l);

        /* Stop as soon as a batch fails instead of running the remaining batches. */
        for (size_t k = 0; ret == 0 && k < n;) {
            size_t e_len = 0;
            size_t d_len;
            size_t i_max = ((n - k) > BENCH_BATCH) ? BENCH_BATCH : n - k;
            uint64_t old_h = h[0];
            uint64_t t_start = bench_time();
            uint64_t t_medium;
            uint64_t t_end;

            /* h[0] is incremented per message and passed as the sequence number;
             * the whole h array is passed as the additional data. */
            for (size_t i = 0; i < i_max; i++) {
                h[0]++;
                e_len = ptls_aead_encrypt(e, v_enc[i], v_in, l, h[0], h, sizeof(h));
                *s += (v_enc[i])[l];
            }
            t_medium = bench_time();

            /* Rewind the counter so that decryption replays the values used for encryption. */
            h[0] = old_h;
            for (size_t i = 0; i < i_max; i++) {
                h[0]++;
                d_len = ptls_aead_decrypt(d, v_dec, v_enc[i], e_len, h[0], h, sizeof(h));
                if (d_len != l) {
                    ret = PTLS_ALERT_DECRYPT_ERROR;
                    break;
                }
                *s += v_dec[0];
            }
            t_end = bench_time();

            *t_enc += t_medium - t_start;
            *t_dec += t_end - t_medium;
            k += i_max;
        }
    }

    /* free(NULL) is a no-op, so no guards are needed here. */
    free(v_in);
    for (size_t i = 0; i < BENCH_BATCH; i++) {
        free(v_enc[i]);
    }
    free(v_dec);

    return ret;
}
/* Throughput of n messages of l bytes processed in time t, in bits per unit
 * of t. The callers in this file pass microseconds, which makes the result
 * megabits per second.
 */
static double bench_mbps(uint64_t t, size_t l, size_t n)
{
    double total_bits = (double)l * (double)n * 8;

    return total_bits / (double)t;
}
/* Measure one specific aead implementation
 *
 * Creates an encryption and a decryption context for the given algorithm,
 * runs bench_run_one() on them and, on success, prints one CSV row.
 *
 *   OS, HW      - strings printed in the first two columns
 *   basic_ref   - reference CPU measurement printed in the "10M ops" column
 *   s0          - value folded into *s so that it is observably used
 *   provider    - provider name; "openssl" additionally triggers reporting
 *                 of the OpenSSL version the program was compiled against
 *   algo_name   - algorithm name printed in the row
 *   aead, hash  - algorithms passed to ptls_aead_new()
 *   n, l        - number of messages and length of each message in bytes
 *   s           - in/out: checksum accumulator
 *
 * Returns 0 on success, PTLS_ERROR_NO_MEMORY if a context cannot be created,
 * or the error returned by bench_run_one().
 */
static int bench_run_aead(char *OS, char *HW, int basic_ref, uint64_t s0, const char *provider, const char *algo_name,
                          ptls_aead_algorithm_t *aead, ptls_hash_algorithm_t *hash, size_t n, size_t l, uint64_t *s)
{
    int ret = 0;
    uint8_t secret[PTLS_MAX_SECRET_SIZE];
    ptls_aead_context_t *e;
    ptls_aead_context_t *d;
    uint64_t t_e = 0;
    uint64_t t_d = 0;
    char p_version[128];

    /* Document library version as it may have impact on performance */
    p_version[0] = 0;
    if (strcmp(provider, "openssl") == 0) {
        /*
         * OPENSSL_VERSION_NUMBER packs the version into a single integer:
         *  - before OpenSSL 3.0: 0xMNNFFPPSL (major, minor, fix, patch letter, status),
         *    e.g. 1.1.1w, where patch 0 means "no letter";
         *  - from OpenSSL 3.0:   0xMNN00PP0L (major, minor, patch), e.g. 3.0.2.
         */
        uint32_t combined = OPENSSL_VERSION_NUMBER;
        int M = combined >> 28;
        int NN = (combined >> 20) & 0xFF;
        int FF = (combined >> 12) & 0xFF;
        int PP = (combined >> 4) & 0xFF;
        int third = FF;
        char letter[2] = {0, 0};

        if (M >= 3) {
            /* 3.x layout: the third component is the patch number and there is no letter */
            third = PP;
        } else if (PP > 0) {
            letter[0] = (char)('a' - 1 + PP);
        }
#ifdef _WINDOWS
        (void)sprintf_s(p_version, sizeof(p_version), "%d.%d.%d%s", M, NN, third, letter);
#else
        (void)snprintf(p_version, sizeof(p_version), "%d.%d.%d%s", M, NN, third, letter);
#endif
    }

    memset(secret, 'z', sizeof(secret));
    e = ptls_aead_new(aead, hash, 1, secret, NULL);
    d = ptls_aead_new(aead, hash, 0, secret, NULL);

    if (e == NULL || d == NULL) {
        ret = PTLS_ERROR_NO_MEMORY;
    } else {
        ret = bench_run_one(e, d, n, l, &t_e, &t_d, s);
        if (ret == 0) {
            printf("%s, %s, %d, %s, %d, %s, %s, %s, %d, %d, %d, %d, %.2f, %.2f\n", OS, HW, (int)(8 * sizeof(size_t)), BENCH_MODE,
                   basic_ref, provider, p_version, algo_name, (int)n, (int)l, (int)t_e, (int)t_d, bench_mbps(t_e, l, n),
                   bench_mbps(t_d, l, n));
        }
    }

    /* bench_run_one() resets *s before accumulating, so s0 has to be folded in
     * after the measurement; adding it beforehand would discard it. */
    *s += s0;

    /* The contexts are only released when they were actually created. */
    if (e) {
        ptls_aead_free(e);
    }
    if (d) {
        ptls_aead_free(d);
    }

    return ret;
}
/* One row of the benchmark table: an AEAD implementation to measure.
 * The table below initializes these fields positionally, so their order
 * matters.
 */
typedef struct st_ptls_bench_entry_t {
/* Provider name printed in the output; the value "openssl" also makes
 * bench_run_aead() report the OpenSSL version. */
const char *provider;
/* Algorithm name printed in the output. */
const char *algo_name;
/* AEAD algorithm handed to ptls_aead_new(). */
ptls_aead_algorithm_t *aead;
/* Hash algorithm handed to ptls_aead_new() together with the AEAD. */
ptls_hash_algorithm_t *hash;
/* Non-zero if the entry runs without the "-f" option (the field name is
 * misspelled, but it is referenced by that name elsewhere in the file). */
int enabled_by_defaut;
} ptls_bench_entry_t;
/* Table of the AEAD implementations to benchmark; main() walks it in order.
 * Each row is {provider, algorithm name, AEAD, hash, enabled by default}.
 * Rows whose last field is 0 only run when the "-f" option is given.
 * Which rows exist depends on the platform and on the optional backends
 * selected at build time through the preprocessor symbols tested below.
 */
static ptls_bench_entry_t aead_list[] = {
/* Minicrypto AES is disabled by default because of its atrocious performance */
{"minicrypto", "aes128gcm", &ptls_minicrypto_aes128gcm, &ptls_minicrypto_sha256, 0},
{"minicrypto", "aes256gcm", &ptls_minicrypto_aes256gcm, &ptls_minicrypto_sha384, 0},
{"minicrypto", "chacha20poly1305", &ptls_minicrypto_chacha20poly1305, &ptls_minicrypto_sha256, 1},
#ifdef PTLS_HAVE_AEGIS
{"minicrypto", "aegis128l", &ptls_minicrypto_aegis128l, &ptls_minicrypto_sha256, 1},
{"minicrypto", "aegis256", &ptls_minicrypto_aegis256, &ptls_minicrypto_sha384, 1},
#endif
#ifdef _WINDOWS
{"ptlsbcrypt", "aes128gcm", &ptls_bcrypt_aes128gcm, &ptls_bcrypt_sha256, 1},
{"ptlsbcrypt", "aes256gcm", &ptls_bcrypt_aes256gcm, &ptls_bcrypt_sha384, 1},
#endif
#if !defined(_WINDOWS)
#ifdef PTLS_HAVE_FUSION
{"fusion", "aes128gcm", &ptls_fusion_aes128gcm, &ptls_minicrypto_sha256, 1},
{"fusion", "aes256gcm", &ptls_fusion_aes256gcm, &ptls_minicrypto_sha384, 1},
#endif
#endif
#if PTLS_OPENSSL_HAVE_CHACHA20_POLY1305
{"openssl", "chacha20poly1305", &ptls_openssl_chacha20poly1305, &ptls_minicrypto_sha256, 1},
#endif
{"openssl", "aes128gcm", &ptls_openssl_aes128gcm, &ptls_minicrypto_sha256, 1},
{"openssl", "aes256gcm", &ptls_openssl_aes256gcm, &ptls_minicrypto_sha384, 1},
#ifdef PTLS_HAVE_MBEDTLS
{"mbedtls", "aes128gcm", &ptls_mbedtls_aes128gcm, &ptls_mbedtls_sha256, 1},
#if defined(MBEDTLS_SHA384_C)
{"mbedtls", "aes256gcm", &ptls_mbedtls_aes256gcm, &ptls_mbedtls_sha384, 1},
#endif
{"mbedtls", "chacha20poly1305", &ptls_mbedtls_chacha20poly1305, &ptls_mbedtls_sha256, 1},
#endif
};
/* Number of rows in aead_list. */
static size_t nb_aead_list = sizeof(aead_list) / sizeof(ptls_bench_entry_t);
/* Reference measurement of the current CPU.
 * Runs ten million rounds of a small rotate/add/xor mix over the two 32-bit
 * halves of *x, writes the mixed value back to *x and returns the elapsed
 * time as reported by bench_time(), truncated to int.
 */
static int bench_basic(uint64_t *x)
{
    uint64_t started_at = bench_time();
    uint32_t lo = (uint32_t)((*x) & 0xFFFFFFFF);
    uint32_t hi = (uint32_t)((*x) >> 32);

    /* The mix is designed to emulate typical encryption operations; its
     * result is handed back through *x, hopefully so that the optimizer
     * will not compile the loop out. */
    for (unsigned int round = 0; round < 10000000; round++) {
        uint32_t mixed = (((lo >> 3) | (lo << 29)) + lo) ^ hi;

        hi = lo;
        lo = mixed;
    }

    *x = (((uint64_t)hi) << 32) | lo;

    return (int)(bench_time() - started_at);
}
/* Entry point of the benchmark.
 *
 * Usage: <program> [-f]
 *   -f  also run the table entries that are not enabled by default.
 *
 * Prints a CSV header followed by one row per benchmarked AEAD. Returns 0 on
 * success or the error code of the first benchmark that failed; exits with
 * -1 on a usage error or if the PSA crypto library cannot be initialized.
 */
int main(int argc, char **argv)
{
    int ret = 0;
    int force_all_tests = 0;
    uint64_t x = 0xdeadbeef;
    uint64_t s = 0;
    int basic_ref;
    char OS[128];
    char HW[128];
#ifndef _WINDOWS
    struct utsname uts;
#endif

    /* Validate the command line before doing any work, so that a usage error
     * exits immediately and without leaving the PSA library initialized. */
    if (argc == 2 && strcmp(argv[1], "-f") == 0) {
        force_all_tests = 1;
    } else if (argc > 1) {
        fprintf(stderr, "Usage: %s [-f]\n   Use option \"-f\" to force execution of the slower tests.\n", argv[0]);
        exit(-1);
    }

    /* Reference CPU measurement, reported on every output row. */
    basic_ref = bench_basic(&x);

#ifdef _WINDOWS
    (void)strcpy_s(OS, sizeof(OS), "windows");
    (void)strcpy_s(HW, sizeof(HW), "x86_64");
#else
    /* The fields stay empty if uname() fails or a name does not fit. */
    OS[0] = 0;
    HW[0] = 0;
    if (uname(&uts) == 0) {
        if (strlen(uts.sysname) + 1 < sizeof(OS)) {
            strcpy(OS, uts.sysname);
        }
        if (strlen(uts.machine) + 1 < sizeof(HW)) {
            strcpy(HW, uts.machine);
        }
    }
#endif

#ifdef PTLS_HAVE_MBEDTLS
    if (psa_crypto_init() != PSA_SUCCESS) {
        fprintf(stderr, "psa_crypto_init fails.\n");
        exit(-1);
    }
#endif

    printf(
        "OS, HW, bits, mode, 10M ops, provider, version, algorithm, N, L, encrypt us, decrypt us, encrypt mbps, decrypt mbps,\n");

    /* Run the table in order and stop at the first failure. */
    for (size_t i = 0; ret == 0 && i < nb_aead_list; i++) {
        if (aead_list[i].enabled_by_defaut || force_all_tests) {
            ret = bench_run_aead(OS, HW, basic_ref, x, aead_list[i].provider, aead_list[i].algo_name, aead_list[i].aead,
                                 aead_list[i].hash, 1000, 1500, &s);
            if (ret != 0) {
                /* Report the failure instead of stopping silently. */
                fprintf(stderr, "Benchmark of %s %s failed, error: %d\n", aead_list[i].provider, aead_list[i].algo_name, ret);
            }
        }
    }

    /* Gratuitous test, designed to ensure that the initial computation
     * of the basic reference benchmark is not optimized away. */
    if (s == 0) {
        printf("Unexpected value of test sum s = %llx\n", (unsigned long long)s);
    }

#ifdef PTLS_HAVE_MBEDTLS
    /* Deinitialize the PSA crypto library. */
    mbedtls_psa_crypto_free();
#endif

    return ret;
}