Merge pull request #325 from h2o/fusion-benchmark

Add fusion to ptlsbench and fix bcrypt
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc0c4eb..d1d6c09 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -93,6 +93,10 @@
     lib/cifra/random.c)
 SET(TEST_EXES test-minicrypto.t)
 
+# Libraries linked into ptlsbench; the optional backend sections below append to this list.
+SET(PTLSBENCH_LIBS
+    picotls-minicrypto picotls-core)
+
 FIND_PACKAGE(OpenSSL)
 IF (OPENSSL_FOUND AND NOT (OPENSSL_VERSION VERSION_LESS "1.0.1"))
     MESSAGE(STATUS "  Enabling OpenSSL support")
@@ -122,9 +126,7 @@
     SET_TARGET_PROPERTIES(test-openssl.t PROPERTIES COMPILE_FLAGS "-DPTLS_MEMORY_DEBUG=1")
     TARGET_LINK_LIBRARIES(test-openssl.t ${OPENSSL_LIBRARIES} ${CMAKE_DL_LIBS})
 
-    ADD_EXECUTABLE(ptlsbench t/ptlsbench.c)
-    SET_TARGET_PROPERTIES(ptlsbench PROPERTIES COMPILE_FLAGS "-DPTLS_MEMORY_DEBUG=1")
-    TARGET_LINK_LIBRARIES(ptlsbench picotls-minicrypto picotls-openssl picotls-core ${OPENSSL_LIBRARIES} ${CMAKE_DL_LIBS})
+    LIST(APPEND PTLSBENCH_LIBS picotls-openssl ${OPENSSL_LIBRARIES} ${CMAKE_DL_LIBS})
 
     SET(TEST_EXES ${TEST_EXES} test-openssl.t)
 ELSE ()
@@ -149,8 +151,14 @@
         ADD_DEPENDENCIES(test-fusion.t generate-picotls-probes)
     ENDIF ()
     SET(TEST_EXES ${TEST_EXES} test-fusion.t)
+    # t/ptlsbench.c lists fusion AEADs, so the benchmark has to link picotls-fusion as well.
+    LIST(APPEND PTLSBENCH_LIBS picotls-fusion)
 ENDIF ()
 
+ADD_EXECUTABLE(ptlsbench t/ptlsbench.c) # AEAD benchmark; links whatever was collected in PTLSBENCH_LIBS above
+SET_TARGET_PROPERTIES(ptlsbench PROPERTIES COMPILE_FLAGS "-DPTLS_MEMORY_DEBUG=1")
+TARGET_LINK_LIBRARIES(ptlsbench ${PTLSBENCH_LIBS}) # NOTE(review): confirm t/ptlsbench.c builds when OpenSSL or fusion was not enabled
+
 ADD_CUSTOM_TARGET(check env BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR} prove --exec '' -v ${CMAKE_CURRENT_BINARY_DIR}/*.t t/*.t WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${TEST_EXES} cli)
 
 IF (CMAKE_SYSTEM_NAME STREQUAL "Linux")
diff --git a/lib/ptlsbcrypt.c b/lib/ptlsbcrypt.c
index 2a9a4d0..9f46a47 100644
--- a/lib/ptlsbcrypt.c
+++ b/lib/ptlsbcrypt.c
@@ -427,6 +427,22 @@
     return cbResult;
 }
 
+void ptls_bcrypt_do_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq,
+                            const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp)
+{
+    /* One-shot AEAD encryption via ctx's init/update/final callbacks; final writes at output + the count update returns. */
+    uint8_t *dst = (uint8_t *)output;
+    ctx->do_encrypt_init(ctx, seq, aad, aadlen);
+    dst += ctx->do_encrypt_update(ctx, output, input, inlen);
+    ctx->do_encrypt_final(ctx, dst);
+    if (supp == NULL)
+        return;
+    /* Supplementary output: zero supp->output, then encrypt it in place using supp->ctx initialized with supp->input. */
+    ptls_cipher_init(supp->ctx, supp->input);
+    memset(supp->output, 0, sizeof(supp->output));
+    ptls_cipher_encrypt(supp->ctx, supp->output, supp->output, sizeof(supp->output));
+}
+
 static size_t ptls_bcrypt_aead_do_decrypt(struct st_ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen,
                                           uint64_t seq, const void *aad, size_t aadlen)
 {
@@ -518,7 +534,7 @@
             ctx->super.do_encrypt_init = ptls_bcrypt_aead_do_encrypt_init;
             ctx->super.do_encrypt_update = ptls_bcrypt_aead_do_encrypt_update;
             ctx->super.do_encrypt_final = ptls_bcrypt_aead_do_encrypt_final;
-            ctx->super.do_encrypt = ptls_aead__do_encrypt;
+            ctx->super.do_encrypt = ptls_bcrypt_do_encrypt;
         } else {
             ctx->super.dispose_crypto = ptls_bcrypt_aead_dispose_crypto;
             ctx->super.do_decrypt = ptls_bcrypt_aead_do_decrypt;
diff --git a/picotlsvs/picotlsvs.sln b/picotlsvs/picotlsvs.sln
index 2b6a843..4f79880 100644
--- a/picotlsvs/picotlsvs.sln
+++ b/picotlsvs/picotlsvs.sln
@@ -24,6 +24,7 @@
 		{559AC085-1BEF-450A-A62D-0D370561D596} = {559AC085-1BEF-450A-A62D-0D370561D596}
 		{499B82B3-F5A5-4C2E-91EF-A2F77CBC33F5} = {499B82B3-F5A5-4C2E-91EF-A2F77CBC33F5}
 		{56C264BF-822B-4F29-B512-5B26157CA2EC} = {56C264BF-822B-4F29-B512-5B26157CA2EC}
+		{55F22DE6-EAAE-4279-97B7-84FAAB7F29BB} = {55F22DE6-EAAE-4279-97B7-84FAAB7F29BB}
 		{0A0E7AF2-05C8-488B-867C-D83B776B8BF4} = {0A0E7AF2-05C8-488B-867C-D83B776B8BF4}
 		{497433FE-B252-4985-A504-54EB791F57F4} = {497433FE-B252-4985-A504-54EB791F57F4}
 	EndProjectSection
diff --git a/picotlsvs/ptlsbench/ptlsbench.vcxproj b/picotlsvs/ptlsbench/ptlsbench.vcxproj
index 4e06766..22e5c16 100644
--- a/picotlsvs/ptlsbench/ptlsbench.vcxproj
+++ b/picotlsvs/ptlsbench/ptlsbench.vcxproj
@@ -91,7 +91,7 @@
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>_DEBUG;_CONSOLE;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;_WINDOWS;_WINDOWS64;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
       <AdditionalIncludeDirectories>$(ProjectDir)..\picotls;$(ProjectDir)..\..\include;$(ProjectDir)\..\..\deps\cifra\src;$(ProjectDir)\..\..\deps\cifra\src\ext;$(ProjectDir)\..\..\deps\micro-ecc;$(OPENSSL64DIR)\include;$(OPENSSL64DIR)\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@@ -100,7 +100,7 @@
       <SubSystem>Console</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalLibraryDirectories>$(OPENSSL64DIR);$(OPENSSL64DIR)\lib;$(OutDir)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>bcrypt.lib;picotls-core.lib;picotls-openssl.lib;picotls-bcrypt.lib;picotls-minicrypto.lib;picotls-minicrypto-deps.lib;libcrypto.lib;ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>bcrypt.lib;picotls-core.lib;picotls-openssl.lib;picotls-bcrypt.lib;picotls-minicrypto.lib;picotls-fusion.lib;picotls-minicrypto-deps.lib;libcrypto.lib;ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -151,7 +151,7 @@
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>NDEBUG;_CONSOLE;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;_WINDOWS;_WINDOWS64;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
       <AdditionalIncludeDirectories>$(ProjectDir)..\picotls;$(ProjectDir)..\..\include;$(ProjectDir)\..\..\deps\cifra\src;$(ProjectDir)\..\..\deps\cifra\src\ext;$(ProjectDir)\..\..\deps\micro-ecc;$(OPENSSL64DIR)\include;$(OPENSSL64DIR)\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
@@ -162,7 +162,7 @@
       <OptimizeReferences>true</OptimizeReferences>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalLibraryDirectories>$(OPENSSL64DIR);$(OPENSSL64DIR)\lib;$(OutDir)</AdditionalLibraryDirectories>
-      <AdditionalDependencies>picotls-core.lib;picotls-openssl.lib;picotls-minicrypto.lib;picotls-minicrypto-deps.lib;libcrypto.lib;bcrypt.lib;ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>picotls-core.lib;picotls-openssl.lib;picotls-minicrypto.lib;picotls-fusion.lib;picotls-minicrypto-deps.lib;libcrypto.lib;bcrypt.lib;ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/t/ptlsbench.c b/t/ptlsbench.c
index 09292e1..5209a02 100644
--- a/t/ptlsbench.c
+++ b/t/ptlsbench.c
@@ -35,6 +35,7 @@
 #include "picotls/ffx.h"
 #include "picotls/minicrypto.h"
 #include "picotls/openssl.h"
+#include "picotls/fusion.h"
 #include <openssl/opensslv.h>
 
 #ifdef _WINDOWS
@@ -60,10 +61,10 @@
     struct timeval tv;
 #ifdef CLOCK_PROCESS_CPUTIME_ID
     struct timespec cpu;
-    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu) == 0){
-        uint64_t nanos = (uint64_t) cpu.tv_nsec;
-        uint64_t micros = nanos/1000;
-        micros += (1000000ull)*((uint64_t)cpu.tv_sec);
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu) == 0) {
+        uint64_t nanos = (uint64_t)cpu.tv_nsec;
+        uint64_t micros = nanos / 1000;
+        micros += (1000000ull) * ((uint64_t)cpu.tv_sec);
         return micros;
     }
 #endif
@@ -77,7 +78,7 @@
 #define BENCH_BATCH 1000
 
 static int bench_run_one(ptls_aead_context_t *e, ptls_aead_context_t *d, size_t n, size_t l, uint64_t *t_enc, uint64_t *t_dec,
-                     uint64_t *s)
+                         uint64_t *s)
 {
     int ret = 0;
     uint8_t *v_in = NULL;
@@ -118,11 +119,7 @@
 
             for (size_t i = 0; i < i_max; i++) {
                 h[0]++;
-
-                ptls_aead_encrypt_init(e, h[0], h, sizeof(h));
-                e_len = ptls_aead_encrypt_update(e, v_enc[i], v_in, l);
-                e_len += ptls_aead_encrypt_final(e, v_enc[i] + e_len);
-
+                e_len = ptls_aead_encrypt(e, v_enc[i], v_in, l, h[0], h, sizeof(h));
                 *s += (v_enc[i])[l];
             }
 
@@ -179,7 +176,8 @@
 
 /* Measure one specific aead implementation
  */
-static int bench_run_aead(char  * OS, char * HW, int basic_ref, uint64_t s0, const char *provider, const char *algo_name, ptls_aead_algorithm_t *aead, ptls_hash_algorithm_t *hash, size_t n, size_t l, uint64_t *s)
+static int bench_run_aead(char *OS, char *HW, int basic_ref, uint64_t s0, const char *provider, const char *algo_name,
+                          ptls_aead_algorithm_t *aead, ptls_hash_algorithm_t *hash, size_t n, size_t l, uint64_t *s)
 {
     int ret = 0;
 
@@ -195,7 +193,7 @@
 
     if (strcmp(provider, "openssl") == 0) {
         /*
-         * OPENSSL_VERSION_NUMBER is a combination of the major, minor and patch version 
+         * OPENSSL_VERSION_NUMBER is a combination of the major, minor and patch version
          * into a single integer 0xMNNFFPP0L, where M is major, NN is minor, PP is patch
          */
         uint32_t combined = OPENSSL_VERSION_NUMBER;
@@ -223,8 +221,8 @@
     } else {
         ret = bench_run_one(e, d, n, l, &t_e, &t_d, s);
         if (ret == 0) {
-            printf("%s, %s, %d, %s, %d, %s, %s, %s, %d, %d, %d, %d, %.2f, %.2f\n", OS, HW, (int)(8 * sizeof(size_t)), BENCH_MODE, basic_ref,
-                   provider, p_version, algo_name, (int)n, (int)l, (int)t_e, (int)t_d, bench_mbps(t_e, l, n),
+            printf("%s, %s, %d, %s, %d, %s, %s, %s, %d, %d, %d, %d, %.2f, %.2f\n", OS, HW, (int)(8 * sizeof(size_t)), BENCH_MODE,
+                   basic_ref, provider, p_version, algo_name, (int)n, (int)l, (int)t_e, (int)t_d, bench_mbps(t_e, l, n),
                    bench_mbps(t_d, l, n));
         }
     }
@@ -257,6 +255,10 @@
     {"ptlsbcrypt", "aes128gcm", &ptls_bcrypt_aes128gcm, &ptls_bcrypt_sha256, 1},
     {"ptlsbcrypt", "aes256gcm", &ptls_bcrypt_aes256gcm, &ptls_bcrypt_sha384, 1},
 #endif
+#if !defined(_WINDOWS) || defined(_WINDOWS64)
+    {"fusion", "aes128gcm", &ptls_fusion_aes128gcm, &ptls_minicrypto_sha256, 1},
+    {"fusion", "aes256gcm", &ptls_fusion_aes256gcm, &ptls_minicrypto_sha384, 1},
+#endif
 #if PTLS_OPENSSL_HAVE_CHACHA20_POLY1305
     {"openssl", "chacha20poly1305", &ptls_openssl_chacha20poly1305, &ptls_minicrypto_sha256, 1},
 #endif
@@ -268,20 +270,20 @@
 static int bench_basic(uint64_t *x)
 {
     uint64_t t_start = bench_time();
-    uint32_t a = (uint32_t)((*x)&0xFFFFFFFF);
-    uint32_t b = (uint32_t)((*x)>>32);
+    uint32_t a = (uint32_t)((*x) & 0xFFFFFFFF);
+    uint32_t b = (uint32_t)((*x) >> 32);
 
-    /* Evaluate the current CPU. The benchmark is designed to 
+    /* Evaluate the current CPU. The benchmark is designed to
      * emulate typical encryption operations, hopefully so it
      * will not be compiled out by the optimizer. */
     for (unsigned int i = 0; i < 10000000; i++) {
-        uint32_t v = (a >> 3)|(a << 29);
+        uint32_t v = (a >> 3) | (a << 29);
         v += a;
         v ^= b;
         b = a;
         a = v;
     }
-    *x = (((uint64_t) b)<<32)|a;
+    *x = (((uint64_t)b) << 32) | a;
 
     return (int)(bench_time() - t_start);
 }
@@ -300,16 +302,16 @@
 #endif
 
 #ifdef _WINDOWS
-    (void) strcpy_s(OS, sizeof(OS), "windows");
+    (void)strcpy_s(OS, sizeof(OS), "windows");
     (void)strcpy_s(HW, sizeof(HW), "x86_64");
 #else
     OS[0] = 0;
     HW[0] = 0;
     if (uname(&uts) == 0) {
-        if (strlen(uts.sysname) + 1 < sizeof(OS)){
+        if (strlen(uts.sysname) + 1 < sizeof(OS)) {
             strcpy(OS, uts.sysname);
         }
-        if (strlen(uts.machine) + 1 < sizeof(HW)){
+        if (strlen(uts.machine) + 1 < sizeof(HW)) {
             strcpy(HW, uts.machine);
         }
     }
@@ -319,11 +321,12 @@
         force_all_tests = 1;
     } else if (argc > 1) {
         fprintf(stderr, "Usage: %s [-f]\n   Use option \"-f\" to force execution of the slower tests.\n", argv[0]);
-        exit (-1);
+        exit(-1);
     }
 
-    printf("OS, HW, bits, mode, 10M ops, provider, version, algorithm, N, L, encrypt us, decrypt us, encrypt mbps, decrypt mbps,\n");
- 
+    printf(
+        "OS, HW, bits, mode, 10M ops, provider, version, algorithm, N, L, encrypt us, decrypt us, encrypt mbps, decrypt mbps,\n");
+
     for (size_t i = 0; ret == 0 && i < nb_aead_list; i++) {
         if (aead_list[i].enabled_by_defaut || force_all_tests) {
             ret = bench_run_aead(OS, HW, basic_ref, x, aead_list[i].provider, aead_list[i].algo_name, aead_list[i].aead,
@@ -333,9 +336,9 @@
 
     /* Gratuitous test, designed to ensure that the initial computation
      * of the basic reference benchmark is not optimized away. */
-    if (s == 0){
-       printf("Unexpected value of test sum s = %llx\n", (unsigned long long)s);
-    } 
+    if (s == 0) {
+        printf("Unexpected value of test sum s = %llx\n", (unsigned long long)s);
+    }
 
     return ret;
 }