Implement cf_hmac() actually with constant flow
Signed-off-by: Manuel Pégourié-Gonnard <manuel.pegourie-gonnard@arm.com>
diff --git a/library/ssl_msg.c b/library/ssl_msg.c
index d77b658..9b4d1db 100644
--- a/library/ssl_msg.c
+++ b/library/ssl_msg.c
@@ -1071,7 +1071,52 @@
defined(MBEDTLS_SSL_PROTO_TLS1_1) || \
defined(MBEDTLS_SSL_PROTO_TLS1_2) )
/*
+ * Constant-flow conditional memcpy:
+ * - if c1 == c2, equivalent to memcpy(dst, src, len),
+ * - otherwise, a no-op,
+ * but with execution flow independent of the values of c1 and c2.
+ *
+ * Use only bit operations to avoid branches that could be used by some
+ * compilers on some platforms to translate comparison operators.
+ */
+static void mbedtls_ssl_cf_memcpy_if_eq(unsigned char *dst,
+ const unsigned char *src,
+ size_t len,
+ size_t c1, size_t c2 )
+{
+ /* diff = 0 if c1 == c2, non-zero otherwise */
+ const size_t diff = c1 ^ c2;
+
+ /* MSVC has a warning about unary minus on unsigned integer types,
+ * but this is well-defined and precisely what we want to do here. */
+#if defined(_MSC_VER)
+#pragma warning( push )
+#pragma warning( disable : 4146 )
+#endif
+
+ /* diff_msb's most significant bit is bit equal to c1 != c2 */
+ const size_t diff_msb = ( diff | -diff );
+
+ /* diff1 = c1 != c2 */
+ const size_t diff1 = diff_msb >> ( sizeof( diff_msb ) * 8 - 1 );
+
+ /* mask = c1 != c2 ? 0xff : 0x00 */
+ unsigned char mask = (unsigned char) -diff1;
+
+#if defined(_MSC_VER)
+#pragma warning( pop )
+#endif
+
+ /* dst[i] = c1 != c2 ? dst[i] : src[i] */
+ for( size_t i = 0; i < len; i++ )
+ dst[i] = ( dst[i] & mask ) | ( src[i] & ~mask );
+}
+
+/*
* Compute HMAC of variable-length data with constant flow.
+ *
+ * Only works with MD-5, SHA-1, SHA-256 and SHA-384.
+ * (Otherwise, computation of block_size needs to be adapted.)
*/
MBEDTLS_STATIC_TESTABLE int mbedtls_ssl_cf_hmac(
mbedtls_md_context_t *ctx,
@@ -1080,85 +1125,61 @@
size_t min_data_len, size_t max_data_len,
unsigned char *output )
{
- /* WORK IN PROGRESS - THIS IS ONLY PSEUDO-CONTANT-TIME */
-
/*
- * Process MAC and always update for padlen afterwards to make
- * total time independent of padlen.
+ * This function breaks the HMAC abstraction and uses the md_clone()
+ * extension to the MD API in order to get constant-flow behaviour.
*
- * Known timing attacks:
- * - Lucky Thirteen (http://www.isg.rhul.ac.uk/tls/TLStiming.pdf)
+ * HMAC(msg) is defined as HASH(okey + HASH(ikey + msg)) where + means
+ * concatenation, and okey/ikey is the XOR of the key with some fix bit
+ * patterns (see RFC 2104, sec. 2), which are stored in ctx->hmac_ctx.
*
- * To compensate for different timings for the MAC calculation
- * depending on how much padding was removed (which is determined
- * by padlen), process extra_run more blocks through the hash
- * function.
+ * We'll first compute inner_hash = HASH(ikey + msg) by hashing up to
+ * minlen, then cloning the context, and for each byte up to maxlen
+ * finishing up the hash computation, keeping only the correct result.
*
- * The formula in the paper is
- * extra_run = ceil( (L1-55) / 64 ) - ceil( (L2-55) / 64 )
- * where L1 is the size of the header plus the decrypted message
- * plus CBC padding and L2 is the size of the header plus the
- * decrypted message. This is for an underlying hash function
- * with 64-byte blocks.
- * We use ( (Lx+8) / 64 ) to handle 'negative Lx' values
- * correctly. We round down instead of up, so -56 is the correct
- * value for our calculations instead of -55.
- *
- * Repeat the formula rather than defining a block_size variable.
- * This avoids requiring division by a variable at runtime
- * (which would be marginally less efficient and would require
- * linking an extra division function in some builds).
+ * Then we only need to compute HASH(okey + inner_hash) and we're done.
*/
- size_t j, extra_run = 0;
- /* This size is enough to server either as input to
- * md_process() or as output to md_finish() */
- unsigned char tmp[MBEDTLS_MD_MAX_BLOCK_SIZE];
+ const mbedtls_md_type_t md_alg = mbedtls_md_get_type( ctx->md_info );
+ const size_t block_size = md_alg == MBEDTLS_MD_SHA384 ? 128 : 64;
+ const unsigned char * const ikey = (unsigned char *) ctx->hmac_ctx;
+ const unsigned char * const okey = ikey + block_size;
+ const size_t hash_size = mbedtls_md_get_size( ctx->md_info );
- memset( tmp, 0, sizeof( tmp ) );
+ unsigned char aux_out[MBEDTLS_MD_MAX_SIZE];
+ mbedtls_md_context_t aux;
+ size_t offset;
- switch( mbedtls_md_get_type( ctx->md_info ) )
+ mbedtls_md_init( &aux );
+ mbedtls_md_setup( &aux, ctx->md_info, 0 );
+
+ /* After hmac_start() of hmac_reset(), ikey has already been hashed,
+ * so we can start directly with the message */
+ mbedtls_md_update( ctx, add_data, add_data_len );
+ mbedtls_md_update( ctx, data, min_data_len );
+
+ /* For each possible length, compute the hash up to that point */
+ for( offset = min_data_len; offset <= max_data_len; offset++ )
{
-#if defined(MBEDTLS_MD5_C) || defined(MBEDTLS_SHA1_C) || \
-defined(MBEDTLS_SHA256_C)
- case MBEDTLS_MD_MD5:
- case MBEDTLS_MD_SHA1:
- case MBEDTLS_MD_SHA256:
- /* 8 bytes of message size, 64-byte compression blocks */
- extra_run = ( add_data_len + max_data_len + 8 ) / 64 -
- ( add_data_len + data_len_secret + 8 ) / 64;
- break;
-#endif
-#if defined(MBEDTLS_SHA512_C)
- case MBEDTLS_MD_SHA384:
- /* 16 bytes of message size, 128-byte compression blocks */
- extra_run = ( add_data_len + max_data_len + 16 ) / 128 -
- ( add_data_len + data_len_secret + 16 ) / 128;
- break;
-#endif
- default:
- return( MBEDTLS_ERR_SSL_INTERNAL_ERROR );
+ mbedtls_md_clone( &aux, ctx );
+ mbedtls_md_finish( &aux, aux_out );
+ /* Keep only the correct inner_hash in the output buffer */
+ mbedtls_ssl_cf_memcpy_if_eq( output, aux_out, hash_size,
+ offset, data_len_secret );
+
+ if( offset < max_data_len )
+ mbedtls_md_update( ctx, data + offset, 1 );
}
- mbedtls_md_hmac_update( ctx, add_data, add_data_len );
- mbedtls_md_hmac_update( ctx, data, data_len_secret );
- /* Make sure we access everything even when padlen > 0. This
- * makes the synchronisation requirements for just-in-time
- * Prime+Probe attacks much tighter and hopefully impractical. */
- ssl_read_memory( data + min_data_len, max_data_len - min_data_len );
- mbedtls_md_hmac_finish( ctx, output );
-
- /* Dummy calls to compression function.
- * Call mbedtls_md_process at least once due to cache attacks
- * that observe whether md_process() was called of not.
- * Respect the usual start-(process|update)-finish sequence for
- * the sake of hardware accelerators that might require it. */
+ /* Now compute HASH(okey + inner_hash) */
mbedtls_md_starts( ctx );
- for( j = 0; j < extra_run + 1; j++ )
- mbedtls_md_process( ctx, tmp );
- mbedtls_md_finish( ctx, tmp );
+ mbedtls_md_update( ctx, okey, block_size );
+ mbedtls_md_update( ctx, output, hash_size );
+ mbedtls_md_finish( ctx, output );
+ /* Done, get ready for next time */
mbedtls_md_hmac_reset( ctx );
+ mbedtls_md_free( &aux );
return( 0 );
}
#endif /* MBEDTLS_SSL_SOME_SUITES_USE_CBC && TLS 1.0-1.2 */