Fix Lucky 13 cache attack on MD/SHA padding

The basis for the Lucky 13 family of attacks is for an attacker to be able to
distinguish between (long) valid TLS-CBC padding and invalid TLS-CBC padding.
Since our code sets padlen = 0 for invalid padding, the length of the input to
the HMAC function gives information about that.

Information about this length (modulo the MD/SHA block size) can be deduced
from how much MD/SHA padding (this is distinct from TLS-CBC padding) is used.
If MD/SHA padding is read from a (static) buffer, a local attacker could get
information about how much is used via a cache attack targeting that buffer.

Let's get rid of this buffer. Now the only buffer used is the internal MD/SHA
one, which is always read fully by the process() function.
diff --git a/library/md5.c b/library/md5.c
index 8238c2b..2a740cd 100644
--- a/library/md5.c
+++ b/library/md5.c
@@ -309,14 +309,6 @@
 }
 #endif
 
-static const unsigned char md5_padding[64] =
-{
- 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
 /*
  * MD5 final digest
  */
@@ -324,26 +316,48 @@
                             unsigned char output[16] )
 {
     int ret;
-    uint32_t last, padn;
+    uint32_t used;
     uint32_t high, low;
-    unsigned char msglen[8];
 
+    /*
+     * Add padding: 0x80 then 0x00 until 8 bytes remain for the length
+     */
+    used = ctx->total[0] & 0x3F;
+
+    ctx->buffer[used++] = 0x80;
+
+    if( used <= 56 )
+    {
+        /* Enough room for padding + length in current block */
+        memset( ctx->buffer + used, 0, 56 - used );
+    }
+    else
+    {
+        /* We'll need an extra block */
+        memset( ctx->buffer + used, 0, 64 - used );
+
+        if( ( ret = mbedtls_internal_md5_process( ctx, ctx->buffer ) ) != 0 )
+            return( ret );
+
+        memset( ctx->buffer, 0, 56 );
+    }
+
+    /*
+     * Add message length
+     */
     high = ( ctx->total[0] >> 29 )
          | ( ctx->total[1] <<  3 );
     low  = ( ctx->total[0] <<  3 );
 
-    PUT_UINT32_LE( low,  msglen, 0 );
-    PUT_UINT32_LE( high, msglen, 4 );
+    PUT_UINT32_LE( low,  ctx->buffer, 56 );
+    PUT_UINT32_LE( high, ctx->buffer, 60 );
 
-    last = ctx->total[0] & 0x3F;
-    padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
+    if( ( ret = mbedtls_internal_md5_process( ctx, ctx->buffer ) ) != 0 )
+        return( ret );
 
-    if( ( ret = mbedtls_md5_update_ret( ctx, md5_padding, padn ) ) != 0 )
-            return( ret );
-
-    if( ( ret = mbedtls_md5_update_ret( ctx, msglen, 8 ) ) != 0 )
-            return( ret );
-
+    /*
+     * Output final state
+     */
     PUT_UINT32_LE( ctx->state[0], output,  0 );
     PUT_UINT32_LE( ctx->state[1], output,  4 );
     PUT_UINT32_LE( ctx->state[2], output,  8 );