ChaCha20: allow in-place en/decryption
All other ciphers so far allow in-place operation. In particular, the TLS
layer depends on it, despite what's documented in the Cipher layer; see:
https://github.com/ARMmbed/mbedtls/issues/1085
https://github.com/ARMmbed/mbedtls/issues/1087
Also, this can be useful for implementing chachapoly without depending on the
semi-internal function keystream_block() (see the next commit).
diff --git a/include/mbedtls/chacha20.h b/include/mbedtls/chacha20.h
index f88bd28..7999702 100644
--- a/include/mbedtls/chacha20.h
+++ b/include/mbedtls/chacha20.h
@@ -133,9 +133,8 @@
*
* This function is used to both encrypt and decrypt data.
*
- * \note The \p input and \p output buffers may overlap, but only
- * if input >= output (i.e. only if input points ahead of
- * the output pointer).
+ * \note The \p input and \p output pointers must either be equal or
+ * point to non-overlapping buffers.
*
* \note mbedtls_chacha20_setkey and mbedtls_chacha20_starts must be
* called at least once to setup the context before this function
diff --git a/library/chacha20.c b/library/chacha20.c
index 28133a6..1abb96e 100644
--- a/library/chacha20.c
+++ b/library/chacha20.c
@@ -314,23 +314,22 @@
/* Process full blocks */
while ( size >= CHACHA20_BLOCK_SIZE_BYTES )
{
- mbedtls_chacha20_block( ctx->initial_state, ctx->working_state, &output[offset] );
+ /* Generate new keystream block and increment counter */
+ mbedtls_chacha20_block( ctx->initial_state, ctx->working_state, ctx->keystream8 );
+ ctx->initial_state[CHACHA20_CTR_INDEX]++;
for ( i = 0U; i < 64U; i += 8U )
{
- output[offset + i ] ^= input[offset + i ];
- output[offset + i + 1U] ^= input[offset + i + 1U];
- output[offset + i + 2U] ^= input[offset + i + 2U];
- output[offset + i + 3U] ^= input[offset + i + 3U];
- output[offset + i + 4U] ^= input[offset + i + 4U];
- output[offset + i + 5U] ^= input[offset + i + 5U];
- output[offset + i + 6U] ^= input[offset + i + 6U];
- output[offset + i + 7U] ^= input[offset + i + 7U];
+ output[offset + i ] = input[offset + i ] ^ ctx->keystream8[i ];
+ output[offset + i + 1U ] = input[offset + i + 1U ] ^ ctx->keystream8[i + 1U ];
+ output[offset + i + 2U ] = input[offset + i + 2U ] ^ ctx->keystream8[i + 2U ];
+ output[offset + i + 3U ] = input[offset + i + 3U ] ^ ctx->keystream8[i + 3U ];
+ output[offset + i + 4U ] = input[offset + i + 4U ] ^ ctx->keystream8[i + 4U ];
+ output[offset + i + 5U ] = input[offset + i + 5U ] ^ ctx->keystream8[i + 5U ];
+ output[offset + i + 6U ] = input[offset + i + 6U ] ^ ctx->keystream8[i + 6U ];
+ output[offset + i + 7U ] = input[offset + i + 7U ] ^ ctx->keystream8[i + 7U ];
}
- /* Increment counter */
- ctx->initial_state[CHACHA20_CTR_INDEX]++;
-
offset += CHACHA20_BLOCK_SIZE_BYTES;
size -= CHACHA20_BLOCK_SIZE_BYTES;
}
@@ -338,7 +337,9 @@
/* Last (partial) block */
if ( size > 0U )
{
+ /* Generate new keystream block and increment counter */
mbedtls_chacha20_block( ctx->initial_state, ctx->working_state, ctx->keystream8 );
+ ctx->initial_state[CHACHA20_CTR_INDEX]++;
for ( i = 0U; i < size; i++)
{
@@ -347,8 +348,6 @@
ctx->keystream_bytes_used = size;
- /* Increment counter */
- ctx->initial_state[CHACHA20_CTR_INDEX]++;
}
return( 0 );