Add option to use smaller AES tables (table sizes reduced by 6144 bytes)
This patch adds MBEDTLS_AES_SMALL_TABLES option to reduce number of AES
look-up tables and thus save 6 KiB of memory. Enabling this option
cause performance hit MBEDTLS_AES_SMALL_TABLES of ~7% on ARM and ~15%
on x86-64.
Benchmark on Cortex-A7 (armhf):
Before:
AES-CBC-128 : 14394 Kb/s, 0 cycles/byte
AES-CBC-192 : 12442 Kb/s, 0 cycles/byte
AES-CBC-256 : 10958 Kb/s, 0 cycles/byte
After:
AES-CBC-128 : 13342 Kb/s, 0 cycles/byte
AES-CBC-192 : 11469 Kb/s, 0 cycles/byte
AES-CBC-256 : 10058 Kb/s, 0 cycles/byte
Benchmark on Intel Core i5-4570 (x86_64, 3.2 Ghz, no turbo):
Before:
AES-CBC-128 : 215759 Kb/s, 14 cycles/byte
AES-CBC-192 : 190884 Kb/s, 16 cycles/byte
AES-CBC-256 : 171536 Kb/s, 18 cycles/byte
After:
AES-CBC-128 : 185108 Kb/s, 16 cycles/byte
AES-CBC-192 : 162839 Kb/s, 19 cycles/byte
AES-CBC-256 : 144700 Kb/s, 21 cycles/byte
diff --git a/include/mbedtls/config.h b/include/mbedtls/config.h
index c4b8995..44def95 100644
--- a/include/mbedtls/config.h
+++ b/include/mbedtls/config.h
@@ -388,6 +388,15 @@
//#define MBEDTLS_AES_ROM_TABLES
/**
+ * \def MBEDTLS_AES_SMALL_TABLES
+ *
+ * Use less ROM/RAM for the AES implementation (saves about 6144 bytes).
+ *
+ * Uncomment this macro to use less memory for AES.
+ */
+//#define MBEDTLS_AES_SMALL_TABLES
+
+/**
* \def MBEDTLS_CAMELLIA_SMALL_MEMORY
*
* Use less ROM for the Camellia implementation (saves about 768 bytes).
diff --git a/library/aes.c b/library/aes.c
index 5e01c4f..aabacf9 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -201,6 +201,8 @@
static const uint32_t FT0[256] = { FT };
#undef V
+#ifndef MBEDTLS_AES_SMALL_TABLES
+
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t FT1[256] = { FT };
#undef V
@@ -213,6 +215,8 @@
static const uint32_t FT3[256] = { FT };
#undef V
+#endif /* !MBEDTLS_AES_SMALL_TABLES */
+
#undef FT
/*
@@ -328,6 +332,8 @@
static const uint32_t RT0[256] = { RT };
#undef V
+#ifndef MBEDTLS_AES_SMALL_TABLES
+
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t RT1[256] = { RT };
#undef V
@@ -340,6 +346,8 @@
static const uint32_t RT3[256] = { RT };
#undef V
+#endif /* !MBEDTLS_AES_SMALL_TABLES */
+
#undef RT
/*
@@ -359,18 +367,22 @@
*/
static unsigned char FSb[256];
static uint32_t FT0[256];
+#ifndef MBEDTLS_AES_SMALL_TABLES
static uint32_t FT1[256];
static uint32_t FT2[256];
static uint32_t FT3[256];
+#endif /* !MBEDTLS_AES_SMALL_TABLES */
/*
* Reverse S-box & tables
*/
static unsigned char RSb[256];
static uint32_t RT0[256];
+#ifndef MBEDTLS_AES_SMALL_TABLES
static uint32_t RT1[256];
static uint32_t RT2[256];
static uint32_t RT3[256];
+#endif /* !MBEDTLS_AES_SMALL_TABLES */
/*
* Round constants
@@ -445,9 +457,11 @@
( (uint32_t) x << 16 ) ^
( (uint32_t) z << 24 );
+#ifndef MBEDTLS_AES_SMALL_TABLES
FT1[i] = ROTL8( FT0[i] );
FT2[i] = ROTL8( FT1[i] );
FT3[i] = ROTL8( FT2[i] );
+#endif /* !MBEDTLS_AES_SMALL_TABLES */
x = RSb[i];
@@ -456,14 +470,48 @@
( (uint32_t) MUL( 0x0D, x ) << 16 ) ^
( (uint32_t) MUL( 0x0B, x ) << 24 );
+#ifndef MBEDTLS_AES_SMALL_TABLES
RT1[i] = ROTL8( RT0[i] );
RT2[i] = ROTL8( RT1[i] );
RT3[i] = ROTL8( RT2[i] );
+#endif /* !MBEDTLS_AES_SMALL_TABLES */
}
}
+#undef ROTL8
+
#endif /* MBEDTLS_AES_ROM_TABLES */
+#ifdef MBEDTLS_AES_SMALL_TABLES
+
+#define ROTL8(x) ( (uint32_t)( ( x ) << 8 ) + (uint32_t)( ( x ) >> 24 ) )
+#define ROTL16(x) ( (uint32_t)( ( x ) << 16 ) + (uint32_t)( ( x ) >> 16 ) )
+#define ROTL24(x) ( (uint32_t)( ( x ) << 24 ) + (uint32_t)( ( x ) >> 8 ) )
+
+#define AES_RT0(idx) RT0[idx]
+#define AES_RT1(idx) ROTL8( RT0[idx] )
+#define AES_RT2(idx) ROTL16( RT0[idx] )
+#define AES_RT3(idx) ROTL24( RT0[idx] )
+
+#define AES_FT0(idx) FT0[idx]
+#define AES_FT1(idx) ROTL8( FT0[idx] )
+#define AES_FT2(idx) ROTL16( FT0[idx] )
+#define AES_FT3(idx) ROTL24( FT0[idx] )
+
+#else /* MBEDTLS_AES_SMALL_TABLES */
+
+#define AES_RT0(idx) RT0[idx]
+#define AES_RT1(idx) RT1[idx]
+#define AES_RT2(idx) RT2[idx]
+#define AES_RT3(idx) RT3[idx]
+
+#define AES_FT0(idx) FT0[idx]
+#define AES_FT1(idx) FT1[idx]
+#define AES_FT2(idx) FT2[idx]
+#define AES_FT3(idx) FT3[idx]
+
+#endif /* MBEDTLS_AES_SMALL_TABLES */
+
void mbedtls_aes_init( mbedtls_aes_context *ctx )
{
memset( ctx, 0, sizeof( mbedtls_aes_context ) );
@@ -641,10 +689,10 @@
{
for( j = 0; j < 4; j++, SK++ )
{
- *RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^
- RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^
- RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^
- RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ];
+ *RK++ = AES_RT0( FSb[ ( *SK ) & 0xFF ] ) ^
+ AES_RT1( FSb[ ( *SK >> 8 ) & 0xFF ] ) ^
+ AES_RT2( FSb[ ( *SK >> 16 ) & 0xFF ] ) ^
+ AES_RT3( FSb[ ( *SK >> 24 ) & 0xFF ] );
}
}
@@ -660,50 +708,50 @@
}
#endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */
-#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
-{ \
- X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \
- FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \
- FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \
- FT3[ ( Y3 >> 24 ) & 0xFF ]; \
- \
- X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \
- FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \
- FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \
- FT3[ ( Y0 >> 24 ) & 0xFF ]; \
- \
- X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \
- FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \
- FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \
- FT3[ ( Y1 >> 24 ) & 0xFF ]; \
- \
- X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \
- FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \
- FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \
- FT3[ ( Y2 >> 24 ) & 0xFF ]; \
+#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
+{ \
+ X0 = *RK++ ^ AES_FT0( ( Y0 ) & 0xFF ) ^ \
+ AES_FT1( ( Y1 >> 8 ) & 0xFF ) ^ \
+ AES_FT2( ( Y2 >> 16 ) & 0xFF ) ^ \
+ AES_FT3( ( Y3 >> 24 ) & 0xFF ); \
+ \
+ X1 = *RK++ ^ AES_FT0( ( Y1 ) & 0xFF ) ^ \
+ AES_FT1( ( Y2 >> 8 ) & 0xFF ) ^ \
+ AES_FT2( ( Y3 >> 16 ) & 0xFF ) ^ \
+ AES_FT3( ( Y0 >> 24 ) & 0xFF ); \
+ \
+ X2 = *RK++ ^ AES_FT0( ( Y2 ) & 0xFF ) ^ \
+ AES_FT1( ( Y3 >> 8 ) & 0xFF ) ^ \
+ AES_FT2( ( Y0 >> 16 ) & 0xFF ) ^ \
+ AES_FT3( ( Y1 >> 24 ) & 0xFF ); \
+ \
+ X3 = *RK++ ^ AES_FT0( ( Y3 ) & 0xFF ) ^ \
+ AES_FT1( ( Y0 >> 8 ) & 0xFF ) ^ \
+ AES_FT2( ( Y1 >> 16 ) & 0xFF ) ^ \
+ AES_FT3( ( Y2 >> 24 ) & 0xFF ); \
}
-#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
-{ \
- X0 = *RK++ ^ RT0[ ( Y0 ) & 0xFF ] ^ \
- RT1[ ( Y3 >> 8 ) & 0xFF ] ^ \
- RT2[ ( Y2 >> 16 ) & 0xFF ] ^ \
- RT3[ ( Y1 >> 24 ) & 0xFF ]; \
- \
- X1 = *RK++ ^ RT0[ ( Y1 ) & 0xFF ] ^ \
- RT1[ ( Y0 >> 8 ) & 0xFF ] ^ \
- RT2[ ( Y3 >> 16 ) & 0xFF ] ^ \
- RT3[ ( Y2 >> 24 ) & 0xFF ]; \
- \
- X2 = *RK++ ^ RT0[ ( Y2 ) & 0xFF ] ^ \
- RT1[ ( Y1 >> 8 ) & 0xFF ] ^ \
- RT2[ ( Y0 >> 16 ) & 0xFF ] ^ \
- RT3[ ( Y3 >> 24 ) & 0xFF ]; \
- \
- X3 = *RK++ ^ RT0[ ( Y3 ) & 0xFF ] ^ \
- RT1[ ( Y2 >> 8 ) & 0xFF ] ^ \
- RT2[ ( Y1 >> 16 ) & 0xFF ] ^ \
- RT3[ ( Y0 >> 24 ) & 0xFF ]; \
+#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
+{ \
+ X0 = *RK++ ^ AES_RT0( ( Y0 ) & 0xFF ) ^ \
+ AES_RT1( ( Y3 >> 8 ) & 0xFF ) ^ \
+ AES_RT2( ( Y2 >> 16 ) & 0xFF ) ^ \
+ AES_RT3( ( Y1 >> 24 ) & 0xFF ); \
+ \
+ X1 = *RK++ ^ AES_RT0( ( Y1 ) & 0xFF ) ^ \
+ AES_RT1( ( Y0 >> 8 ) & 0xFF ) ^ \
+ AES_RT2( ( Y3 >> 16 ) & 0xFF ) ^ \
+ AES_RT3( ( Y2 >> 24 ) & 0xFF ); \
+ \
+ X2 = *RK++ ^ AES_RT0( ( Y2 ) & 0xFF ) ^ \
+ AES_RT1( ( Y1 >> 8 ) & 0xFF ) ^ \
+ AES_RT2( ( Y0 >> 16 ) & 0xFF ) ^ \
+ AES_RT3( ( Y3 >> 24 ) & 0xFF ); \
+ \
+ X3 = *RK++ ^ AES_RT0( ( Y3 ) & 0xFF ) ^ \
+ AES_RT1( ( Y2 >> 8 ) & 0xFF ) ^ \
+ AES_RT2( ( Y1 >> 16 ) & 0xFF ) ^ \
+ AES_RT3( ( Y0 >> 24 ) & 0xFF ); \
}
/*
diff --git a/library/version_features.c b/library/version_features.c
index 9f97c7b..2b65199 100644
--- a/library/version_features.c
+++ b/library/version_features.c
@@ -198,6 +198,9 @@
#if defined(MBEDTLS_AES_ROM_TABLES)
"MBEDTLS_AES_ROM_TABLES",
#endif /* MBEDTLS_AES_ROM_TABLES */
+#if defined(MBEDTLS_AES_SMALL_TABLES)
+ "MBEDTLS_AES_SMALL_TABLES",
+#endif /* MBEDTLS_AES_SMALL_TABLES */
#if defined(MBEDTLS_CAMELLIA_SMALL_MEMORY)
"MBEDTLS_CAMELLIA_SMALL_MEMORY",
#endif /* MBEDTLS_CAMELLIA_SMALL_MEMORY */