Document and slightly reorganize mod_pXXX
diff --git a/library/ecp.c b/library/ecp.c
index 21a2315..0f21e2e 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -483,9 +483,20 @@
}
#if defined(POLARSSL_ECP_DP_SECP192R1_ENABLED)
+/*
+ * Compared to the way things are presented in FIPS 186-3 D.2,
+ * we proceed in columns, from right (least significant chunk) to left,
+ * adding chunks to N in place, and keeping a carry for the next chunk.
+ * This avoids moving things around in memory, and uselessly adding zeros,
+ * compared to the more straightforward, line-oriented approach.
+ *
+ * For this prime we need to handle data in chunks of 64 bits.
+ * Since this is always a multiple of our basic t_uint, we can
+ * use a t_uint * to designate such a chunk, and small loops to handle it.
+ */
/* Add 64-bit chunks (dst += src) and update carry */
-static inline void add_64( t_uint *dst, t_uint *src, t_uint *carry )
+static inline void add64( t_uint *dst, t_uint *src, t_uint *carry )
{
unsigned char i;
t_uint c = 0;
@@ -508,11 +519,11 @@
}
}
-#define OFFSET ( 8 / sizeof( t_uint ) )
-#define A( i ) ( N->p + ( i ) * OFFSET )
-#define ADD( i ) add_64( p, A( i ), &c )
-#define NEXT p += OFFSET; carry64( p, &c )
-#define LAST p += OFFSET; *p = c; while( ++p < end ) *p = 0
+#define WIDTH ( 8 / sizeof( t_uint ) ) /* t_uint limbs per 64-bit chunk */
+#define A( i ) ( N->p + ( i ) * WIDTH ) /* address of the i-th 64-bit chunk of N */
+#define ADD( i ) add64( p, A( i ), &c ) /* chunk at p += chunk A( i ), carry kept in c */
+#define NEXT p += WIDTH; carry64( p, &c ) /* step to the next chunk, then run carry64 on it */
+#define LAST p += WIDTH; *p = c; while( ++p < end ) *p = 0 /* store c, zero limbs up to end */
/*
* Fast quasi-reduction modulo p192 (FIPS 186-3 D.2.1)
@@ -523,8 +534,9 @@
t_uint c = 0;
t_uint *p, *end;
- /* Make sure we have the correct number of blocks */
- MPI_CHK( mpi_grow( N, 6 * OFFSET ) );
+ /* Make sure we have enough blocks so that A(5) is legal */
+ MPI_CHK( mpi_grow( N, 6 * WIDTH ) );
+
p = N->p;
end = p + N->n;
@@ -536,28 +548,35 @@
return( ret );
}
-#undef OFFSET
+#undef WIDTH
#undef A
#undef ADD
#undef NEXT
#undef LAST
#endif /* POLARSSL_ECP_DP_SECP192R1_ENABLED */
-#if defined(POLARSSL_ECP_DP_SECP224R1_ENABLED)
+#if defined(POLARSSL_ECP_DP_SECP224R1_ENABLED) || \
+ defined(POLARSSL_ECP_DP_SECP256R1_ENABLED) || \
+ defined(POLARSSL_ECP_DP_SECP384R1_ENABLED)
+/*
+ * The reader is advised to first understand ecp_mod_p192() since the same
+ * general structure is used here, but with additional complications:
+ * (1) chunks of 32 bits, and (2) subtractions.
+ */
-static inline void add32( uint32_t *dst, uint32_t src, signed char *carry )
-{
- *dst += src;
- *carry += ( *dst < src );
-}
+/*
+ * For these primes, we need to handle data in chunks of 32 bits.
+ * This makes it more complicated if we use 64-bit limbs in MPI,
+ * which prevents us from using a uniform access method as for p192.
+ *
+ * So, we define a mini abstraction layer to access 32-bit chunks,
+ * load them in 'cur' for work, and store them back from 'cur' when done.
+ *
+ * While at it, also define the size of N in terms of 32-bit chunks.
+ */
+#define LOAD32 cur = A( i ); /* read 32-bit chunk number i of N into 'cur' */
-static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
-{
- *carry -= ( *dst < src );
- *dst -= src;
-}
-
-#if defined(POLARSSL_HAVE_INT8)
+#if defined(POLARSSL_HAVE_INT8) /* 8 bit */
#define MAX32 N->n / 4
#define A( j ) (uint32_t)( N->p[4*j+0] ) | \
@@ -569,20 +588,20 @@
N->p[4*i+2] = (uint8_t)( cur >> 16 ); \
N->p[4*i+3] = (uint8_t)( cur >> 24 );
-#elif defined(POLARSSL_HAVE_INT16)
+#elif defined(POLARSSL_HAVE_INT16) /* 16 bit */
#define MAX32 N->n / 2
#define A( j ) (uint32_t)( N->p[2*j] ) | ( N->p[2*j+1] << 16 )
#define STORE32 N->p[2*i+0] = (uint16_t)( cur ); \
N->p[2*i+1] = (uint16_t)( cur >> 16 );
-#elif defined(POLARSSL_HAVE_INT32)
+#elif defined(POLARSSL_HAVE_INT32) /* 32 bit */
#define MAX32 N->n
#define A( j ) N->p[j]
#define STORE32 N->p[i] = cur;
-#else /* 64-bit */
+#else /* 64-bit */
#define MAX32 N->n * 2
#define A( j ) j % 2 ? (uint32_t)( N->p[j/2] >> 32 ) : (uint32_t)( N->p[j/2] )
@@ -595,14 +614,37 @@
N->p[i/2] |= (uint64_t) cur; \
}
-#endif
+#endif /* sizeof( t_uint ) */
+
+/*
+ * Helpers for addition and subtraction of chunks, with signed carry.
+ *
+ * Both update *dst modulo 2^32 and adjust *carry by one whenever the
+ * 32-bit operation wraps: add32 increments it, sub32 decrements it.
+ */
+static inline void add32( uint32_t *dst, uint32_t src, signed char *carry )
+{
+ *dst += src;
+ *carry += ( *dst < src ); /* the sum wrapped iff it is now below src */
+}
+
+static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
+{
+ *carry -= ( *dst < src ); /* detect the borrow before *dst is modified */
+ *dst -= src;
+}
#define ADD( j ) add32( &cur, A( j ), &c );
#define SUB( j ) sub32( &cur, A( j ), &c );
-#define LOAD32 cur = A( i );
-
-#define FIRST c = 0; i = 0; LOAD32;
+/*
+ * Helpers for the main 'loop'
+ *
+ * INIT( b ) opens a reduction function: it declares the locals used by the
+ * other helpers (ret, c, cc, cur, i, bits), sets bits to b, calls mpi_grow()
+ * with b * 2 / 8 / sizeof( t_uint ) limbs (i.e. 2 * b bits) and loads
+ * chunk 0 of N into 'cur'.
+ * The argument is parenthesized so that any integer expression may be passed.
+ */
+#define INIT( b ) \
+ int ret; \
+ signed char c = 0, cc; \
+ uint32_t cur; \
+ size_t i = 0, bits = ( b ); \
+ \
+ MPI_CHK( mpi_grow( N, ( b ) * 2 / 8 / sizeof( t_uint ) ) ); \
+ LOAD32;
#define NEXT \
STORE32; i++; LOAD32; \
@@ -638,22 +680,18 @@
return( ret );
}
+#endif /* POLARSSL_ECP_DP_SECP224R1_ENABLED ||
+ POLARSSL_ECP_DP_SECP256R1_ENABLED ||
+ POLARSSL_ECP_DP_SECP384R1_ENABLED */
+#if defined(POLARSSL_ECP_DP_SECP224R1_ENABLED)
/*
* Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
*/
static int ecp_mod_p224( mpi *N )
{
- int ret;
- signed char c, cc;
- uint32_t cur;
- size_t i;
- size_t bits = 224;
+ INIT( 224 );
- /* Make sure we have enough blocks */
- MPI_CHK( mpi_grow( N, bits * 2 / 8 / sizeof( t_uint ) ) );
-
- FIRST;
SUB( 7 ); SUB( 11 ); NEXT; // A0 += -A7 - A11
SUB( 8 ); SUB( 12 ); NEXT; // A1 += -A8 - A12
SUB( 9 ); SUB( 13 ); NEXT; // A2 += -A9 - A13
@@ -667,15 +705,32 @@
}
#endif /* POLARSSL_ECP_DP_SECP224R1_ENABLED */
+#if defined(POLARSSL_ECP_DP_SECP224R1_ENABLED) || \
+ defined(POLARSSL_ECP_DP_SECP256R1_ENABLED) || \
+ defined(POLARSSL_ECP_DP_SECP384R1_ENABLED)
+
+/*
+ * Drop every helper macro of the 32-bit chunk layer so that none leaks into
+ * the rest of the file. ADD and SUB are included: they expand to A(), which
+ * is undefined here, so they could not be used past this point anyway.
+ */
+#undef A
+#undef LOAD32
+#undef STORE32
+#undef MAX32
+#undef ADD
+#undef SUB
+#undef INIT
+#undef NEXT
+#undef LAST
+
+#endif /* POLARSSL_ECP_DP_SECP224R1_ENABLED ||
+ POLARSSL_ECP_DP_SECP256R1_ENABLED ||
+ POLARSSL_ECP_DP_SECP384R1_ENABLED */
+
#if defined(POLARSSL_ECP_DP_SECP521R1_ENABLED)
/*
- * Size of p521 in terms of t_uint
+ * Here we have a real Mersenne prime, so things are more straightforward.
+ * However, things are aligned on a 'weird' boundary (521 bits).
*/
-#define P521_SIZE_INT ( 521 / 8 / sizeof( t_uint ) + 1 )
-/*
- * Bits to keep in the most significant t_uint
- */
+/* Size of p521 in terms of t_uint: limbs for 65 whole bytes, plus one limb for the leftover bits */
+#define P521_WIDTH ( 521 / 8 / sizeof( t_uint ) + 1 )
+
+/* Bits to keep in the most significant t_uint */
#if defined(POLARSSL_HAVE_INT8)
#define P521_MASK 0x01
#else
@@ -691,26 +746,26 @@
int ret;
size_t i;
mpi M;
- t_uint Mp[P521_SIZE_INT+1];
- /* Worst case for the size of M is when sizeof( t_uint ) == 16:
+ t_uint Mp[P521_WIDTH + 1];
+ /* Worst case for the size of M is when t_uint is 16 bits:
* we need to hold bits 513 to 1056, which is 34 limbs, that is
- * P521_SIZE_INT + 1. Otherwise P521_SIZE is enough. */
+ * P521_WIDTH + 1. Otherwise P521_WIDTH is enough. */
- if( N->n < P521_SIZE_INT )
+ if( N->n < P521_WIDTH )
return( 0 );
/* M = A1 */
M.s = 1;
- M.n = N->n - ( P521_SIZE_INT - 1 );
- if( M.n > P521_SIZE_INT + 1 )
- M.n = P521_SIZE_INT + 1;
+ M.n = N->n - ( P521_WIDTH - 1 );
+ if( M.n > P521_WIDTH + 1 )
+ M.n = P521_WIDTH + 1;
M.p = Mp;
- memcpy( Mp, N->p + P521_SIZE_INT - 1, M.n * sizeof( t_uint ) );
+ memcpy( Mp, N->p + P521_WIDTH - 1, M.n * sizeof( t_uint ) );
MPI_CHK( mpi_shift_r( &M, 521 % ( 8 * sizeof( t_uint ) ) ) );
/* N = A0 */
- N->p[P521_SIZE_INT - 1] &= P521_MASK;
- for( i = P521_SIZE_INT; i < N->n; i++ )
+ N->p[P521_WIDTH - 1] &= P521_MASK;
+ for( i = P521_WIDTH; i < N->n; i++ )
N->p[i] = 0;
/* N = A0 + A1 */
@@ -719,6 +774,9 @@
cleanup:
return( ret );
}
+
+#undef P521_WIDTH
+#undef P521_MASK
#endif /* POLARSSL_ECP_DP_SECP521R1_ENABLED */
/*