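Fix fast ARM mult/square asm for Xcode: write adds/adc/adcs in explicit three-operand form (e.g. "adds r12, r12, r11" instead of "adds r12, r11")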
diff --git a/asm_arm_mult_square.inc b/asm_arm_mult_square.inc
index d46af78..9decef6 100644
--- a/asm_arm_mult_square.inc
+++ b/asm_arm_mult_square.inc
@@ -14,17 +14,17 @@
                                        \
     "mov r10, #0 \n\t"                 \
     "umull r11, r9, r3, r7 \n\t"       \
-    "adds r12, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r11, r14, r4, r6 \n\t"      \
-    "adds r12, r11 \n\t"               \
-    "adcs r9, r14 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r12, r12, r11 \n\t"          \
+    "adcs r9, r9, r14 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "stmia r0!, {r12} \n\t"            \
                                        \
     "umull r12, r14, r4, r7 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adc r10, r14 \n\t"                \
+    "adds r9, r9, r12 \n\t"            \
+    "adc r10, r10, r14 \n\t"           \
     "stmia r0!, {r9, r10} \n\t"        \
                                        \
     "sub r0, 28 \n\t"                  \
@@ -37,123 +37,123 @@
                                        \
     "mov r10, #0 \n\t"                 \
     "umull r11, r9, r3, r7 \n\t"       \
-    "adds r12, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r11, r14, r4, r6 \n\t"      \
-    "adds r12, r11 \n\t"               \
-    "adcs r9, r14 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r12, r12, r11 \n\t"          \
+    "adcs r9, r9, r14 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "stmia r0!, {r12} \n\t"            \
                                        \
     "mov r11, #0 \n\t"                 \
     "umull r12, r14, r3, r8 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r4, r7 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r5, r6 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "stmia r0!, {r9} \n\t"             \
                                        \
     "ldmia r1!, {r3} \n\t"             \
     "mov r12, #0 \n\t"                 \
     "umull r14, r9, r4, r8 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r5, r7 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r3, r6 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "ldr r14, [r0] \n\t"               \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, #0 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, #0 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "stmia r0!, {r10} \n\t"            \
                                        \
     "ldmia r1!, {r4} \n\t"             \
     "mov r14, #0 \n\t"                 \
     "umull r9, r10, r5, r8 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "umull r9, r10, r3, r7 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "umull r9, r10, r4, r6 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "ldr r9, [r0] \n\t"                \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, #0 \n\t"                \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, #0 \n\t"           \
+    "adc r14, r14, #0 \n\t"            \
     "stmia r0!, {r11} \n\t"            \
                                        \
     "ldmia r2!, {r6} \n\t"             \
     "mov r9, #0 \n\t"                  \
     "umull r10, r11, r5, r6 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r3, r8 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r4, r7 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "ldr r10, [r0] \n\t"               \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, #0 \n\t"                \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, #0 \n\t"           \
+    "adc r9, r9, #0 \n\t"              \
     "stmia r0!, {r12} \n\t"            \
                                        \
     "ldmia r2!, {r7} \n\t"             \
     "mov r10, #0 \n\t"                 \
     "umull r11, r12, r5, r7 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r3, r6 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r4, r8 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "ldr r11, [r0] \n\t"               \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, #0 \n\t"                 \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, #0 \n\t"             \
+    "adc r10, r10, #0 \n\t"            \
     "stmia r0!, {r14} \n\t"            \
                                        \
     "mov r11, #0 \n\t"                 \
     "umull r12, r14, r3, r7 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r4, r6 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "stmia r0!, {r9} \n\t"             \
                                        \
     "umull r14, r9, r4, r7 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adc r11, r9 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adc r11, r11, r9 \n\t"            \
     "stmia r0!, {r10, r11} \n\t"
 
 #define FAST_MULT_ASM_6             \
@@ -167,43 +167,43 @@
                                     \
     "mov r10, #0 \n\t"              \
     "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r11 \n\t"            \
-    "adcs r9, r14 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r12, r12, r11 \n\t"       \
+    "adcs r9, r9, r14 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adc r12, r10 \n\t"             \
+    "adds r11, r11, r9 \n\t"        \
+    "adc r12, r12, r10 \n\t"        \
     "stmia r0!, {r11, r12} \n\t"    \
                                     \
     "sub r0, 36 \n\t"               \
@@ -215,163 +215,163 @@
                                     \
     "mov r10, #0 \n\t"              \
     "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r11 \n\t"            \
-    "adcs r9, r14 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r12, r12, r11 \n\t"       \
+    "adcs r9, r9, r14 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "ldmia r1!, {r3} \n\t"          \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r3, r6 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "ldr r14, [r0] \n\t"            \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, #0 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, #0 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "ldmia r1!, {r4} \n\t"          \
     "mov r14, #0 \n\t"              \
     "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r3, r7 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r4, r6 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "ldr r9, [r0] \n\t"             \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, #0 \n\t"             \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, #0 \n\t"        \
+    "adc r14, r14, #0 \n\t"         \
     "stmia r0!, {r11} \n\t"         \
                                     \
     "ldmia r1!, {r5} \n\t"          \
     "mov r9, #0 \n\t"               \
     "umull r10, r11, r3, r8 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r4, r7 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r5, r6 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "ldr r10, [r0] \n\t"            \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, #0 \n\t"             \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, #0 \n\t"        \
+    "adc r9, r9, #0 \n\t"           \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "ldmia r2!, {r6} \n\t"          \
     "mov r10, #0 \n\t"              \
     "umull r11, r12, r3, r6 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r4, r8 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r5, r7 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "ldr r11, [r0] \n\t"            \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, #0 \n\t"              \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, #0 \n\t"          \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r14} \n\t"         \
                                     \
     "ldmia r2!, {r7} \n\t"          \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r3, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r5, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "ldr r12, [r0] \n\t"            \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, #0 \n\t"             \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, #0 \n\t"        \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "ldmia r2!, {r8} \n\t"          \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r3, r8 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r4, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r5, r6 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "ldr r14, [r0] \n\t"            \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, #0 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, #0 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "mov r14, #0 \n\t"              \
     "umull r9, r10, r4, r8 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r5, r7 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "stmia r0!, {r11} \n\t"         \
                                     \
     "umull r10, r11, r5, r8 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adc r14, r11 \n\t"             \
+    "adds r12, r12, r10 \n\t"       \
+    "adc r14, r14, r11 \n\t"        \
     "stmia r0!, {r12, r14} \n\t"
 
 #define FAST_MULT_ASM_7                \
@@ -393,83 +393,83 @@
                                        \
     "mov r14, #0 \n\t"                 \
     "umull r9, r12, r3, r7 \n\t"       \
-    "adds r10, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r9, r11, r4, r6 \n\t"       \
-    "adds r10, r9 \n\t"                \
-    "adcs r12, r11 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r10, r10, r9 \n\t"           \
+    "adcs r12, r12, r11 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "stmia r0!, {r10} \n\t"            \
                                        \
     "mov r9, #0 \n\t"                  \
     "umull r10, r11, r3, r8 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r4, r7 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r5, r6 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "stmia r0!, {r12} \n\t"            \
                                        \
     "ldmia r1!, {r3} \n\t"             \
     "mov r10, #0 \n\t"                 \
     "umull r11, r12, r4, r8 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r5, r7 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r3, r6 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "ldr r11, [r0] \n\t"               \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, #0 \n\t"                 \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, #0 \n\t"             \
+    "adc r10, r10, #0 \n\t"            \
     "stmia r0!, {r14} \n\t"            \
                                        \
     "ldmia r2!, {r6} \n\t"             \
     "mov r11, #0 \n\t"                 \
     "umull r12, r14, r4, r6 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r5, r8 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r3, r7 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "ldr r12, [r0] \n\t"               \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, #0 \n\t"                \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, #0 \n\t"           \
+    "adc r11, r11, #0 \n\t"            \
     "stmia r0!, {r9} \n\t"             \
                                        \
     "mov r12, #0 \n\t"                 \
     "umull r14, r9, r5, r6 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r3, r8 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "stmia r0!, {r10} \n\t"            \
                                        \
     "umull r9, r10, r3, r6 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adc r12, r10 \n\t"                \
+    "adds r11, r11, r9 \n\t"           \
+    "adc r12, r12, r10 \n\t"           \
     "stmia r0!, {r11, r12} \n\t"       \
                                        \
     "sub r0, 44 \n\t"                  \
@@ -483,203 +483,203 @@
                                        \
     "mov r14, #0 \n\t"                 \
     "umull r9, r12, r3, r7 \n\t"       \
-    "adds r10, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r9, r11, r4, r6 \n\t"       \
-    "adds r10, r9 \n\t"                \
-    "adcs r12, r11 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r10, r10, r9 \n\t"           \
+    "adcs r12, r12, r11 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "stmia r0!, {r10} \n\t"            \
                                        \
     "mov r9, #0 \n\t"                  \
     "umull r10, r11, r3, r8 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r4, r7 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r5, r6 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "stmia r0!, {r12} \n\t"            \
                                        \
     "ldmia r1!, {r3} \n\t"             \
     "mov r10, #0 \n\t"                 \
     "umull r11, r12, r4, r8 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r5, r7 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r3, r6 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "ldr r11, [r0] \n\t"               \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, #0 \n\t"                 \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, #0 \n\t"             \
+    "adc r10, r10, #0 \n\t"            \
     "stmia r0!, {r14} \n\t"            \
                                        \
     "ldmia r1!, {r4} \n\t"             \
     "mov r11, #0 \n\t"                 \
     "umull r12, r14, r5, r8 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r3, r7 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r4, r6 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "ldr r12, [r0] \n\t"               \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, #0 \n\t"                \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, #0 \n\t"           \
+    "adc r11, r11, #0 \n\t"            \
     "stmia r0!, {r9} \n\t"             \
                                        \
     "ldmia r1!, {r5} \n\t"             \
     "mov r12, #0 \n\t"                 \
     "umull r14, r9, r3, r8 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r4, r7 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r5, r6 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "ldr r14, [r0] \n\t"               \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, #0 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, #0 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "stmia r0!, {r10} \n\t"            \
                                        \
     "ldmia r1!, {r3} \n\t"             \
     "mov r14, #0 \n\t"                 \
     "umull r9, r10, r4, r8 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "umull r9, r10, r5, r7 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "umull r9, r10, r3, r6 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "ldr r9, [r0] \n\t"                \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, #0 \n\t"                \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, #0 \n\t"           \
+    "adc r14, r14, #0 \n\t"            \
     "stmia r0!, {r11} \n\t"            \
                                        \
     "ldmia r2!, {r6} \n\t"             \
     "mov r9, #0 \n\t"                  \
     "umull r10, r11, r4, r6 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r5, r8 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "umull r10, r11, r3, r7 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, r11 \n\t"               \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, r11 \n\t"          \
+    "adc r9, r9, #0 \n\t"              \
     "ldr r10, [r0] \n\t"               \
-    "adds r12, r10 \n\t"               \
-    "adcs r14, #0 \n\t"                \
-    "adc r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"          \
+    "adcs r14, r14, #0 \n\t"           \
+    "adc r9, r9, #0 \n\t"              \
     "stmia r0!, {r12} \n\t"            \
                                        \
     "ldmia r2!, {r7} \n\t"             \
     "mov r10, #0 \n\t"                 \
     "umull r11, r12, r4, r7 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r5, r6 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "umull r11, r12, r3, r8 \n\t"      \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, r12 \n\t"                \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, r12 \n\t"            \
+    "adc r10, r10, #0 \n\t"            \
     "ldr r11, [r0] \n\t"               \
-    "adds r14, r11 \n\t"               \
-    "adcs r9, #0 \n\t"                 \
-    "adc r10, #0 \n\t"                 \
+    "adds r14, r14, r11 \n\t"          \
+    "adcs r9, r9, #0 \n\t"             \
+    "adc r10, r10, #0 \n\t"            \
     "stmia r0!, {r14} \n\t"            \
                                        \
     "ldmia r2!, {r8} \n\t"             \
     "mov r11, #0 \n\t"                 \
     "umull r12, r14, r4, r8 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r5, r7 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "umull r12, r14, r3, r6 \n\t"      \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, r14 \n\t"               \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, r14 \n\t"          \
+    "adc r11, r11, #0 \n\t"            \
     "ldr r12, [r0] \n\t"               \
-    "adds r9, r12 \n\t"                \
-    "adcs r10, #0 \n\t"                \
-    "adc r11, #0 \n\t"                 \
+    "adds r9, r9, r12 \n\t"            \
+    "adcs r10, r10, #0 \n\t"           \
+    "adc r11, r11, #0 \n\t"            \
     "stmia r0!, {r9} \n\t"             \
                                        \
     "ldmia r2!, {r6} \n\t"             \
     "mov r12, #0 \n\t"                 \
     "umull r14, r9, r4, r6 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r5, r8 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "umull r14, r9, r3, r7 \n\t"       \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, r9 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, r9 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "ldr r14, [r0] \n\t"               \
-    "adds r10, r14 \n\t"               \
-    "adcs r11, #0 \n\t"                \
-    "adc r12, #0 \n\t"                 \
+    "adds r10, r10, r14 \n\t"          \
+    "adcs r11, r11, #0 \n\t"           \
+    "adc r12, r12, #0 \n\t"            \
     "stmia r0!, {r10} \n\t"            \
                                        \
     "mov r14, #0 \n\t"                 \
     "umull r9, r10, r5, r6 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "umull r9, r10, r3, r8 \n\t"       \
-    "adds r11, r9 \n\t"                \
-    "adcs r12, r10 \n\t"               \
-    "adc r14, #0 \n\t"                 \
+    "adds r11, r11, r9 \n\t"           \
+    "adcs r12, r12, r10 \n\t"          \
+    "adc r14, r14, #0 \n\t"            \
     "stmia r0!, {r11} \n\t"            \
                                        \
     "umull r10, r11, r3, r6 \n\t"      \
-    "adds r12, r10 \n\t"               \
-    "adc r14, r11 \n\t"                \
+    "adds r12, r12, r10 \n\t"          \
+    "adc r14, r14, r11 \n\t"           \
     "stmia r0!, {r12, r14} \n\t"
 
 #define FAST_MULT_ASM_8             \
@@ -693,17 +693,17 @@
                                     \
     "mov r10, #0 \n\t"              \
     "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r11 \n\t"            \
-    "adcs r9, r14 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r12, r12, r11 \n\t"       \
+    "adcs r9, r9, r14 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adc r10, r14 \n\t"             \
+    "adds r9, r9, r12 \n\t"         \
+    "adc r10, r10, r14 \n\t"        \
     "stmia r0!, {r9, r10} \n\t"     \
                                     \
     "sub r0, 28 \n\t"               \
@@ -716,123 +716,123 @@
                                     \
     "mov r10, #0 \n\t"              \
     "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r11 \n\t"            \
-    "adcs r9, r14 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r12, r12, r11 \n\t"       \
+    "adcs r9, r9, r14 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "ldmia r1!, {r3} \n\t"          \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r3, r6 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "ldr r14, [r0] \n\t"            \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, #0 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, #0 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "ldmia r1!, {r4} \n\t"          \
     "mov r14, #0 \n\t"              \
     "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r3, r7 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r4, r6 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "ldr r9, [r0] \n\t"             \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, #0 \n\t"             \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, #0 \n\t"        \
+    "adc r14, r14, #0 \n\t"         \
     "stmia r0!, {r11} \n\t"         \
                                     \
     "ldmia r2!, {r6} \n\t"          \
     "mov r9, #0 \n\t"               \
     "umull r10, r11, r5, r6 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r3, r8 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r4, r7 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "ldr r10, [r0] \n\t"            \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, #0 \n\t"             \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, #0 \n\t"        \
+    "adc r9, r9, #0 \n\t"           \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "ldmia r2!, {r7} \n\t"          \
     "mov r10, #0 \n\t"              \
     "umull r11, r12, r5, r7 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r3, r6 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r4, r8 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "ldr r11, [r0] \n\t"            \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, #0 \n\t"              \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, #0 \n\t"          \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r14} \n\t"         \
                                     \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r3, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "umull r14, r9, r4, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adc r11, r9 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adc r11, r11, r9 \n\t"         \
     "stmia r0!, {r10, r11} \n\t"    \
                                     \
     "sub r0, 52 \n\t"               \
@@ -846,243 +846,243 @@
                                     \
     "mov r10, #0 \n\t"              \
     "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r11 \n\t"            \
-    "adcs r9, r14 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r12, r12, r11 \n\t"       \
+    "adcs r9, r9, r14 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "ldmia r1!, {r3} \n\t"          \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r3, r6 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "ldr r14, [r0] \n\t"            \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, #0 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, #0 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "ldmia r1!, {r4} \n\t"          \
     "mov r14, #0 \n\t"              \
     "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r3, r7 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r4, r6 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "ldr r9, [r0] \n\t"             \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, #0 \n\t"             \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, #0 \n\t"        \
+    "adc r14, r14, #0 \n\t"         \
     "stmia r0!, {r11} \n\t"         \
                                     \
     "ldmia r1!, {r5} \n\t"          \
     "mov r9, #0 \n\t"               \
     "umull r10, r11, r3, r8 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r4, r7 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r5, r6 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "ldr r10, [r0] \n\t"            \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, #0 \n\t"             \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, #0 \n\t"        \
+    "adc r9, r9, #0 \n\t"           \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "ldmia r1!, {r3} \n\t"          \
     "mov r10, #0 \n\t"              \
     "umull r11, r12, r4, r8 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r5, r7 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r3, r6 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "ldr r11, [r0] \n\t"            \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, #0 \n\t"              \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, #0 \n\t"          \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r14} \n\t"         \
                                     \
     "ldmia r1!, {r4} \n\t"          \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r5, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r3, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "ldr r12, [r0] \n\t"            \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, #0 \n\t"             \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, #0 \n\t"        \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "ldmia r2!, {r6} \n\t"          \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r5, r6 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r3, r8 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r4, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "ldr r14, [r0] \n\t"            \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, #0 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, #0 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "ldmia r2!, {r7} \n\t"          \
     "mov r14, #0 \n\t"              \
     "umull r9, r10, r5, r7 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r3, r6 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "umull r9, r10, r4, r8 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, r10 \n\t"            \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, r10 \n\t"       \
+    "adc r14, r14, #0 \n\t"         \
     "ldr r9, [r0] \n\t"             \
-    "adds r11, r9 \n\t"             \
-    "adcs r12, #0 \n\t"             \
-    "adc r14, #0 \n\t"              \
+    "adds r11, r11, r9 \n\t"        \
+    "adcs r12, r12, #0 \n\t"        \
+    "adc r14, r14, #0 \n\t"         \
     "stmia r0!, {r11} \n\t"         \
                                     \
     "ldmia r2!, {r8} \n\t"          \
     "mov r9, #0 \n\t"               \
     "umull r10, r11, r5, r8 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r3, r7 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "umull r10, r11, r4, r6 \n\t"   \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, r11 \n\t"            \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, r11 \n\t"       \
+    "adc r9, r9, #0 \n\t"           \
     "ldr r10, [r0] \n\t"            \
-    "adds r12, r10 \n\t"            \
-    "adcs r14, #0 \n\t"             \
-    "adc r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"       \
+    "adcs r14, r14, #0 \n\t"        \
+    "adc r9, r9, #0 \n\t"           \
     "stmia r0!, {r12} \n\t"         \
                                     \
     "ldmia r2!, {r6} \n\t"          \
     "mov r10, #0 \n\t"              \
     "umull r11, r12, r5, r6 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r3, r8 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "umull r11, r12, r4, r7 \n\t"   \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, r12 \n\t"             \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, r12 \n\t"         \
+    "adc r10, r10, #0 \n\t"         \
     "ldr r11, [r0] \n\t"            \
-    "adds r14, r11 \n\t"            \
-    "adcs r9, #0 \n\t"              \
-    "adc r10, #0 \n\t"              \
+    "adds r14, r14, r11 \n\t"       \
+    "adcs r9, r9, #0 \n\t"          \
+    "adc r10, r10, #0 \n\t"         \
     "stmia r0!, {r14} \n\t"         \
                                     \
     "ldmia r2!, {r7} \n\t"          \
     "mov r11, #0 \n\t"              \
     "umull r12, r14, r5, r7 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r3, r6 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "umull r12, r14, r4, r8 \n\t"   \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, r14 \n\t"            \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, r14 \n\t"       \
+    "adc r11, r11, #0 \n\t"         \
     "ldr r12, [r0] \n\t"            \
-    "adds r9, r12 \n\t"             \
-    "adcs r10, #0 \n\t"             \
-    "adc r11, #0 \n\t"              \
+    "adds r9, r9, r12 \n\t"         \
+    "adcs r10, r10, #0 \n\t"        \
+    "adc r11, r11, #0 \n\t"         \
     "stmia r0!, {r9} \n\t"          \
                                     \
     "mov r12, #0 \n\t"              \
     "umull r14, r9, r3, r7 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "umull r14, r9, r4, r6 \n\t"    \
-    "adds r10, r14 \n\t"            \
-    "adcs r11, r9 \n\t"             \
-    "adc r12, #0 \n\t"              \
+    "adds r10, r10, r14 \n\t"       \
+    "adcs r11, r11, r9 \n\t"        \
+    "adc r12, r12, #0 \n\t"         \
     "stmia r0!, {r10} \n\t"         \
                                     \
     "umull r9, r10, r4, r7 \n\t"    \
-    "adds r11, r9 \n\t"             \
-    "adc r12, r10 \n\t"             \
+    "adds r11, r11, r9 \n\t"        \
+    "adc r12, r12, r10 \n\t"        \
     "stmia r0!, {r11, r12} \n\t"
 
 #define FAST_SQUARE_ASM_5               \
@@ -1093,101 +1093,101 @@
                                         \
     "mov r9, #0 \n\t"                   \
     "umull r10, r11, r2, r3 \n\t"       \
-    "adds r12, r10 \n\t"                \
+    "adds r12, r12, r10 \n\t"           \
     "adcs r8, r11, #0 \n\t"             \
-    "adc r9, #0 \n\t"                   \
-    "adds r12, r10 \n\t"                \
-    "adcs r8, r11 \n\t"                 \
-    "adc r9, #0 \n\t"                   \
+    "adc r9, r9, #0 \n\t"               \
+    "adds r12, r12, r10 \n\t"           \
+    "adcs r8, r8, r11 \n\t"             \
+    "adc r9, r9, #0 \n\t"               \
     "stmia r0!, {r12} \n\t"             \
                                         \
     "mov r10, #0 \n\t"                  \
     "umull r11, r12, r2, r4 \n\t"       \
-    "adds r11, r11 \n\t"                \
-    "adcs r12, r12 \n\t"                \
-    "adc r10, #0 \n\t"                  \
-    "adds r8, r11 \n\t"                 \
-    "adcs r9, r12 \n\t"                 \
-    "adc r10, #0 \n\t"                  \
+    "adds r11, r11, r11 \n\t"           \
+    "adcs r12, r12, r12 \n\t"           \
+    "adc r10, r10, #0 \n\t"             \
+    "adds r8, r8, r11 \n\t"             \
+    "adcs r9, r9, r12 \n\t"             \
+    "adc r10, r10, #0 \n\t"             \
     "umull r11, r12, r3, r3 \n\t"       \
-    "adds r8, r11 \n\t"                 \
-    "adcs r9, r12 \n\t"                 \
-    "adc r10, #0 \n\t"                  \
+    "adds r8, r8, r11 \n\t"             \
+    "adcs r9, r9, r12 \n\t"             \
+    "adc r10, r10, #0 \n\t"             \
     "stmia r0!, {r8} \n\t"              \
                                         \
     "mov r12, #0 \n\t"                  \
     "umull r8, r11, r2, r5 \n\t"        \
     "umull r1, r14, r3, r4 \n\t"        \
-    "adds r8, r1 \n\t"                  \
-    "adcs r11, r14 \n\t"                \
-    "adc r12, #0 \n\t"                  \
-    "adds r8, r8 \n\t"                  \
-    "adcs r11, r11 \n\t"                \
-    "adc r12, r12 \n\t"                 \
-    "adds r8, r9 \n\t"                  \
-    "adcs r11, r10 \n\t"                \
-    "adc r12, #0 \n\t"                  \
+    "adds r8, r8, r1 \n\t"              \
+    "adcs r11, r11, r14 \n\t"           \
+    "adc r12, r12, #0 \n\t"             \
+    "adds r8, r8, r8 \n\t"              \
+    "adcs r11, r11, r11 \n\t"           \
+    "adc r12, r12, r12 \n\t"            \
+    "adds r8, r8, r9 \n\t"              \
+    "adcs r11, r11, r10 \n\t"           \
+    "adc r12, r12, #0 \n\t"             \
     "stmia r0!, {r8} \n\t"              \
                                         \
     "mov r10, #0 \n\t"                  \
     "umull r8, r9, r2, r6 \n\t"         \
     "umull r1, r14, r3, r5 \n\t"        \
-    "adds r8, r1 \n\t"                  \
-    "adcs r9, r14 \n\t"                 \
-    "adc r10, #0 \n\t"                  \
-    "adds r8, r8 \n\t"                  \
-    "adcs r9, r9 \n\t"                  \
-    "adc r10, r10 \n\t"                 \
+    "adds r8, r8, r1 \n\t"              \
+    "adcs r9, r9, r14 \n\t"             \
+    "adc r10, r10, #0 \n\t"             \
+    "adds r8, r8, r8 \n\t"              \
+    "adcs r9, r9, r9 \n\t"              \
+    "adc r10, r10, r10 \n\t"            \
     "umull r1, r14, r4, r4 \n\t"        \
-    "adds r8, r1 \n\t"                  \
-    "adcs r9, r14 \n\t"                 \
-    "adc r10, #0 \n\t"                  \
-    "adds r8, r11 \n\t"                 \
-    "adcs r9, r12 \n\t"                 \
-    "adc r10, #0 \n\t"                  \
+    "adds r8, r8, r1 \n\t"              \
+    "adcs r9, r9, r14 \n\t"             \
+    "adc r10, r10, #0 \n\t"             \
+    "adds r8, r8, r11 \n\t"             \
+    "adcs r9, r9, r12 \n\t"             \
+    "adc r10, r10, #0 \n\t"             \
     "stmia r0!, {r8} \n\t"              \
                                         \
     "mov r12, #0 \n\t"                  \
     "umull r8, r11, r3, r6 \n\t"        \
     "umull r1, r14, r4, r5 \n\t"        \
-    "adds r8, r1 \n\t"                  \
-    "adcs r11, r14 \n\t"                \
-    "adc r12, #0 \n\t"                  \
-    "adds r8, r8 \n\t"                  \
-    "adcs r11, r11 \n\t"                \
-    "adc r12, r12 \n\t"                 \
-    "adds r8, r9 \n\t"                  \
-    "adcs r11, r10 \n\t"                \
-    "adc r12, #0 \n\t"                  \
+    "adds r8, r8, r1 \n\t"              \
+    "adcs r11, r11, r14 \n\t"           \
+    "adc r12, r12, #0 \n\t"             \
+    "adds r8, r8, r8 \n\t"              \
+    "adcs r11, r11, r11 \n\t"           \
+    "adc r12, r12, r12 \n\t"            \
+    "adds r8, r8, r9 \n\t"              \
+    "adcs r11, r11, r10 \n\t"           \
+    "adc r12, r12, #0 \n\t"             \
     "stmia r0!, {r8} \n\t"              \
                                         \
     "mov r8, #0 \n\t"                   \
     "umull r1, r10, r4, r6 \n\t"        \
-    "adds r1, r1 \n\t"                  \
-    "adcs r10, r10 \n\t"                \
-    "adc r8, #0 \n\t"                   \
-    "adds r11, r1 \n\t"                 \
-    "adcs r12, r10 \n\t"                \
-    "adc r8, #0 \n\t"                   \
+    "adds r1, r1, r1 \n\t"              \
+    "adcs r10, r10, r10 \n\t"           \
+    "adc r8, r8, #0 \n\t"               \
+    "adds r11, r11, r1 \n\t"            \
+    "adcs r12, r12, r10 \n\t"           \
+    "adc r8, r8, #0 \n\t"               \
     "umull r1, r10, r5, r5 \n\t"        \
-    "adds r11, r1 \n\t"                 \
-    "adcs r12, r10 \n\t"                \
-    "adc r8, #0 \n\t"                   \
+    "adds r11, r11, r1 \n\t"            \
+    "adcs r12, r12, r10 \n\t"           \
+    "adc r8, r8, #0 \n\t"               \
     "stmia r0!, {r11} \n\t"             \
                                         \
     "mov r11, #0 \n\t"                  \
     "umull r1, r10, r5, r6 \n\t"        \
-    "adds r1, r1 \n\t"                  \
-    "adcs r10, r10 \n\t"                \
-    "adc r11, #0 \n\t"                  \
-    "adds r12, r1 \n\t"                 \
-    "adcs r8, r10 \n\t"                 \
-    "adc r11, #0 \n\t"                  \
+    "adds r1, r1, r1 \n\t"              \
+    "adcs r10, r10, r10 \n\t"           \
+    "adc r11, r11, #0 \n\t"             \
+    "adds r12, r12, r1 \n\t"            \
+    "adcs r8, r8, r10 \n\t"             \
+    "adc r11, r11, #0 \n\t"             \
     "stmia r0!, {r12} \n\t"             \
                                         \
     "umull r1, r10, r6, r6 \n\t"        \
-    "adds r8, r1 \n\t"                  \
-    "adcs r11, r10 \n\t"                \
+    "adds r8, r8, r1 \n\t"              \
+    "adcs r11, r11, r10 \n\t"           \
     "stmia r0!, {r8, r11} \n\t"
 
 #define FAST_SQUARE_ASM_6                  \
@@ -1198,137 +1198,137 @@
                                            \
     "mov r9, #0 \n\t"                      \
     "umull r10, r11, r2, r3 \n\t"          \
-    "adds r12, r10 \n\t"                   \
+    "adds r12, r12, r10 \n\t"              \
     "adcs r8, r11, #0 \n\t"                \
-    "adc r9, #0 \n\t"                      \
-    "adds r12, r10 \n\t"                   \
-    "adcs r8, r11 \n\t"                    \
-    "adc r9, #0 \n\t"                      \
+    "adc r9, r9, #0 \n\t"                  \
+    "adds r12, r12, r10 \n\t"              \
+    "adcs r8, r8, r11 \n\t"                \
+    "adc r9, r9, #0 \n\t"                  \
     "stmia r0!, {r12} \n\t"                \
                                            \
     "mov r10, #0 \n\t"                     \
     "umull r11, r12, r2, r4 \n\t"          \
-    "adds r11, r11 \n\t"                   \
-    "adcs r12, r12 \n\t"                   \
-    "adc r10, #0 \n\t"                     \
-    "adds r8, r11 \n\t"                    \
-    "adcs r9, r12 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
+    "adds r11, r11, r11 \n\t"              \
+    "adcs r12, r12, r12 \n\t"              \
+    "adc r10, r10, #0 \n\t"                \
+    "adds r8, r8, r11 \n\t"                \
+    "adcs r9, r9, r12 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
     "umull r11, r12, r3, r3 \n\t"          \
-    "adds r8, r11 \n\t"                    \
-    "adcs r9, r12 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
+    "adds r8, r8, r11 \n\t"                \
+    "adcs r9, r9, r12 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
     "stmia r0!, {r8} \n\t"                 \
                                            \
     "mov r12, #0 \n\t"                     \
     "umull r8, r11, r2, r5 \n\t"           \
     "umull r1, r14, r3, r4 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r11, r14 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
-    "adds r8, r8 \n\t"                     \
-    "adcs r11, r11 \n\t"                   \
-    "adc r12, r12 \n\t"                    \
-    "adds r8, r9 \n\t"                     \
-    "adcs r11, r10 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r11, r11, r14 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
+    "adds r8, r8, r8 \n\t"                 \
+    "adcs r11, r11, r11 \n\t"              \
+    "adc r12, r12, r12 \n\t"               \
+    "adds r8, r8, r9 \n\t"                 \
+    "adcs r11, r11, r10 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
     "stmia r0!, {r8} \n\t"                 \
                                            \
     "mov r10, #0 \n\t"                     \
     "umull r8, r9, r2, r6 \n\t"            \
     "umull r1, r14, r3, r5 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r9, r14 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
-    "adds r8, r8 \n\t"                     \
-    "adcs r9, r9 \n\t"                     \
-    "adc r10, r10 \n\t"                    \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r9, r9, r14 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
+    "adds r8, r8, r8 \n\t"                 \
+    "adcs r9, r9, r9 \n\t"                 \
+    "adc r10, r10, r10 \n\t"               \
     "umull r1, r14, r4, r4 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r9, r14 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
-    "adds r8, r11 \n\t"                    \
-    "adcs r9, r12 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r9, r9, r14 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
+    "adds r8, r8, r11 \n\t"                \
+    "adcs r9, r9, r12 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
     "stmia r0!, {r8} \n\t"                 \
                                            \
     "mov r12, #0 \n\t"                     \
     "umull r8, r11, r2, r7 \n\t"           \
     "umull r1, r14, r3, r6 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r11, r14 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r11, r11, r14 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
     "umull r1, r14, r4, r5 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r11, r14 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
-    "adds r8, r8 \n\t"                     \
-    "adcs r11, r11 \n\t"                   \
-    "adc r12, r12 \n\t"                    \
-    "adds r8, r9 \n\t"                     \
-    "adcs r11, r10 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r11, r11, r14 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
+    "adds r8, r8, r8 \n\t"                 \
+    "adcs r11, r11, r11 \n\t"              \
+    "adc r12, r12, r12 \n\t"               \
+    "adds r8, r8, r9 \n\t"                 \
+    "adcs r11, r11, r10 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
     "stmia r0!, {r8} \n\t"                 \
                                            \
     "mov r10, #0 \n\t"                     \
     "umull r8, r9, r3, r7 \n\t"            \
     "umull r1, r14, r4, r6 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r9, r14 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
-    "adds r8, r8 \n\t"                     \
-    "adcs r9, r9 \n\t"                     \
-    "adc r10, r10 \n\t"                    \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r9, r9, r14 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
+    "adds r8, r8, r8 \n\t"                 \
+    "adcs r9, r9, r9 \n\t"                 \
+    "adc r10, r10, r10 \n\t"               \
     "umull r1, r14, r5, r5 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r9, r14 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
-    "adds r8, r11 \n\t"                    \
-    "adcs r9, r12 \n\t"                    \
-    "adc r10, #0 \n\t"                     \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r9, r9, r14 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
+    "adds r8, r8, r11 \n\t"                \
+    "adcs r9, r9, r12 \n\t"                \
+    "adc r10, r10, #0 \n\t"                \
     "stmia r0!, {r8} \n\t"                 \
                                            \
     "mov r12, #0 \n\t"                     \
     "umull r8, r11, r4, r7 \n\t"           \
     "umull r1, r14, r5, r6 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r11, r14 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
-    "adds r8, r8 \n\t"                     \
-    "adcs r11, r11 \n\t"                   \
-    "adc r12, r12 \n\t"                    \
-    "adds r8, r9 \n\t"                     \
-    "adcs r11, r10 \n\t"                   \
-    "adc r12, #0 \n\t"                     \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r11, r11, r14 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
+    "adds r8, r8, r8 \n\t"                 \
+    "adcs r11, r11, r11 \n\t"              \
+    "adc r12, r12, r12 \n\t"               \
+    "adds r8, r8, r9 \n\t"                 \
+    "adcs r11, r11, r10 \n\t"              \
+    "adc r12, r12, #0 \n\t"                \
     "stmia r0!, {r8} \n\t"                 \
                                            \
     "mov r8, #0 \n\t"                      \
     "umull r1, r10, r5, r7 \n\t"           \
-    "adds r1, r1 \n\t"                     \
-    "adcs r10, r10 \n\t"                   \
-    "adc r8, #0 \n\t"                      \
-    "adds r11, r1 \n\t"                    \
-    "adcs r12, r10 \n\t"                   \
-    "adc r8, #0 \n\t"                      \
+    "adds r1, r1, r1 \n\t"                 \
+    "adcs r10, r10, r10 \n\t"              \
+    "adc r8, r8, #0 \n\t"                  \
+    "adds r11, r11, r1 \n\t"               \
+    "adcs r12, r12, r10 \n\t"              \
+    "adc r8, r8, #0 \n\t"                  \
     "umull r1, r10, r6, r6 \n\t"           \
-    "adds r11, r1 \n\t"                    \
-    "adcs r12, r10 \n\t"                   \
-    "adc r8, #0 \n\t"                      \
+    "adds r11, r11, r1 \n\t"               \
+    "adcs r12, r12, r10 \n\t"              \
+    "adc r8, r8, #0 \n\t"                  \
     "stmia r0!, {r11} \n\t"                \
                                            \
     "mov r11, #0 \n\t"                     \
     "umull r1, r10, r6, r7 \n\t"           \
-    "adds r1, r1 \n\t"                     \
-    "adcs r10, r10 \n\t"                   \
-    "adc r11, #0 \n\t"                     \
-    "adds r12, r1 \n\t"                    \
-    "adcs r8, r10 \n\t"                    \
-    "adc r11, #0 \n\t"                     \
+    "adds r1, r1, r1 \n\t"                 \
+    "adcs r10, r10, r10 \n\t"              \
+    "adc r11, r11, #0 \n\t"                \
+    "adds r12, r12, r1 \n\t"               \
+    "adcs r8, r8, r10 \n\t"                \
+    "adc r11, r11, #0 \n\t"                \
     "stmia r0!, {r12} \n\t"                \
                                            \
     "umull r1, r10, r7, r7 \n\t"           \
-    "adds r8, r1 \n\t"                     \
-    "adcs r11, r10 \n\t"                   \
+    "adds r8, r8, r1 \n\t"                 \
+    "adcs r11, r11, r10 \n\t"              \
     "stmia r0!, {r8, r11} \n\t"
 
 #define FAST_SQUARE_ASM_7                      \
@@ -1348,26 +1348,26 @@
                                                \
     "mov r9, #0 \n\t"                          \
     "umull r10, r11, r2, r3 \n\t"              \
-    "adds r12, r10 \n\t"                       \
+    "adds r12, r12, r10 \n\t"                  \
     "adcs r8, r11, #0 \n\t"                    \
-    "adc r9, #0 \n\t"                          \
-    "adds r12, r10 \n\t"                       \
-    "adcs r8, r11 \n\t"                        \
-    "adc r9, #0 \n\t"                          \
+    "adc r9, r9, #0 \n\t"                      \
+    "adds r12, r12, r10 \n\t"                  \
+    "adcs r8, r8, r11 \n\t"                    \
+    "adc r9, r9, #0 \n\t"                      \
     "stmia r0!, {r12} \n\t"                    \
                                                \
     "mov r10, #0 \n\t"                         \
     "umull r11, r12, r2, r4 \n\t"              \
-    "adds r11, r11 \n\t"                       \
-    "adcs r12, r12 \n\t"                       \
-    "adc r10, #0 \n\t"                         \
-    "adds r8, r11 \n\t"                        \
-    "adcs r9, r12 \n\t"                        \
-    "adc r10, #0 \n\t"                         \
+    "adds r11, r11, r11 \n\t"                  \
+    "adcs r12, r12, r12 \n\t"                  \
+    "adc r10, r10, #0 \n\t"                    \
+    "adds r8, r8, r11 \n\t"                    \
+    "adcs r9, r9, r12 \n\t"                    \
+    "adc r10, r10, #0 \n\t"                    \
     "umull r11, r12, r3, r3 \n\t"              \
-    "adds r8, r11 \n\t"                        \
-    "adcs r9, r12 \n\t"                        \
-    "adc r10, #0 \n\t"                         \
+    "adds r8, r8, r11 \n\t"                    \
+    "adcs r9, r9, r12 \n\t"                    \
+    "adc r10, r10, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r12, #0 \n\t"                         \
@@ -1376,13 +1376,13 @@
     "umlal r8, r11, r3, r4 \n\t"               \
     "cmp r14, r11 \n\t"                        \
     "it hi \n\t"                               \
-    "adchi r12, #0 \n\t"                       \
-    "adds r8, r8 \n\t"                         \
-    "adcs r11, r11 \n\t"                       \
-    "adc r12, r12 \n\t"                        \
-    "adds r8, r9 \n\t"                         \
-    "adcs r11, r10 \n\t"                       \
-    "adc r12, #0 \n\t"                         \
+    "adchi r12, r12, #0 \n\t"                  \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r11, r11, r11 \n\t"                  \
+    "adc r12, r12, r12 \n\t"                   \
+    "adds r8, r8, r9 \n\t"                     \
+    "adcs r11, r11, r10 \n\t"                  \
+    "adc r12, r12, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r10, #0 \n\t"                         \
@@ -1391,18 +1391,18 @@
     "umlal r8, r9, r3, r5 \n\t"                \
     "cmp r14, r9 \n\t"                         \
     "it hi \n\t"                               \
-    "adchi r10, #0 \n\t"                       \
-    "adds r8, r8 \n\t"                         \
-    "adcs r9, r9 \n\t"                         \
-    "adc r10, r10 \n\t"                        \
+    "adchi r10, r10, #0 \n\t"                  \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r9, r9, r9 \n\t"                     \
+    "adc r10, r10, r10 \n\t"                   \
     "mov r14, r9 \n\t"                         \
     "umlal r8, r9, r4, r4 \n\t"                \
     "cmp r14, r9 \n\t"                         \
     "it hi \n\t"                               \
-    "adchi r10, #0 \n\t"                       \
-    "adds r8, r11 \n\t"                        \
-    "adcs r9, r12 \n\t"                        \
-    "adc r10, #0 \n\t"                         \
+    "adchi r10, r10, #0 \n\t"                  \
+    "adds r8, r8, r11 \n\t"                    \
+    "adcs r9, r9, r12 \n\t"                    \
+    "adc r10, r10, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r12, #0 \n\t"                         \
@@ -1411,18 +1411,18 @@
     "umlal r8, r11, r3, r6 \n\t"               \
     "cmp r14, r11 \n\t"                        \
     "it hi \n\t"                               \
-    "adchi r12, #0 \n\t"                       \
+    "adchi r12, r12, #0 \n\t"                  \
     "mov r14, r11 \n\t"                        \
     "umlal r8, r11, r4, r5 \n\t"               \
     "cmp r14, r11 \n\t"                        \
     "it hi \n\t"                               \
-    "adchi r12, #0 \n\t"                       \
-    "adds r8, r8 \n\t"                         \
-    "adcs r11, r11 \n\t"                       \
-    "adc r12, r12 \n\t"                        \
-    "adds r8, r9 \n\t"                         \
-    "adcs r11, r10 \n\t"                       \
-    "adc r12, #0 \n\t"                         \
+    "adchi r12, r12, #0 \n\t"                  \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r11, r11, r11 \n\t"                  \
+    "adc r12, r12, r12 \n\t"                   \
+    "adds r8, r8, r9 \n\t"                     \
+    "adcs r11, r11, r10 \n\t"                  \
+    "adc r12, r12, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "ldmia r1!, {r2} \n\t"                     \
@@ -1432,22 +1432,22 @@
     "umlal r8, r9, r4, r6 \n\t"                \
     "cmp r14, r9 \n\t"                         \
     "it hi \n\t"                               \
-    "adchi r10, #0 \n\t"                       \
+    "adchi r10, r10, #0 \n\t"                  \
     "ldr r14, [r0] \n\t"                       \
-    "adds r8, r14 \n\t"                        \
-    "adcs r9, #0 \n\t"                         \
-    "adc r10, #0 \n\t"                         \
-    "adds r8, r8 \n\t"                         \
-    "adcs r9, r9 \n\t"                         \
-    "adc r10, r10 \n\t"                        \
+    "adds r8, r8, r14 \n\t"                    \
+    "adcs r9, r9, #0 \n\t"                     \
+    "adc r10, r10, #0 \n\t"                    \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r9, r9, r9 \n\t"                     \
+    "adc r10, r10, r10 \n\t"                   \
     "mov r14, r9 \n\t"                         \
     "umlal r8, r9, r5, r5 \n\t"                \
     "cmp r14, r9 \n\t"                         \
     "it hi \n\t"                               \
-    "adchi r10, #0 \n\t"                       \
-    "adds r8, r11 \n\t"                        \
-    "adcs r9, r12 \n\t"                        \
-    "adc r10, #0 \n\t"                         \
+    "adchi r10, r10, #0 \n\t"                  \
+    "adds r8, r8, r11 \n\t"                    \
+    "adcs r9, r9, r12 \n\t"                    \
+    "adc r10, r10, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r12, #0 \n\t"                         \
@@ -1456,22 +1456,22 @@
     "umlal r8, r11, r4, r7 \n\t"               \
     "cmp r14, r11 \n\t"                        \
     "it hi \n\t"                               \
-    "adchi r12, #0 \n\t"                       \
+    "adchi r12, r12, #0 \n\t"                  \
     "mov r14, r11 \n\t"                        \
     "umlal r8, r11, r5, r6 \n\t"               \
     "cmp r14, r11 \n\t"                        \
     "it hi \n\t"                               \
-    "adchi r12, #0 \n\t"                       \
+    "adchi r12, r12, #0 \n\t"                  \
     "ldr r14, [r0] \n\t"                       \
-    "adds r8, r14 \n\t"                        \
-    "adcs r11, #0 \n\t"                        \
-    "adc r12, #0 \n\t"                         \
-    "adds r8, r8 \n\t"                         \
-    "adcs r11, r11 \n\t"                       \
-    "adc r12, r12 \n\t"                        \
-    "adds r8, r9 \n\t"                         \
-    "adcs r11, r10 \n\t"                       \
-    "adc r12, #0 \n\t"                         \
+    "adds r8, r8, r14 \n\t"                    \
+    "adcs r11, r11, #0 \n\t"                   \
+    "adc r12, r12, #0 \n\t"                    \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r11, r11, r11 \n\t"                  \
+    "adc r12, r12, r12 \n\t"                   \
+    "adds r8, r8, r9 \n\t"                     \
+    "adcs r11, r11, r10 \n\t"                  \
+    "adc r12, r12, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r10, #0 \n\t"                         \
@@ -1480,18 +1480,18 @@
     "umlal r8, r9, r5, r7 \n\t"                \
     "cmp r14, r9 \n\t"                         \
     "it hi \n\t"                               \
-    "adchi r10, #0 \n\t"                       \
-    "adds r8, r8 \n\t"                         \
-    "adcs r9, r9 \n\t"                         \
-    "adc r10, r10 \n\t"                        \
+    "adchi r10, r10, #0 \n\t"                  \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r9, r9, r9 \n\t"                     \
+    "adc r10, r10, r10 \n\t"                   \
     "mov r14, r9 \n\t"                         \
     "umlal r8, r9, r6, r6 \n\t"                \
     "cmp r14, r9 \n\t"                         \
     "it hi \n\t"                               \
-    "adchi r10, #0 \n\t"                       \
-    "adds r8, r11 \n\t"                        \
-    "adcs r9, r12 \n\t"                        \
-    "adc r10, #0 \n\t"                         \
+    "adchi r10, r10, #0 \n\t"                  \
+    "adds r8, r8, r11 \n\t"                    \
+    "adcs r9, r9, r12 \n\t"                    \
+    "adc r10, r10, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r12, #0 \n\t"                         \
@@ -1500,42 +1500,42 @@
     "umlal r8, r11, r6, r7 \n\t"               \
     "cmp r14, r11 \n\t"                        \
     "it hi \n\t"                               \
-    "adchi r12, #0 \n\t"                       \
-    "adds r8, r8 \n\t"                         \
-    "adcs r11, r11 \n\t"                       \
-    "adc r12, r12 \n\t"                        \
-    "adds r8, r9 \n\t"                         \
-    "adcs r11, r10 \n\t"                       \
-    "adc r12, #0 \n\t"                         \
+    "adchi r12, r12, #0 \n\t"                  \
+    "adds r8, r8, r8 \n\t"                     \
+    "adcs r11, r11, r11 \n\t"                  \
+    "adc r12, r12, r12 \n\t"                   \
+    "adds r8, r8, r9 \n\t"                     \
+    "adcs r11, r11, r10 \n\t"                  \
+    "adc r12, r12, #0 \n\t"                    \
     "stmia r0!, {r8} \n\t"                     \
                                                \
     "mov r8, #0 \n\t"                          \
     "umull r1, r10, r6, r2 \n\t"               \
-    "adds r1, r1 \n\t"                         \
-    "adcs r10, r10 \n\t"                       \
-    "adc r8, #0 \n\t"                          \
-    "adds r11, r1 \n\t"                        \
-    "adcs r12, r10 \n\t"                       \
-    "adc r8, #0 \n\t"                          \
+    "adds r1, r1, r1 \n\t"                     \
+    "adcs r10, r10, r10 \n\t"                  \
+    "adc r8, r8, #0 \n\t"                      \
+    "adds r11, r11, r1 \n\t"                   \
+    "adcs r12, r12, r10 \n\t"                  \
+    "adc r8, r8, #0 \n\t"                      \
     "umull r1, r10, r7, r7 \n\t"               \
-    "adds r11, r1 \n\t"                        \
-    "adcs r12, r10 \n\t"                       \
-    "adc r8, #0 \n\t"                          \
+    "adds r11, r11, r1 \n\t"                   \
+    "adcs r12, r12, r10 \n\t"                  \
+    "adc r8, r8, #0 \n\t"                      \
     "stmia r0!, {r11} \n\t"                    \
                                                \
     "mov r11, #0 \n\t"                         \
     "umull r1, r10, r7, r2 \n\t"               \
-    "adds r1, r1 \n\t"                         \
-    "adcs r10, r10 \n\t"                       \
-    "adc r11, #0 \n\t"                         \
-    "adds r12, r1 \n\t"                        \
-    "adcs r8, r10 \n\t"                        \
-    "adc r11, #0 \n\t"                         \
+    "adds r1, r1, r1 \n\t"                     \
+    "adcs r10, r10, r10 \n\t"                  \
+    "adc r11, r11, #0 \n\t"                    \
+    "adds r12, r12, r1 \n\t"                   \
+    "adcs r8, r8, r10 \n\t"                    \
+    "adc r11, r11, #0 \n\t"                    \
     "stmia r0!, {r12} \n\t"                    \
                                                \
     "umull r1, r10, r2, r2 \n\t"               \
-    "adds r8, r1 \n\t"                         \
-    "adcs r11, r10 \n\t"                       \
+    "adds r8, r8, r1 \n\t"                     \
+    "adcs r11, r11, r10 \n\t"                  \
     "stmia r0!, {r8, r11} \n\t"
 
 #define FAST_SQUARE_ASM_8                   \
@@ -1548,12 +1548,12 @@
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "umull r12, r10, r2, r6 \n\t"           \
-    "adds r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adds r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "stmia r0!, {r9} \n\t"                  \
                                             \
     "umull r8, r9, r3, r6 \n\t"             \
-    "adds r10, r8 \n\t"                     \
+    "adds r10, r10, r8 \n\t"                \
     "adc r11, r9, #0 \n\t"                  \
     "stmia r0!, {r10, r11} \n\t"            \
                                             \
@@ -1566,26 +1566,26 @@
                                             \
     "mov r9, #0 \n\t"                       \
     "umull r10, r11, r2, r3 \n\t"           \
-    "adds r12, r10 \n\t"                    \
+    "adds r12, r12, r10 \n\t"               \
     "adcs r8, r11, #0 \n\t"                 \
-    "adc r9, #0 \n\t"                       \
-    "adds r12, r10 \n\t"                    \
-    "adcs r8, r11 \n\t"                     \
-    "adc r9, #0 \n\t"                       \
+    "adc r9, r9, #0 \n\t"                   \
+    "adds r12, r12, r10 \n\t"               \
+    "adcs r8, r8, r11 \n\t"                 \
+    "adc r9, r9, #0 \n\t"                   \
     "stmia r0!, {r12} \n\t"                 \
                                             \
     "mov r10, #0 \n\t"                      \
     "umull r11, r12, r2, r4 \n\t"           \
-    "adds r11, r11 \n\t"                    \
-    "adcs r12, r12 \n\t"                    \
-    "adc r10, #0 \n\t"                      \
-    "adds r8, r11 \n\t"                     \
-    "adcs r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adds r11, r11, r11 \n\t"               \
+    "adcs r12, r12, r12 \n\t"               \
+    "adc r10, r10, #0 \n\t"                 \
+    "adds r8, r8, r11 \n\t"                 \
+    "adcs r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "umull r11, r12, r3, r3 \n\t"           \
-    "adds r8, r11 \n\t"                     \
-    "adcs r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adds r8, r8, r11 \n\t"                 \
+    "adcs r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r12, #0 \n\t"                      \
@@ -1594,13 +1594,13 @@
     "umlal r8, r11, r3, r4 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
-    "adds r8, r8 \n\t"                      \
-    "adcs r11, r11 \n\t"                    \
-    "adc r12, r12 \n\t"                     \
-    "adds r8, r9 \n\t"                      \
-    "adcs r11, r10 \n\t"                    \
-    "adc r12, #0 \n\t"                      \
+    "adchi r12, r12, #0 \n\t"               \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r11, r11, r11 \n\t"               \
+    "adc r12, r12, r12 \n\t"                \
+    "adds r8, r8, r9 \n\t"                  \
+    "adcs r11, r11, r10 \n\t"               \
+    "adc r12, r12, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r10, #0 \n\t"                      \
@@ -1609,18 +1609,18 @@
     "umlal r8, r9, r3, r5 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
-    "adds r8, r8 \n\t"                      \
-    "adcs r9, r9 \n\t"                      \
-    "adc r10, r10 \n\t"                     \
+    "adchi r10, r10, #0 \n\t"               \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r9, r9, r9 \n\t"                  \
+    "adc r10, r10, r10 \n\t"                \
     "mov r14, r9 \n\t"                      \
     "umlal r8, r9, r4, r4 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
-    "adds r8, r11 \n\t"                     \
-    "adcs r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adchi r10, r10, #0 \n\t"               \
+    "adds r8, r8, r11 \n\t"                 \
+    "adcs r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r12, #0 \n\t"                      \
@@ -1629,18 +1629,18 @@
     "umlal r8, r11, r3, r6 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
+    "adchi r12, r12, #0 \n\t"               \
     "mov r14, r11 \n\t"                     \
     "umlal r8, r11, r4, r5 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
-    "adds r8, r8 \n\t"                      \
-    "adcs r11, r11 \n\t"                    \
-    "adc r12, r12 \n\t"                     \
-    "adds r8, r9 \n\t"                      \
-    "adcs r11, r10 \n\t"                    \
-    "adc r12, #0 \n\t"                      \
+    "adchi r12, r12, #0 \n\t"               \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r11, r11, r11 \n\t"               \
+    "adc r12, r12, r12 \n\t"                \
+    "adds r8, r8, r9 \n\t"                  \
+    "adcs r11, r11, r10 \n\t"               \
+    "adc r12, r12, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "ldmia r1!, {r2} \n\t"                  \
@@ -1650,22 +1650,22 @@
     "umlal r8, r9, r4, r6 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
+    "adchi r10, r10, #0 \n\t"               \
     "ldr r14, [r0] \n\t"                    \
-    "adds r8, r14 \n\t"                     \
-    "adcs r9, #0 \n\t"                      \
-    "adc r10, #0 \n\t"                      \
-    "adds r8, r8 \n\t"                      \
-    "adcs r9, r9 \n\t"                      \
-    "adc r10, r10 \n\t"                     \
+    "adds r8, r8, r14 \n\t"                 \
+    "adcs r9, r9, #0 \n\t"                  \
+    "adc r10, r10, #0 \n\t"                 \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r9, r9, r9 \n\t"                  \
+    "adc r10, r10, r10 \n\t"                \
     "mov r14, r9 \n\t"                      \
     "umlal r8, r9, r5, r5 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
-    "adds r8, r11 \n\t"                     \
-    "adcs r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adchi r10, r10, #0 \n\t"               \
+    "adds r8, r8, r11 \n\t"                 \
+    "adcs r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r12, #0 \n\t"                      \
@@ -1674,22 +1674,22 @@
     "umlal r8, r11, r4, r7 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
+    "adchi r12, r12, #0 \n\t"               \
     "mov r14, r11 \n\t"                     \
     "umlal r8, r11, r5, r6 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
+    "adchi r12, r12, #0 \n\t"               \
     "ldr r14, [r0] \n\t"                    \
-    "adds r8, r14 \n\t"                     \
-    "adcs r11, #0 \n\t"                     \
-    "adc r12, #0 \n\t"                      \
-    "adds r8, r8 \n\t"                      \
-    "adcs r11, r11 \n\t"                    \
-    "adc r12, r12 \n\t"                     \
-    "adds r8, r9 \n\t"                      \
-    "adcs r11, r10 \n\t"                    \
-    "adc r12, #0 \n\t"                      \
+    "adds r8, r8, r14 \n\t"                 \
+    "adcs r11, r11, #0 \n\t"                \
+    "adc r12, r12, #0 \n\t"                 \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r11, r11, r11 \n\t"               \
+    "adc r12, r12, r12 \n\t"                \
+    "adds r8, r8, r9 \n\t"                  \
+    "adcs r11, r11, r10 \n\t"               \
+    "adc r12, r12, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "ldmia r1!, {r3} \n\t"                  \
@@ -1699,22 +1699,22 @@
     "umlal r8, r9, r5, r7 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
+    "adchi r10, r10, #0 \n\t"               \
     "ldr r14, [r0] \n\t"                    \
-    "adds r8, r14 \n\t"                     \
-    "adcs r9, #0 \n\t"                      \
-    "adc r10, #0 \n\t"                      \
-    "adds r8, r8 \n\t"                      \
-    "adcs r9, r9 \n\t"                      \
-    "adc r10, r10 \n\t"                     \
+    "adds r8, r8, r14 \n\t"                 \
+    "adcs r9, r9, #0 \n\t"                  \
+    "adc r10, r10, #0 \n\t"                 \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r9, r9, r9 \n\t"                  \
+    "adc r10, r10, r10 \n\t"                \
     "mov r14, r9 \n\t"                      \
     "umlal r8, r9, r6, r6 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
-    "adds r8, r11 \n\t"                     \
-    "adcs r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adchi r10, r10, #0 \n\t"               \
+    "adds r8, r8, r11 \n\t"                 \
+    "adcs r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r12, #0 \n\t"                      \
@@ -1723,22 +1723,22 @@
     "umlal r8, r11, r5, r2 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
+    "adchi r12, r12, #0 \n\t"               \
     "mov r14, r11 \n\t"                     \
     "umlal r8, r11, r6, r7 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
+    "adchi r12, r12, #0 \n\t"               \
     "ldr r14, [r0] \n\t"                    \
-    "adds r8, r14 \n\t"                     \
-    "adcs r11, #0 \n\t"                     \
-    "adc r12, #0 \n\t"                      \
-    "adds r8, r8 \n\t"                      \
-    "adcs r11, r11 \n\t"                    \
-    "adc r12, r12 \n\t"                     \
-    "adds r8, r9 \n\t"                      \
-    "adcs r11, r10 \n\t"                    \
-    "adc r12, #0 \n\t"                      \
+    "adds r8, r8, r14 \n\t"                 \
+    "adcs r11, r11, #0 \n\t"                \
+    "adc r12, r12, #0 \n\t"                 \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r11, r11, r11 \n\t"               \
+    "adc r12, r12, r12 \n\t"                \
+    "adds r8, r8, r9 \n\t"                  \
+    "adcs r11, r11, r10 \n\t"               \
+    "adc r12, r12, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r10, #0 \n\t"                      \
@@ -1747,18 +1747,18 @@
     "umlal r8, r9, r6, r2 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
-    "adds r8, r8 \n\t"                      \
-    "adcs r9, r9 \n\t"                      \
-    "adc r10, r10 \n\t"                     \
+    "adchi r10, r10, #0 \n\t"               \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r9, r9, r9 \n\t"                  \
+    "adc r10, r10, r10 \n\t"                \
     "mov r14, r9 \n\t"                      \
     "umlal r8, r9, r7, r7 \n\t"             \
     "cmp r14, r9 \n\t"                      \
     "it hi \n\t"                            \
-    "adchi r10, #0 \n\t"                    \
-    "adds r8, r11 \n\t"                     \
-    "adcs r9, r12 \n\t"                     \
-    "adc r10, #0 \n\t"                      \
+    "adchi r10, r10, #0 \n\t"               \
+    "adds r8, r8, r11 \n\t"                 \
+    "adcs r9, r9, r12 \n\t"                 \
+    "adc r10, r10, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r12, #0 \n\t"                      \
@@ -1767,42 +1767,42 @@
     "umlal r8, r11, r7, r2 \n\t"            \
     "cmp r14, r11 \n\t"                     \
     "it hi \n\t"                            \
-    "adchi r12, #0 \n\t"                    \
-    "adds r8, r8 \n\t"                      \
-    "adcs r11, r11 \n\t"                    \
-    "adc r12, r12 \n\t"                     \
-    "adds r8, r9 \n\t"                      \
-    "adcs r11, r10 \n\t"                    \
-    "adc r12, #0 \n\t"                      \
+    "adchi r12, r12, #0 \n\t"               \
+    "adds r8, r8, r8 \n\t"                  \
+    "adcs r11, r11, r11 \n\t"               \
+    "adc r12, r12, r12 \n\t"                \
+    "adds r8, r8, r9 \n\t"                  \
+    "adcs r11, r11, r10 \n\t"               \
+    "adc r12, r12, #0 \n\t"                 \
     "stmia r0!, {r8} \n\t"                  \
                                             \
     "mov r8, #0 \n\t"                       \
     "umull r1, r10, r7, r3 \n\t"            \
-    "adds r1, r1 \n\t"                      \
-    "adcs r10, r10 \n\t"                    \
-    "adc r8, #0 \n\t"                       \
-    "adds r11, r1 \n\t"                     \
-    "adcs r12, r10 \n\t"                    \
-    "adc r8, #0 \n\t"                       \
+    "adds r1, r1, r1 \n\t"                  \
+    "adcs r10, r10, r10 \n\t"               \
+    "adc r8, r8, #0 \n\t"                   \
+    "adds r11, r11, r1 \n\t"                \
+    "adcs r12, r12, r10 \n\t"               \
+    "adc r8, r8, #0 \n\t"                   \
     "umull r1, r10, r2, r2 \n\t"            \
-    "adds r11, r1 \n\t"                     \
-    "adcs r12, r10 \n\t"                    \
-    "adc r8, #0 \n\t"                       \
+    "adds r11, r11, r1 \n\t"                \
+    "adcs r12, r12, r10 \n\t"               \
+    "adc r8, r8, #0 \n\t"                   \
     "stmia r0!, {r11} \n\t"                 \
                                             \
     "mov r11, #0 \n\t"                      \
     "umull r1, r10, r2, r3 \n\t"            \
-    "adds r1, r1 \n\t"                      \
-    "adcs r10, r10 \n\t"                    \
-    "adc r11, #0 \n\t"                      \
-    "adds r12, r1 \n\t"                     \
-    "adcs r8, r10 \n\t"                     \
-    "adc r11, #0 \n\t"                      \
+    "adds r1, r1, r1 \n\t"                  \
+    "adcs r10, r10, r10 \n\t"               \
+    "adc r11, r11, #0 \n\t"                 \
+    "adds r12, r12, r1 \n\t"                \
+    "adcs r8, r8, r10 \n\t"                 \
+    "adc r11, r11, #0 \n\t"                 \
     "stmia r0!, {r12} \n\t"                 \
                                             \
     "umull r1, r10, r3, r3 \n\t"            \
-    "adds r8, r1 \n\t"                      \
-    "adcs r11, r10 \n\t"                    \
+    "adds r8, r8, r1 \n\t"                  \
+    "adcs r11, r11, r10 \n\t"               \
     "stmia r0!, {r8, r11} \n\t"
 
 #endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */
diff --git a/scripts/mult_arm.py b/scripts/mult_arm.py
index 6715d4b..402ace1 100755
--- a/scripts/mult_arm.py
+++ b/scripts/mult_arm.py
@@ -40,12 +40,12 @@
     print ""
     emit("mov r10, #0")
     emit("umull r11, r9, r3, r7")
-    emit("adds r12, r11")
-    emit("adc r9, #0")
+    emit("adds r12, r12, r11")
+    emit("adc r9, r9, #0")
     emit("umull r11, r14, r4, r6")
-    emit("adds r12, r11")
-    emit("adcs r9, r14")
-    emit("adc r10, #0")
+    emit("adds r12, r12, r11")
+    emit("adcs r9, r9, r14")
+    emit("adc r10, r10, #0")
     emit("stmia r0!, {r12}")
     print ""
 
@@ -55,9 +55,9 @@
         emit("mov r%s, #0", acc[2])
         for i in xrange(0, 3):
             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
-            emit("adds r%s, r%s", acc[0], acc[3])
-            emit("adcs r%s, r%s", acc[1], acc[4])
-            emit("adc r%s, #0", acc[2])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
         emit("stmia r0!, {r%s}", acc[0])
         print ""
         acc = acc[1:] + acc[:1]
@@ -65,16 +65,16 @@
         emit("mov r%s, #0", acc[2])
         for i in xrange(0, 2):
             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i])
-            emit("adds r%s, r%s", acc[0], acc[3])
-            emit("adcs r%s, r%s", acc[1], acc[4])
-            emit("adc r%s, #0", acc[2])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
         emit("stmia r0!, {r%s}", acc[0])
         print ""
         acc = acc[1:] + acc[:1]
     
     emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1])
-    emit("adds r%s, r%s", acc[0], acc[3])
-    emit("adc r%s, r%s", acc[1], acc[4])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+    emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
     emit("stmia r0!, {r%s}", acc[0])
     emit("stmia r0!, {r%s}", acc[1])
 print ""
@@ -98,12 +98,12 @@
     print ""
     emit("mov r10, #0")
     emit("umull r11, r9, r3, r7")
-    emit("adds r12, r11")
-    emit("adc r9, #0")
+    emit("adds r12, r12, r11")
+    emit("adc r9, r9, #0")
     emit("umull r11, r14, r4, r6")
-    emit("adds r12, r11")
-    emit("adcs r9, r14")
-    emit("adc r10, #0")
+    emit("adds r12, r12, r11")
+    emit("adcs r9, r9, r14")
+    emit("adc r10, r10, #0")
     emit("stmia r0!, {r12}")
     print ""
 
@@ -111,9 +111,9 @@
     emit("mov r%s, #0", acc[2])
     for i in xrange(0, 3):
         emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
-        emit("adds r%s, r%s", acc[0], acc[3])
-        emit("adcs r%s, r%s", acc[1], acc[4])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
     emit("stmia r0!, {r%s}", acc[0])
     print ""
     acc = acc[1:] + acc[:1]
@@ -126,13 +126,13 @@
         emit("mov r%s, #0", acc[2])
         for i in xrange(0, 3):
             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i])
-            emit("adds r%s, r%s", acc[0], acc[3])
-            emit("adcs r%s, r%s", acc[1], acc[4])
-            emit("adc r%s, #0", acc[2])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
         emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
-        emit("adds r%s, r%s", acc[0], acc[3])
-        emit("adcs r%s, #0", acc[1])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, #0", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
         emit("stmia r0!, {r%s}", acc[0])
         print ""
         acc = acc[1:] + acc[:1]
@@ -145,13 +145,13 @@
         emit("mov r%s, #0", acc[2])
         for i in xrange(0, 3):
             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i])
-            emit("adds r%s, r%s", acc[0], acc[3])
-            emit("adcs r%s, r%s", acc[1], acc[4])
-            emit("adc r%s, #0", acc[2])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
         emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
-        emit("adds r%s, r%s", acc[0], acc[3])
-        emit("adcs r%s, #0", acc[1])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, #0", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
         emit("stmia r0!, {r%s}", acc[0])
         print ""
         acc = acc[1:] + acc[:1]
@@ -160,16 +160,16 @@
     emit("mov r%s, #0", acc[2])
     for i in xrange(0, 2):
         emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i])
-        emit("adds r%s, r%s", acc[0], acc[3])
-        emit("adcs r%s, r%s", acc[1], acc[4])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
     emit("stmia r0!, {r%s}", acc[0])
     print ""
     acc = acc[1:] + acc[:1]
     
     emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2])
-    emit("adds r%s, r%s", acc[0], acc[3])
-    emit("adc r%s, r%s", acc[1], acc[4])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+    emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
     emit("stmia r0!, {r%s}", acc[0])
     emit("stmia r0!, {r%s}", acc[1])
     print ""
diff --git a/scripts/square_arm.py b/scripts/square_arm.py
index ae11072..5330c7e 100755
--- a/scripts/square_arm.py
+++ b/scripts/square_arm.py
@@ -23,15 +23,15 @@
 def mulacc(acc, r1, r2):
     if size <= 6:
         emit("umull r1, r14, r%s, r%s", r1, r2)
-        emit("adds r%s, r1", acc[0])
-        emit("adcs r%s, r14", acc[1])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r1", acc[0], acc[0])
+        emit("adcs r%s, r%s, r14", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
     else:
         emit("mov r14, r%s", acc[1])
         emit("umlal r%s, r%s, r%s, r%s", acc[0], acc[1], r1, r2)
         emit("cmp r14, r%s", acc[1])
         emit("it hi")
-        emit("adchi r%s, #0", acc[2])
+        emit("adchi r%s, r%s, #0", acc[2], acc[2])
 
 r = [2, 3, 4, 5, 6, 7]
 
@@ -62,13 +62,13 @@
     print ""
     
     emit("umull r12, r10, r2, r6")
-    emit("adds r9, r12")
-    emit("adc r10, #0")
+    emit("adds r9, r9, r12")
+    emit("adc r10, r10, #0")
     emit("stmia r0!, {r9}")
     print ""
     
     emit("umull r8, r9, r3, r6")
-    emit("adds r10, r8")
+    emit("adds r10, r10, r8")
     emit("adc r11, r9, #0")
     emit("stmia r0!, {r10, r11}")
     print ""
@@ -85,26 +85,26 @@
 print ""
 emit("mov r9, #0")
 emit("umull r10, r11, r2, r3")
-emit("adds r12, r10")
+emit("adds r12, r12, r10")
 emit("adcs r8, r11, #0")
-emit("adc r9, #0")
-emit("adds r12, r10")
-emit("adcs r8, r11")
-emit("adc r9, #0")
+emit("adc r9, r9, #0")
+emit("adds r12, r12, r10")
+emit("adcs r8, r8, r11")
+emit("adc r9, r9, #0")
 emit("stmia r0!, {r12}")
 print ""
 emit("mov r10, #0")
 emit("umull r11, r12, r2, r4")
-emit("adds r11, r11")
-emit("adcs r12, r12")
-emit("adc r10, #0")
-emit("adds r8, r11")
-emit("adcs r9, r12")
-emit("adc r10, #0")
+emit("adds r11, r11, r11")
+emit("adcs r12, r12, r12")
+emit("adc r10, r10, #0")
+emit("adds r8, r8, r11")
+emit("adcs r9, r9, r12")
+emit("adc r10, r10, #0")
 emit("umull r11, r12, r3, r3")
-emit("adds r8, r11")
-emit("adcs r9, r12")
-emit("adc r10, #0")
+emit("adds r8, r8, r11")
+emit("adcs r9, r9, r12")
+emit("adc r10, r10, #0")
 emit("stmia r0!, {r8}")
 print ""
 
@@ -121,18 +121,18 @@
     for j in xrange(1, (i+1)//2):
         mulacc(acc, r[j], r[i-j])
     # multiply by 2
-    emit("adds r%s, r%s", acc[0], acc[0])
-    emit("adcs r%s, r%s", acc[1], acc[1])
-    emit("adc r%s, r%s", acc[2], acc[2])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+    emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
     
     # add equal word (if any)
     if ((i+1) % 2) != 0:
         mulacc(acc, r[i//2], r[i//2])
     
     # add old accumulator
-    emit("adds r%s, r%s", acc[0], old_acc[0])
-    emit("adcs r%s, r%s", acc[1], old_acc[1])
-    emit("adc r%s, #0", acc[2])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+    emit("adc r%s, r%s, #0", acc[2], acc[2])
     
     # store
     emit("stmia r0!, {r%s}", acc[0])
@@ -155,23 +155,23 @@
             mulacc(acc, regs[j], regs[limit-j])
     
         emit("ldr r14, [r0]") # load stored value from initial block, and add to accumulator
-        emit("adds r%s, r14", acc[0])
-        emit("adcs r%s, #0", acc[1])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r14", acc[0], acc[0])
+        emit("adcs r%s, r%s, #0", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
     
         # multiply by 2
-        emit("adds r%s, r%s", acc[0], acc[0])
-        emit("adcs r%s, r%s", acc[1], acc[1])
-        emit("adc r%s, r%s", acc[2], acc[2])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+        emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
     
         # add equal word
         if limit == 4:
             mulacc(acc, regs[2], regs[2])
     
         # add old accumulator
-        emit("adds r%s, r%s", acc[0], old_acc[0])
-        emit("adcs r%s, r%s", acc[1], old_acc[1])
-        emit("adc r%s, #0", acc[2])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
     
         # store
         emit("stmia r0!, {r%s}", acc[0])
@@ -189,18 +189,18 @@
         mulacc(acc, regs[i+j], regs[s - 1 - j])
 
     # multiply by 2
-    emit("adds r%s, r%s", acc[0], acc[0])
-    emit("adcs r%s, r%s", acc[1], acc[1])
-    emit("adc r%s, r%s", acc[2], acc[2])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+    emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
 
     # add equal word (if any)
     if ((s-i) % 2) != 0:
         mulacc(acc, regs[i + (s-i)//2], regs[i + (s-i)//2])
 
     # add old accumulator
-    emit("adds r%s, r%s", acc[0], old_acc[0])
-    emit("adcs r%s, r%s", acc[1], old_acc[1])
-    emit("adc r%s, #0", acc[2])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+    emit("adc r%s, r%s, #0", acc[2], acc[2])
 
     # store
     emit("stmia r0!, {r%s}", acc[0])
@@ -209,34 +209,34 @@
 acc = acc[1:] + acc[:1]
 emit("mov r%s, #0", acc[2])
 emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 3], regs[s - 1])
-emit("adds r1, r1")
-emit("adcs r%s, r%s", old_acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
+emit("adds r1, r1, r1")
+emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
 emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 2])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
 emit("stmia r0!, {r%s}", acc[0])
 print ""
 
 acc = acc[1:] + acc[:1]
 emit("mov r%s, #0", acc[2])
 emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 1])
-emit("adds r1, r1")
-emit("adcs r%s, r%s", old_acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
+emit("adds r1, r1, r1")
+emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
 emit("stmia r0!, {r%s}", acc[0])
 print ""
 
 acc = acc[1:] + acc[:1]
 emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 1], regs[s - 1])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
 emit("stmia r0!, {r%s}", acc[0])
 emit("stmia r0!, {r%s}", acc[1])
diff --git a/uECC.c b/uECC.c
index a119738..70b8ce4 100644
--- a/uECC.c
+++ b/uECC.c
@@ -386,11 +386,13 @@
 
 #else /* uECC_SQUARE_FUNC */
 
+#if uECC_ENABLE_VLI_API
 uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
                                   const uECC_word_t *left,
                                   wordcount_t num_words) {
     uECC_vli_mult(result, left, left, num_words);
 }
+#endif /* uECC_ENABLE_VLI_API */
     
 #endif /* uECC_SQUARE_FUNC */
 
@@ -493,6 +495,7 @@
 
 #if uECC_SQUARE_FUNC
 
+#if uECC_ENABLE_VLI_API
 /* Computes result = left^2 % mod. */
 uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result,
                                      const uECC_word_t *left,
@@ -502,6 +505,7 @@
     uECC_vli_square(product, left, num_words);
     uECC_vli_mmod(result, product, mod, num_words);
 }
+#endif /* uECC_ENABLE_VLI_API */
 
 uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result,
                                           const uECC_word_t *left,
@@ -517,12 +521,14 @@
 
 #else /* uECC_SQUARE_FUNC */
 
+#if uECC_ENABLE_VLI_API
 uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result,
                                      const uECC_word_t *left,
                                      const uECC_word_t *mod,
                                      wordcount_t num_words) {
     uECC_vli_modMult(result, left, left, mod, num_words);
 }
+#endif /* uECC_ENABLE_VLI_API */
 
 uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result,
                                           const uECC_word_t *left,