Fix fast ARM mult/square asm for Xcode: use explicit three-operand adds/adc/adcs
diff --git a/asm_arm_mult_square.inc b/asm_arm_mult_square.inc
index d46af78..9decef6 100644
--- a/asm_arm_mult_square.inc
+++ b/asm_arm_mult_square.inc
@@ -14,17 +14,17 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adc r10, r14 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adc r10, r10, r14 \n\t" \
"stmia r0!, {r9, r10} \n\t" \
\
"sub r0, 28 \n\t" \
@@ -37,123 +37,123 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r4, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"ldmia r1!, {r4} \n\t" \
"mov r14, #0 \n\t" \
"umull r9, r10, r5, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r4, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"ldr r9, [r0] \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, #0 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, #0 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r9, #0 \n\t" \
"umull r10, r11, r5, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r3, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"ldr r10, [r0] \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, #0 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r2!, {r7} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r5, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r4, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"umull r14, r9, r4, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adc r11, r9 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adc r11, r11, r9 \n\t" \
"stmia r0!, {r10, r11} \n\t"
#define FAST_MULT_ASM_6 \
@@ -167,43 +167,43 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r14, r9, r4, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"umull r9, r10, r5, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adc r12, r10 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adc r12, r12, r10 \n\t" \
"stmia r0!, {r11, r12} \n\t" \
\
"sub r0, 36 \n\t" \
@@ -215,163 +215,163 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r4, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"ldmia r1!, {r4} \n\t" \
"mov r14, #0 \n\t" \
"umull r9, r10, r5, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r4, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"ldr r9, [r0] \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, #0 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, #0 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"ldmia r1!, {r5} \n\t" \
"mov r9, #0 \n\t" \
"umull r10, r11, r3, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r5, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"ldr r10, [r0] \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, #0 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r3, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r4, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r5, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"ldmia r2!, {r7} \n\t" \
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"ldr r12, [r0] \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, #0 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, #0 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r2!, {r8} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r3, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r4, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"mov r14, #0 \n\t" \
"umull r9, r10, r4, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r5, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"umull r10, r11, r5, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adc r14, r11 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adc r14, r14, r11 \n\t" \
"stmia r0!, {r12, r14} \n\t"
#define FAST_MULT_ASM_7 \
@@ -393,83 +393,83 @@
\
"mov r14, #0 \n\t" \
"umull r9, r12, r3, r7 \n\t" \
- "adds r10, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r9, r11, r4, r6 \n\t" \
- "adds r10, r9 \n\t" \
- "adcs r12, r11 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r10, r10, r9 \n\t" \
+ "adcs r12, r12, r11 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"mov r9, #0 \n\t" \
"umull r10, r11, r3, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r5, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r4, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r5, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r11, #0 \n\t" \
"umull r12, r14, r4, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r3, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"ldr r12, [r0] \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, #0 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, #0 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r14, r9, r5, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"umull r9, r10, r3, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adc r12, r10 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adc r12, r12, r10 \n\t" \
"stmia r0!, {r11, r12} \n\t" \
\
"sub r0, 44 \n\t" \
@@ -483,203 +483,203 @@
\
"mov r14, #0 \n\t" \
"umull r9, r12, r3, r7 \n\t" \
- "adds r10, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r9, r11, r4, r6 \n\t" \
- "adds r10, r9 \n\t" \
- "adcs r12, r11 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r10, r10, r9 \n\t" \
+ "adcs r12, r12, r11 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"mov r9, #0 \n\t" \
"umull r10, r11, r3, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r5, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r4, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r5, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"ldmia r1!, {r4} \n\t" \
"mov r11, #0 \n\t" \
"umull r12, r14, r5, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r3, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"ldr r12, [r0] \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, #0 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, #0 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r1!, {r5} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r3, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r4, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r14, #0 \n\t" \
"umull r9, r10, r4, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r5, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"ldr r9, [r0] \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, #0 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, #0 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r9, #0 \n\t" \
"umull r10, r11, r4, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r5, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r3, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"ldr r10, [r0] \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, #0 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r2!, {r7} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r4, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r5, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"ldmia r2!, {r8} \n\t" \
"mov r11, #0 \n\t" \
"umull r12, r14, r4, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r3, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"ldr r12, [r0] \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, #0 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, #0 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r4, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"mov r14, #0 \n\t" \
"umull r9, r10, r5, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"umull r10, r11, r3, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adc r14, r11 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adc r14, r14, r11 \n\t" \
"stmia r0!, {r12, r14} \n\t"
#define FAST_MULT_ASM_8 \
@@ -693,17 +693,17 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adc r10, r14 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adc r10, r10, r14 \n\t" \
"stmia r0!, {r9, r10} \n\t" \
\
"sub r0, 28 \n\t" \
@@ -716,123 +716,123 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r4, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"ldmia r1!, {r4} \n\t" \
"mov r14, #0 \n\t" \
"umull r9, r10, r5, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r4, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"ldr r9, [r0] \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, #0 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, #0 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r9, #0 \n\t" \
"umull r10, r11, r5, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r3, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"ldr r10, [r0] \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, #0 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r2!, {r7} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r5, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r4, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"umull r14, r9, r4, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adc r11, r9 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adc r11, r11, r9 \n\t" \
"stmia r0!, {r10, r11} \n\t" \
\
"sub r0, 52 \n\t" \
@@ -846,243 +846,243 @@
\
"mov r10, #0 \n\t" \
"umull r11, r9, r3, r7 \n\t" \
- "adds r12, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r11, r14, r4, r6 \n\t" \
- "adds r12, r11 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r12, r12, r11 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r12, r14, r3, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r5, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r4, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r5, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"ldmia r1!, {r4} \n\t" \
"mov r14, #0 \n\t" \
"umull r9, r10, r5, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r4, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"ldr r9, [r0] \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, #0 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, #0 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"ldmia r1!, {r5} \n\t" \
"mov r9, #0 \n\t" \
"umull r10, r11, r3, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r5, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"ldr r10, [r0] \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, #0 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r4, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r5, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"ldmia r1!, {r4} \n\t" \
"mov r11, #0 \n\t" \
"umull r12, r14, r5, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r3, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"ldr r12, [r0] \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, #0 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, #0 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r12, #0 \n\t" \
"umull r14, r9, r5, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r3, r8 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r4, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"ldmia r2!, {r7} \n\t" \
"mov r14, #0 \n\t" \
"umull r9, r10, r5, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r3, r6 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"umull r9, r10, r4, r8 \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"ldr r9, [r0] \n\t" \
- "adds r11, r9 \n\t" \
- "adcs r12, #0 \n\t" \
- "adc r14, #0 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adcs r12, r12, #0 \n\t" \
+ "adc r14, r14, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"ldmia r2!, {r8} \n\t" \
"mov r9, #0 \n\t" \
"umull r10, r11, r5, r8 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r3, r7 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"umull r10, r11, r4, r6 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"ldr r10, [r0] \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r14, #0 \n\t" \
- "adc r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r14, r14, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"ldmia r2!, {r6} \n\t" \
"mov r10, #0 \n\t" \
"umull r11, r12, r5, r6 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r8 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r4, r7 \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"ldr r11, [r0] \n\t" \
- "adds r14, r11 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r14, r14, r11 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r14} \n\t" \
\
"ldmia r2!, {r7} \n\t" \
"mov r11, #0 \n\t" \
"umull r12, r14, r5, r7 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r3, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"umull r12, r14, r4, r8 \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, r14 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, r14 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"ldr r12, [r0] \n\t" \
- "adds r9, r12 \n\t" \
- "adcs r10, #0 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adcs r10, r10, #0 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r14, r9, r3, r7 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r14, r9, r4, r6 \n\t" \
- "adds r10, r14 \n\t" \
- "adcs r11, r9 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r10, r10, r14 \n\t" \
+ "adcs r11, r11, r9 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r10} \n\t" \
\
"umull r9, r10, r4, r7 \n\t" \
- "adds r11, r9 \n\t" \
- "adc r12, r10 \n\t" \
+ "adds r11, r11, r9 \n\t" \
+ "adc r12, r12, r10 \n\t" \
"stmia r0!, {r11, r12} \n\t"
#define FAST_SQUARE_ASM_5 \
@@ -1093,101 +1093,101 @@
\
"mov r9, #0 \n\t" \
"umull r10, r11, r2, r3 \n\t" \
- "adds r12, r10 \n\t" \
+ "adds r12, r12, r10 \n\t" \
"adcs r8, r11, #0 \n\t" \
- "adc r9, #0 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r8, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r8, r8, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r11, r12, r2, r4 \n\t" \
- "adds r11, r11 \n\t" \
- "adcs r12, r12 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r11, r11, r11 \n\t" \
+ "adcs r12, r12, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r3 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r8, r11, r2, r5 \n\t" \
"umull r1, r14, r3, r4 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r14 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r14 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r8, r9, r2, r6 \n\t" \
"umull r1, r14, r3, r5 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"umull r1, r14, r4, r4 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r8, r11, r3, r6 \n\t" \
"umull r1, r14, r4, r5 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r14 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r14 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r8, #0 \n\t" \
"umull r1, r10, r4, r6 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r8, #0 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"umull r1, r10, r5, r5 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r1, r10, r5, r6 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r11, #0 \n\t" \
- "adds r12, r1 \n\t" \
- "adcs r8, r10 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
+ "adds r12, r12, r1 \n\t" \
+ "adcs r8, r8, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"umull r1, r10, r6, r6 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
"stmia r0!, {r8, r11} \n\t"
#define FAST_SQUARE_ASM_6 \
@@ -1198,137 +1198,137 @@
\
"mov r9, #0 \n\t" \
"umull r10, r11, r2, r3 \n\t" \
- "adds r12, r10 \n\t" \
+ "adds r12, r12, r10 \n\t" \
"adcs r8, r11, #0 \n\t" \
- "adc r9, #0 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r8, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r8, r8, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r11, r12, r2, r4 \n\t" \
- "adds r11, r11 \n\t" \
- "adcs r12, r12 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r11, r11, r11 \n\t" \
+ "adcs r12, r12, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r3 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r8, r11, r2, r5 \n\t" \
"umull r1, r14, r3, r4 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r14 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r14 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r8, r9, r2, r6 \n\t" \
"umull r1, r14, r3, r5 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"umull r1, r14, r4, r4 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r8, r11, r2, r7 \n\t" \
"umull r1, r14, r3, r6 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r14 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r14 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"umull r1, r14, r4, r5 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r14 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r14 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r8, r9, r3, r7 \n\t" \
"umull r1, r14, r4, r6 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"umull r1, r14, r5, r5 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r9, r14 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r9, r9, r14 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
"umull r8, r11, r4, r7 \n\t" \
"umull r1, r14, r5, r6 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r14 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r14 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r8, #0 \n\t" \
"umull r1, r10, r5, r7 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r8, #0 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"umull r1, r10, r6, r6 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r1, r10, r6, r7 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r11, #0 \n\t" \
- "adds r12, r1 \n\t" \
- "adcs r8, r10 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
+ "adds r12, r12, r1 \n\t" \
+ "adcs r8, r8, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"umull r1, r10, r7, r7 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
"stmia r0!, {r8, r11} \n\t"
#define FAST_SQUARE_ASM_7 \
@@ -1348,26 +1348,26 @@
\
"mov r9, #0 \n\t" \
"umull r10, r11, r2, r3 \n\t" \
- "adds r12, r10 \n\t" \
+ "adds r12, r12, r10 \n\t" \
"adcs r8, r11, #0 \n\t" \
- "adc r9, #0 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r8, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r8, r8, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r11, r12, r2, r4 \n\t" \
- "adds r11, r11 \n\t" \
- "adcs r12, r12 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r11, r11, r11 \n\t" \
+ "adcs r12, r12, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r3 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1376,13 +1376,13 @@
"umlal r8, r11, r3, r4 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
@@ -1391,18 +1391,18 @@
"umlal r8, r9, r3, r5 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r4, r4 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1411,18 +1411,18 @@
"umlal r8, r11, r3, r6 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"mov r14, r11 \n\t" \
"umlal r8, r11, r4, r5 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"ldmia r1!, {r2} \n\t" \
@@ -1432,22 +1432,22 @@
"umlal r8, r9, r4, r6 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r8, r14 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adds r8, r8, r14 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r5, r5 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1456,22 +1456,22 @@
"umlal r8, r11, r4, r7 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"mov r14, r11 \n\t" \
"umlal r8, r11, r5, r6 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r8, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
@@ -1480,18 +1480,18 @@
"umlal r8, r9, r5, r7 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r6, r6 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1500,42 +1500,42 @@
"umlal r8, r11, r6, r7 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r8, #0 \n\t" \
"umull r1, r10, r6, r2 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r8, #0 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"umull r1, r10, r7, r7 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r1, r10, r7, r2 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r11, #0 \n\t" \
- "adds r12, r1 \n\t" \
- "adcs r8, r10 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
+ "adds r12, r12, r1 \n\t" \
+ "adcs r8, r8, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"umull r1, r10, r2, r2 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
"stmia r0!, {r8, r11} \n\t"
#define FAST_SQUARE_ASM_8 \
@@ -1548,12 +1548,12 @@
"stmia r0!, {r8} \n\t" \
\
"umull r12, r10, r2, r6 \n\t" \
- "adds r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r9} \n\t" \
\
"umull r8, r9, r3, r6 \n\t" \
- "adds r10, r8 \n\t" \
+ "adds r10, r10, r8 \n\t" \
"adc r11, r9, #0 \n\t" \
"stmia r0!, {r10, r11} \n\t" \
\
@@ -1566,26 +1566,26 @@
\
"mov r9, #0 \n\t" \
"umull r10, r11, r2, r3 \n\t" \
- "adds r12, r10 \n\t" \
+ "adds r12, r12, r10 \n\t" \
"adcs r8, r11, #0 \n\t" \
- "adc r9, #0 \n\t" \
- "adds r12, r10 \n\t" \
- "adcs r8, r11 \n\t" \
- "adc r9, #0 \n\t" \
+ "adc r9, r9, #0 \n\t" \
+ "adds r12, r12, r10 \n\t" \
+ "adcs r8, r8, r11 \n\t" \
+ "adc r9, r9, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"mov r10, #0 \n\t" \
"umull r11, r12, r2, r4 \n\t" \
- "adds r11, r11 \n\t" \
- "adcs r12, r12 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r11, r11, r11 \n\t" \
+ "adcs r12, r12, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"umull r11, r12, r3, r3 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1594,13 +1594,13 @@
"umlal r8, r11, r3, r4 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
@@ -1609,18 +1609,18 @@
"umlal r8, r9, r3, r5 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r4, r4 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1629,18 +1629,18 @@
"umlal r8, r11, r3, r6 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"mov r14, r11 \n\t" \
"umlal r8, r11, r4, r5 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"ldmia r1!, {r2} \n\t" \
@@ -1650,22 +1650,22 @@
"umlal r8, r9, r4, r6 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r8, r14 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adds r8, r8, r14 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r5, r5 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1674,22 +1674,22 @@
"umlal r8, r11, r4, r7 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"mov r14, r11 \n\t" \
"umlal r8, r11, r5, r6 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r8, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"ldmia r1!, {r3} \n\t" \
@@ -1699,22 +1699,22 @@
"umlal r8, r9, r5, r7 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r8, r14 \n\t" \
- "adcs r9, #0 \n\t" \
- "adc r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adds r8, r8, r14 \n\t" \
+ "adcs r9, r9, #0 \n\t" \
+ "adc r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r6, r6 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1723,22 +1723,22 @@
"umlal r8, r11, r5, r2 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"mov r14, r11 \n\t" \
"umlal r8, r11, r6, r7 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
"ldr r14, [r0] \n\t" \
- "adds r8, r14 \n\t" \
- "adcs r11, #0 \n\t" \
- "adc r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adds r8, r8, r14 \n\t" \
+ "adcs r11, r11, #0 \n\t" \
+ "adc r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r10, #0 \n\t" \
@@ -1747,18 +1747,18 @@
"umlal r8, r9, r6, r2 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r9, r9 \n\t" \
- "adc r10, r10 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r9, r9, r9 \n\t" \
+ "adc r10, r10, r10 \n\t" \
"mov r14, r9 \n\t" \
"umlal r8, r9, r7, r7 \n\t" \
"cmp r14, r9 \n\t" \
"it hi \n\t" \
- "adchi r10, #0 \n\t" \
- "adds r8, r11 \n\t" \
- "adcs r9, r12 \n\t" \
- "adc r10, #0 \n\t" \
+ "adchi r10, r10, #0 \n\t" \
+ "adds r8, r8, r11 \n\t" \
+ "adcs r9, r9, r12 \n\t" \
+ "adc r10, r10, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r12, #0 \n\t" \
@@ -1767,42 +1767,42 @@
"umlal r8, r11, r7, r2 \n\t" \
"cmp r14, r11 \n\t" \
"it hi \n\t" \
- "adchi r12, #0 \n\t" \
- "adds r8, r8 \n\t" \
- "adcs r11, r11 \n\t" \
- "adc r12, r12 \n\t" \
- "adds r8, r9 \n\t" \
- "adcs r11, r10 \n\t" \
- "adc r12, #0 \n\t" \
+ "adchi r12, r12, #0 \n\t" \
+ "adds r8, r8, r8 \n\t" \
+ "adcs r11, r11, r11 \n\t" \
+ "adc r12, r12, r12 \n\t" \
+ "adds r8, r8, r9 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
+ "adc r12, r12, #0 \n\t" \
"stmia r0!, {r8} \n\t" \
\
"mov r8, #0 \n\t" \
"umull r1, r10, r7, r3 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r8, #0 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"umull r1, r10, r2, r2 \n\t" \
- "adds r11, r1 \n\t" \
- "adcs r12, r10 \n\t" \
- "adc r8, #0 \n\t" \
+ "adds r11, r11, r1 \n\t" \
+ "adcs r12, r12, r10 \n\t" \
+ "adc r8, r8, #0 \n\t" \
"stmia r0!, {r11} \n\t" \
\
"mov r11, #0 \n\t" \
"umull r1, r10, r2, r3 \n\t" \
- "adds r1, r1 \n\t" \
- "adcs r10, r10 \n\t" \
- "adc r11, #0 \n\t" \
- "adds r12, r1 \n\t" \
- "adcs r8, r10 \n\t" \
- "adc r11, #0 \n\t" \
+ "adds r1, r1, r1 \n\t" \
+ "adcs r10, r10, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
+ "adds r12, r12, r1 \n\t" \
+ "adcs r8, r8, r10 \n\t" \
+ "adc r11, r11, #0 \n\t" \
"stmia r0!, {r12} \n\t" \
\
"umull r1, r10, r3, r3 \n\t" \
- "adds r8, r1 \n\t" \
- "adcs r11, r10 \n\t" \
+ "adds r8, r8, r1 \n\t" \
+ "adcs r11, r11, r10 \n\t" \
"stmia r0!, {r8, r11} \n\t"
#endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */
diff --git a/scripts/mult_arm.py b/scripts/mult_arm.py
index 6715d4b..402ace1 100755
--- a/scripts/mult_arm.py
+++ b/scripts/mult_arm.py
@@ -40,12 +40,12 @@
print ""
emit("mov r10, #0")
emit("umull r11, r9, r3, r7")
- emit("adds r12, r11")
- emit("adc r9, #0")
+ emit("adds r12, r12, r11")
+ emit("adc r9, r9, #0")
emit("umull r11, r14, r4, r6")
- emit("adds r12, r11")
- emit("adcs r9, r14")
- emit("adc r10, #0")
+ emit("adds r12, r12, r11")
+ emit("adcs r9, r9, r14")
+ emit("adc r10, r10, #0")
emit("stmia r0!, {r12}")
print ""
@@ -55,9 +55,9 @@
emit("mov r%s, #0", acc[2])
for i in xrange(0, 3):
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, r%s", acc[1], acc[4])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
@@ -65,16 +65,16 @@
emit("mov r%s, #0", acc[2])
for i in xrange(0, 2):
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, r%s", acc[1], acc[4])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adc r%s, r%s", acc[1], acc[4])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
emit("stmia r0!, {r%s}", acc[0])
emit("stmia r0!, {r%s}", acc[1])
print ""
@@ -98,12 +98,12 @@
print ""
emit("mov r10, #0")
emit("umull r11, r9, r3, r7")
- emit("adds r12, r11")
- emit("adc r9, #0")
+ emit("adds r12, r12, r11")
+ emit("adc r9, r9, #0")
emit("umull r11, r14, r4, r6")
- emit("adds r12, r11")
- emit("adcs r9, r14")
- emit("adc r10, #0")
+ emit("adds r12, r12, r11")
+ emit("adcs r9, r9, r14")
+ emit("adc r10, r10, #0")
emit("stmia r0!, {r12}")
print ""
@@ -111,9 +111,9 @@
emit("mov r%s, #0", acc[2])
for i in xrange(0, 3):
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, r%s", acc[1], acc[4])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
@@ -126,13 +126,13 @@
emit("mov r%s, #0", acc[2])
for i in xrange(0, 3):
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, r%s", acc[1], acc[4])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, #0", acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, #0", acc[1], acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
@@ -145,13 +145,13 @@
emit("mov r%s, #0", acc[2])
for i in xrange(0, 3):
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, r%s", acc[1], acc[4])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, #0", acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, #0", acc[1], acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
@@ -160,16 +160,16 @@
emit("mov r%s, #0", acc[2])
for i in xrange(0, 2):
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adcs r%s, r%s", acc[1], acc[4])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2])
- emit("adds r%s, r%s", acc[0], acc[3])
- emit("adc r%s, r%s", acc[1], acc[4])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+ emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
emit("stmia r0!, {r%s}", acc[0])
emit("stmia r0!, {r%s}", acc[1])
print ""
diff --git a/scripts/square_arm.py b/scripts/square_arm.py
index ae11072..5330c7e 100755
--- a/scripts/square_arm.py
+++ b/scripts/square_arm.py
@@ -23,15 +23,15 @@
def mulacc(acc, r1, r2):
if size <= 6:
emit("umull r1, r14, r%s, r%s", r1, r2)
- emit("adds r%s, r1", acc[0])
- emit("adcs r%s, r14", acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r1", acc[0], acc[0])
+ emit("adcs r%s, r%s, r14", acc[1], acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
else:
emit("mov r14, r%s", acc[1])
emit("umlal r%s, r%s, r%s, r%s", acc[0], acc[1], r1, r2)
emit("cmp r14, r%s", acc[1])
emit("it hi")
- emit("adchi r%s, #0", acc[2])
+ emit("adchi r%s, r%s, #0", acc[2], acc[2])
r = [2, 3, 4, 5, 6, 7]
@@ -62,13 +62,13 @@
print ""
emit("umull r12, r10, r2, r6")
- emit("adds r9, r12")
- emit("adc r10, #0")
+ emit("adds r9, r9, r12")
+ emit("adc r10, r10, #0")
emit("stmia r0!, {r9}")
print ""
emit("umull r8, r9, r3, r6")
- emit("adds r10, r8")
+ emit("adds r10, r10, r8")
emit("adc r11, r9, #0")
emit("stmia r0!, {r10, r11}")
print ""
@@ -85,26 +85,26 @@
print ""
emit("mov r9, #0")
emit("umull r10, r11, r2, r3")
-emit("adds r12, r10")
+emit("adds r12, r12, r10")
emit("adcs r8, r11, #0")
-emit("adc r9, #0")
-emit("adds r12, r10")
-emit("adcs r8, r11")
-emit("adc r9, #0")
+emit("adc r9, r9, #0")
+emit("adds r12, r12, r10")
+emit("adcs r8, r8, r11")
+emit("adc r9, r9, #0")
emit("stmia r0!, {r12}")
print ""
emit("mov r10, #0")
emit("umull r11, r12, r2, r4")
-emit("adds r11, r11")
-emit("adcs r12, r12")
-emit("adc r10, #0")
-emit("adds r8, r11")
-emit("adcs r9, r12")
-emit("adc r10, #0")
+emit("adds r11, r11, r11")
+emit("adcs r12, r12, r12")
+emit("adc r10, r10, #0")
+emit("adds r8, r8, r11")
+emit("adcs r9, r9, r12")
+emit("adc r10, r10, #0")
emit("umull r11, r12, r3, r3")
-emit("adds r8, r11")
-emit("adcs r9, r12")
-emit("adc r10, #0")
+emit("adds r8, r8, r11")
+emit("adcs r9, r9, r12")
+emit("adc r10, r10, #0")
emit("stmia r0!, {r8}")
print ""
@@ -121,18 +121,18 @@
for j in xrange(1, (i+1)//2):
mulacc(acc, r[j], r[i-j])
# multiply by 2
- emit("adds r%s, r%s", acc[0], acc[0])
- emit("adcs r%s, r%s", acc[1], acc[1])
- emit("adc r%s, r%s", acc[2], acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+ emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
# add equal word (if any)
if ((i+1) % 2) != 0:
mulacc(acc, r[i//2], r[i//2])
# add old accumulator
- emit("adds r%s, r%s", acc[0], old_acc[0])
- emit("adcs r%s, r%s", acc[1], old_acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
# store
emit("stmia r0!, {r%s}", acc[0])
@@ -155,23 +155,23 @@
mulacc(acc, regs[j], regs[limit-j])
emit("ldr r14, [r0]") # load stored value from initial block, and add to accumulator
- emit("adds r%s, r14", acc[0])
- emit("adcs r%s, #0", acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r14", acc[0], acc[0])
+ emit("adcs r%s, r%s, #0", acc[1], acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
# multiply by 2
- emit("adds r%s, r%s", acc[0], acc[0])
- emit("adcs r%s, r%s", acc[1], acc[1])
- emit("adc r%s, r%s", acc[2], acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+ emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
# add equal word
if limit == 4:
mulacc(acc, regs[2], regs[2])
# add old accumulator
- emit("adds r%s, r%s", acc[0], old_acc[0])
- emit("adcs r%s, r%s", acc[1], old_acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
# store
emit("stmia r0!, {r%s}", acc[0])
@@ -189,18 +189,18 @@
mulacc(acc, regs[i+j], regs[s - 1 - j])
# multiply by 2
- emit("adds r%s, r%s", acc[0], acc[0])
- emit("adcs r%s, r%s", acc[1], acc[1])
- emit("adc r%s, r%s", acc[2], acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+ emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
# add equal word (if any)
if ((s-i) % 2) != 0:
mulacc(acc, regs[i + (s-i)//2], regs[i + (s-i)//2])
# add old accumulator
- emit("adds r%s, r%s", acc[0], old_acc[0])
- emit("adcs r%s, r%s", acc[1], old_acc[1])
- emit("adc r%s, #0", acc[2])
+ emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+ emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+ emit("adc r%s, r%s, #0", acc[2], acc[2])
# store
emit("stmia r0!, {r%s}", acc[0])
@@ -209,34 +209,34 @@
acc = acc[1:] + acc[:1]
emit("mov r%s, #0", acc[2])
emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 3], regs[s - 1])
-emit("adds r1, r1")
-emit("adcs r%s, r%s", old_acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
+emit("adds r1, r1, r1")
+emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 2])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
emit("mov r%s, #0", acc[2])
emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 1])
-emit("adds r1, r1")
-emit("adcs r%s, r%s", old_acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
-emit("adc r%s, #0", acc[2])
+emit("adds r1, r1, r1")
+emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
emit("stmia r0!, {r%s}", acc[0])
print ""
acc = acc[1:] + acc[:1]
emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 1], regs[s - 1])
-emit("adds r%s, r1", acc[0])
-emit("adcs r%s, r%s", acc[1], old_acc[1])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
emit("stmia r0!, {r%s}", acc[0])
emit("stmia r0!, {r%s}", acc[1])
diff --git a/uECC.c b/uECC.c
index a119738..70b8ce4 100644
--- a/uECC.c
+++ b/uECC.c
@@ -386,11 +386,13 @@
#else /* uECC_SQUARE_FUNC */
+#if uECC_ENABLE_VLI_API
uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
const uECC_word_t *left,
wordcount_t num_words) {
uECC_vli_mult(result, left, left, num_words);
}
+#endif /* uECC_ENABLE_VLI_API */
#endif /* uECC_SQUARE_FUNC */
@@ -493,6 +495,7 @@
#if uECC_SQUARE_FUNC
+#if uECC_ENABLE_VLI_API
/* Computes result = left^2 % mod. */
uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result,
const uECC_word_t *left,
@@ -502,6 +505,7 @@
uECC_vli_square(product, left, num_words);
uECC_vli_mmod(result, product, mod, num_words);
}
+#endif /* uECC_ENABLE_VLI_API */
uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result,
const uECC_word_t *left,
@@ -517,12 +521,14 @@
#else /* uECC_SQUARE_FUNC */
+#if uECC_ENABLE_VLI_API
uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result,
const uECC_word_t *left,
const uECC_word_t *mod,
wordcount_t num_words) {
uECC_vli_modMult(result, left, left, mod, num_words);
}
+#endif /* uECC_ENABLE_VLI_API */
uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result,
const uECC_word_t *left,