Add AVR asm implementations for runtime curve selection.

The fast multiply and square routines are not yet supported.
diff --git a/asm_arm.inc b/asm_arm.inc
index 572f9da..510af70 100644
--- a/asm_arm.inc
+++ b/asm_arm.inc
@@ -77,44 +77,12 @@
         "bx %[jump] \n\t"
     #endif
         "1: \n\t"
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
+        REPEAT(DEC(uECC_MAX_WORDS),
+            "ldmia %[lptr]!, {%[left]} \n\t"
+            "ldmia %[rptr]!, {%[right]} \n\t"
+            "adcs %[left], %[right] \n\t"
+            "stmia %[dptr]!, {%[left]} \n\t")
         
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        
-    #if (uECC_MAX_WORDS >= 6)
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-    #endif
-    #if (uECC_MAX_WORDS >= 7)
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-    #endif
-    #if (uECC_MAX_WORDS >= 8)
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-    #endif
         "adcs %[carry], %[carry] \n\t"
         RESUME_SYNTAX
         : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
@@ -162,44 +130,12 @@
         "bx %[jump] \n\t"
     #endif
         "1: \n\t"
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
+        REPEAT(DEC(uECC_MAX_WORDS),
+            "ldmia %[lptr]!, {%[left]} \n\t"
+            "ldmia %[rptr]!, {%[right]} \n\t"
+            "sbcs %[left], %[right] \n\t"
+            "stmia %[dptr]!, {%[left]} \n\t")
         
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        
-    #if (uECC_MAX_WORDS >= 6)
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-    #endif
-    #if (uECC_MAX_WORDS >= 7)
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-    #endif
-    #if (uECC_MAX_WORDS >= 8)
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "sbcs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-    #endif
         "adcs %[carry], %[carry] \n\t"
         RESUME_SYNTAX
         : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
diff --git a/asm_avr.inc b/asm_avr.inc
index 6bd786c..2729a3c 100644
--- a/asm_avr.inc
+++ b/asm_avr.inc
@@ -1,21481 +1,192 @@
-#define DEC_20 19
-#define DEC_24 23
-#define DEC_28 27
-#define DEC_32 31
+#if __AVR_HAVE_EIJMP_EICALL__ /* IJMP = indirect-jump mnemonic used by the computed jumps in this file */
+    #define IJMP "eijmp \n\t"
+#else
+    #define IJMP "ijmp \n\t"
+#endif
 
-#define DEC(N) uECC_CONCAT(DEC_, N)
+#if (uECC_OPTIMIZATION_LEVEL >= 2)
 
-#define REPEAT_1(stuff) stuff
-#define REPEAT_2(stuff) REPEAT_1(stuff) stuff
-#define REPEAT_3(stuff) REPEAT_2(stuff) stuff
-#define REPEAT_4(stuff) REPEAT_3(stuff) stuff
-#define REPEAT_5(stuff) REPEAT_4(stuff) stuff
-#define REPEAT_6(stuff) REPEAT_5(stuff) stuff
-#define REPEAT_7(stuff) REPEAT_6(stuff) stuff
-#define REPEAT_8(stuff) REPEAT_7(stuff) stuff
-#define REPEAT_9(stuff) REPEAT_8(stuff) stuff
-#define REPEAT_10(stuff) REPEAT_9(stuff) stuff
-#define REPEAT_11(stuff) REPEAT_10(stuff) stuff
-#define REPEAT_12(stuff) REPEAT_11(stuff) stuff
-#define REPEAT_13(stuff) REPEAT_12(stuff) stuff
-#define REPEAT_14(stuff) REPEAT_13(stuff) stuff
-#define REPEAT_15(stuff) REPEAT_14(stuff) stuff
-#define REPEAT_16(stuff) REPEAT_15(stuff) stuff
-#define REPEAT_17(stuff) REPEAT_16(stuff) stuff
-#define REPEAT_18(stuff) REPEAT_17(stuff) stuff
-#define REPEAT_19(stuff) REPEAT_18(stuff) stuff
-#define REPEAT_20(stuff) REPEAT_19(stuff) stuff
-#define REPEAT_21(stuff) REPEAT_20(stuff) stuff
-#define REPEAT_22(stuff) REPEAT_21(stuff) stuff
-#define REPEAT_23(stuff) REPEAT_22(stuff) stuff
-#define REPEAT_24(stuff) REPEAT_23(stuff) stuff
-#define REPEAT_25(stuff) REPEAT_24(stuff) stuff
-#define REPEAT_26(stuff) REPEAT_25(stuff) stuff
-#define REPEAT_27(stuff) REPEAT_26(stuff) stuff
-#define REPEAT_28(stuff) REPEAT_27(stuff) stuff
-#define REPEAT_29(stuff) REPEAT_28(stuff) stuff
-#define REPEAT_30(stuff) REPEAT_29(stuff) stuff
-#define REPEAT_31(stuff) REPEAT_30(stuff) stuff
-#define REPEAT_32(stuff) REPEAT_31(stuff) stuff
-
-#define REPEAT(N, stuff) uECC_CONCAT(REPEAT_, N)(stuff)
-
-#define STR2(thing) #thing
-#define STR(thing) STR2(thing)
-
-#if (uECC_ASM == uECC_asm_fast)
-
-uECC_VLI_API void uECC_vli_clear(uint8_t *vli) {
+uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) {
+    volatile uECC_word_t *v = vli; /* cursor handed to the asm as the X operand ("+x") */
     __asm__ volatile (
-        REPEAT(uECC_BYTES,
-            "st %a[ptr]+, r1 \n\t")
-        : [ptr] "+e" (vli)
-        :
-        : "r0", "cc", "memory"
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "ldi r30, pm_lo8(1f) \n\t"
+        "ldi r31, pm_hi8(1f) \n\t"
+        "sub r30, %[num] \n\t"
+        "sbc r31, __zero_reg__ \n\t"
+        IJMP /* jump to (label 1 - num_words); presumably one program word per store, so num_words stores run */
+    #endif
+        /* uECC_MAX_WORDS unrolled stores of __zero_reg__ through X, post-incrementing. */
+        REPEAT(uECC_MAX_WORDS, "st x+, __zero_reg__ \n\t")
+        "1: \n\t"
+        : "+x" (v)
+        : [num] "r" (num_words)
+        : /* clobbers: Z and flags are only touched when the computed jump is compiled in */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+          "r30", "r31", "cc"
+    #endif
     );
 }
 #define asm_clear 1
 
-uECC_VLI_API void uECC_vli_set(uint8_t *dest, const uint8_t *src) {
+uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) {
+    volatile uECC_word_t *d = dest; /* destination cursor, bound to X; src is bound to Y */
     __asm__ volatile (
-        REPEAT(uECC_BYTES,
-            "ld r0, %a[sptr]+ \n\t"
-            "st %a[dptr]+, r0 \n\t")
-        : [dptr] "+e" (dest), [sptr] "+e" (src)
-        :
-        : "r0", "cc", "memory"
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "ldi r30, pm_lo8(1f) \n\t"
+        "ldi r31, pm_hi8(1f) \n\t"
+        "sub r30, %[num] \n\t"
+        "sbc r31, __zero_reg__ \n\t"
+        IJMP
+    #endif
+        /* Unrolled copy src -> dest through r0; [num] is num_words * 2 because each element is two instructions. */
+        REPEAT(uECC_MAX_WORDS,
+            "ld r0, y+ \n\t"
+            "st x+, r0 \n\t")
+        "1: \n\t"
+        : "+x" (d), "+y" (src)
+        : [num] "r" ((uint8_t)(num_words * 2))
+        : "r0" /* the separating comma lives on the conditional line so the list is valid in both builds */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+          , "r30", "r31", "cc"
+    #endif
     );
 }
 #define asm_set 1
 
-uECC_VLI_API void uECC_vli_rshift1(uint8_t *vli) {
+uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) {
+    volatile uECC_word_t *v = vli; /* cursor handed to the asm as the X operand ("+x") */
     __asm__ volatile (
-        "adiw r30, " STR(uECC_BYTES) " \n\t"
-        "ld r0, -z \n\t"  /* Load byte. */
-        "lsr r0 \n\t" /* Shift. */
-        "st z, r0 \n\t"  /* Store the first result byte. */
-
-        /* Now we just do the remaining bytes with the carry bit (using ROR) */
-        REPEAT(DEC(uECC_BYTES),
-            "ld r0, -z \n\t"
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "ldi r30, pm_lo8(1f) \n\t"
+        "ldi r31, pm_hi8(1f) \n\t"
+        "sub r30, %[jump] \n\t"
+        "sbc r31, __zero_reg__ \n\t"
+    #endif
+        /* Advance X by num_words (past the end), then shift the topmost byte right with LSR. */
+        "add r26, %[num] \n\t"
+        "adc r27, __zero_reg__ \n\t"
+        "ld r0, -x \n\t"
+        "lsr r0 \n\t"
+        "st x, r0 \n\t"
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        IJMP
+    #endif
+        /* Remaining bytes, walking downwards, are rotated through the carry bit with ROR. */
+        REPEAT(DEC(uECC_MAX_WORDS),
+            "ld r0, -x \n\t"
             "ror r0 \n\t"
-            "st z, r0 \n\t")
-        : "+z" (vli)
-        :
-        : "r0", "cc", "memory"
+            "st x, r0 \n\t")
+        "1: \n\t"
+        : "+x" (v)
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        : [num] "r" (num_words), [jump] "r" ((uint8_t)(3 * (num_words - 1))) /* 3 instructions per remaining byte */
+        : "r0", "r30", "r31", "cc"
+    #else
+        : [num] "r" (num_words)
+        : "r0", "cc"
+    #endif
     );
 }
 #define asm_rshift1 1
 
-/* Computes result = left + right, returning carry. Can modify in place. */
-uECC_VLI_API uint8_t uECC_vli_add(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    uint8_t carry = 0;
-    uint8_t left_byte;
+#define ADD_RJPM_TABLE(N)       \
+    "movw r30, %A[result] \n\t" \
+    "rjmp add_%=_" #N " \n\t"
+
+#define ADD_RJPM_DEST(N)     \
+    "add_%=_" #N ":"         \
+    "ld %[clb], x+ \n\t"     \
+    "ld %[rb], y+ \n\t"      \
+    "adc %[clb], %[rb] \n\t" \
+    "st z+, %[clb] \n\t"
+/* Computes result = left + right over num_words elements and returns the final carry. */
+uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+    volatile uECC_word_t *r = result; /* passed as [result]; copied into Z before the first store */
+    uint8_t carry;
     uint8_t right_byte;
 
     __asm__ volatile (
-        "ld %[left], x+ \n\t"  /* Load left byte. */
-        "ld %[right], y+ \n\t" /* Load right byte. */
-        "add %[left], %[right] \n\t" /* Add the first byte. */
-        "st z+, %[left] \n\t"  /* Store the first result byte. */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "ldi r30, pm_lo8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t"
+        "ldi r31, pm_hi8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t"
+        "sub r30, %[num] \n\t"
+        "sbc r31, __zero_reg__ \n\t"
+    #else
+        "movw r30, %A[result] \n\t" /* no jump table in this build, so Z must be loaded here */
+    #endif
+        "clc \n\t"
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        IJMP
+        REPEATM(uECC_MAX_WORDS, ADD_RJPM_TABLE)
+    #endif
+        REPEATM(uECC_MAX_WORDS, ADD_RJPM_DEST)
         
-        /* Now we just do the remaining bytes with the carry bit (using ADC) */
-        REPEAT(DEC(uECC_BYTES),
-            "ld %[left], x+ \n\t"
-            "ld %[right], y+ \n\t"
-            "adc %[left], %[right] \n\t"
-            "st z+, %[left] \n\t")
-        
-        "adc %[carry], %[carry] \n\t" /* Store carry bit. */
-        "sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
+        "mov %[clb], __zero_reg__ \n\t"
+        "adc %[clb], %[clb] \n\t" /* Store carry bit. */
 
-        : "+z" (result), "+x" (left),
-          [carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
-        : "y" (right)
-        : "cc", "memory"
+        : "+x" (left), "+y" (right),
+          [clb] "=&r" (carry), [rb] "=&r" (right_byte)
+        : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2))
+        : "r30", "r31", "cc"
     );
     return carry;
 }
 #define asm_add 1
 
-/* Computes result = left - right, returning borrow. Can modify in place. */
-uECC_VLI_API uint8_t uECC_vli_sub(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    uint8_t borrow = 0;
-    uint8_t left_byte;
+#define SUB_RJPM_TABLE(N)       \
+    "movw r30, %A[result] \n\t" \
+    "rjmp sub_%=_" #N " \n\t"
+
+#define SUB_RJPM_DEST(N)     \
+    "sub_%=_" #N ":"         \
+    "ld %[clb], x+ \n\t"     \
+    "ld %[rb], y+ \n\t"      \
+    "sbc %[clb], %[rb] \n\t" \
+    "st z+, %[clb] \n\t"
+/* Computes result = left - right over num_words elements and returns the final borrow. */
+uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+    volatile uECC_word_t *r = result; /* passed as [result]; copied into Z before the first store */
+    uint8_t carry;
     uint8_t right_byte;
 
     __asm__ volatile (
-        "ld %[left], x+ \n\t"  /* Load left byte. */
-        "ld %[right], y+ \n\t" /* Load right byte. */
-        "sub %[left], %[right] \n\t" /* Subtract the first byte. */
-        "st z+, %[left] \n\t"  /* Store the first result byte. */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "ldi r30, pm_lo8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t"
+        "ldi r31, pm_hi8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t"
+        "sub r30, %[num] \n\t"
+        "sbc r31, __zero_reg__ \n\t"
+    #else
+        "movw r30, %A[result] \n\t" /* no jump table in this build, so Z must be loaded here */
+    #endif
+        "clc \n\t"
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        IJMP
+        REPEATM(uECC_MAX_WORDS, SUB_RJPM_TABLE)
+    #endif
+        REPEATM(uECC_MAX_WORDS, SUB_RJPM_DEST)
         
-        /* Now we just do the remaining bytes with the carry bit (using SBC) */
-        REPEAT(DEC(uECC_BYTES),
-            "ld %[left], x+ \n\t"
-            "ld %[right], y+ \n\t"
-            "sbc %[left], %[right] \n\t"
-            "st z+, %[left] \n\t")
-        
-        "adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
-        "sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
+        "mov %[clb], __zero_reg__ \n\t"
+        "adc %[clb], %[clb] \n\t" /* Store carry bit. */
 
-        : "+z" (result), "+x" (left),
-            [borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
-        : "y" (right)
-        : "cc", "memory"
+        : "+x" (left), "+y" (right),
+          [clb] "=&r" (carry), [rb] "=&r" (right_byte)
+        : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2))
+        : "r30", "r31", "cc"
     );
-    return borrow;
+    return carry;
 }
 #define asm_sub 1
 
-#if (uECC_BYTES == 20)
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    __asm__ volatile (
-        "adiw r30, 10 \n\t"
-        "adiw r28, 10 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r21, y+ \n\t"
-        "ldi r25, 0 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r24 \n\t"
-        "st z+, r22 \n\t"
-
-        "sbiw r30, 30 \n\t"
-        "sbiw r28, 20 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r21, y+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r20, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r21, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r24 \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+y" (right), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc", "memory"
-    );
-}
-#define asm_mult 1
-#elif (uECC_BYTES == 24)
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    __asm__ volatile (
-        "adiw r30, 20 \n\t"
-        "adiw r28, 20 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ldi r25, 0 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r24 \n\t"
-        "st z+, r22 \n\t"
-
-        "sbiw r30, 18 \n\t"
-        "sbiw r28, 14 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r21, y+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r24 \n\t"
-
-        "sbiw r30, 38 \n\t"
-        "sbiw r28, 24 \n\t"
-        "sbiw r26, 14 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r21, y+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r20, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r21, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "st z+, r23 \n\t"
-
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+y" (right), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc", "memory"
-    );
-}
-#define asm_mult 1
-#elif (uECC_BYTES == 28)
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    __asm__ volatile (
-        "adiw r30, 20 \n\t"
-        "adiw r28, 20 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r19, y+ \n\t"
-        "ldi r25, 0 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r24 \n\t"
-
-        "sbiw r30, 26 \n\t"
-        "sbiw r28, 18 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r21, y+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r5, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r6, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r8, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "mul r9, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "st z+, r23 \n\t"
-
-        "sbiw r30, 46 \n\t"
-        "sbiw r28, 28 \n\t"
-        "sbiw r26, 18 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r21, y+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r20, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r21, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r24 \n\t"
-        "st z+, r22 \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+y" (right), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc", "memory"
-    );
-}
-#define asm_mult 1
-#elif (uECC_BYTES == 32)
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    __asm__ volatile (
-        "adiw r30, 30 \n\t"
-        "adiw r28, 30 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ldi r25, 0 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r24 \n\t"
-
-        "sbiw r30, 14 \n\t"
-        "sbiw r28, 12 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r21, y+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "st z+, r23 \n\t"
-
-        "sbiw r30, 34 \n\t"
-        "sbiw r28, 22 \n\t"
-        "sbiw r26, 12 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r21, y+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r20, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r21, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r24 \n\t"
-        "st z+, r22 \n\t"
-
-        "sbiw r30, 54 \n\t"
-        "sbiw r28, 32 \n\t"
-        "sbiw r26, 22 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r12, y+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r17, y+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r18, y+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r19, y+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r20, y+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r21, y+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r20, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r21, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r18, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r19, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r20, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r21, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r25 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r25 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r5, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r8, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r19 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r18 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r9, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r11, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r21 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r24 \n\t"
-
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+y" (right), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc", "memory"
-    );
-}
-#define asm_mult 1
-#endif /* uECC_BYTES == 32 */
-
-#if uECC_SQUARE_FUNC
-
-#if (uECC_BYTES == 20)
-// Inlining is allowed because y (r28, r29) is not used.
-uECC_VLI_API void uECC_vli_square(uint8_t *result, const uint8_t *left) {
-    __asm__ volatile (
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r12, x+ \n\t"
-        "ld r13, x+ \n\t"
-        "ld r14, x+ \n\t"
-        "ld r15, x+ \n\t"
-        "ld r16, x+ \n\t"
-        "ld r17, x+ \n\t"
-        "ld r18, x+ \n\t"
-        "ld r19, x+ \n\t"
-        "ld r20, x+ \n\t"
-        "ld r21, x+ \n\t"
-        "ldi r27, 0 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r2 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r3 \n\t"
-        "lsl r0 \n\t"
-        "rol r1 \n\t"
-        "adc r24, r27 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r27 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r4 \n\t"
-        "lsl r0 \n\t"
-        "rol r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r4, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r10 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r6, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r11 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r6, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r7, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r7, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r6, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r7, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r8, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r7, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r8, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r7, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r8, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r9, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r8, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r9, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r9, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r10, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r10, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r11, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r12, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r4, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r12, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r12, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r13, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r6, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r12, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r13, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r7, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r12, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r13, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r14, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r8, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r12, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r13, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r14, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r9, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r12, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r13, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r14, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r15, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r10, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r12, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r13, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r14, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r15, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r11, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r12, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r13, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r14, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r15, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r16, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r12, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r13, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r14, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r15, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r16, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r13, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r14, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r15, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r16, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r17, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r14, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r15, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r16, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r17, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r15, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r16, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "mul r17, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r18, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r16, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r17, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "mul r18, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r17, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r18, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r19, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r27 \n\t"
-        "add r23, r25 \n\t"
-        "adc r24, r26 \n\t"
-        "adc r22, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r26, 0 \n\t"
-        "mul r18, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r25, r1 \n\t"
-        "mul r19, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "adc r26, r27 \n\t"
-        "lsl r23 \n\t"
-        "rol r25 \n\t"
-        "rol r26 \n\t"
-        "add r23, r24 \n\t"
-        "adc r25, r22 \n\t"
-        "adc r26, r27 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r19, r21 \n\t"
-        "lsl r0 \n\t"
-        "rol r1 \n\t"
-        "adc r23, r27 \n\t"
-        "add r25, r0 \n\t"
-        "adc r26, r1 \n\t"
-        "adc r23, r27 \n\t"
-        "mul r20, r20 \n\t"
-        "add r25, r0 \n\t"
-        "adc r26, r1 \n\t"
-        "adc r23, r27 \n\t"
-        "st z+, r25 \n\t"
-
-        "ldi r25, 0 \n\t"
-        "mul r20, r21 \n\t"
-        "lsl r0 \n\t"
-        "rol r1 \n\t"
-        "adc r25, r27 \n\t"
-        "add r26, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r25, r27 \n\t"
-        "st z+, r26 \n\t"
-
-        "mul r21, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r25, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r25 \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc", "memory"
-    );
-}
-#define asm_square 1
-
-#elif (uECC_BYTES == 24)
-
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_square(uint8_t *result, const uint8_t *left) {
-    __asm__ volatile (
-        "ldi r25, 0 \n\t"
-        "movw r28, r26 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "adiw r28, 20 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "adiw r30, 20 \n\t"
-        
-        "ldi r23, 0 \n\t"
-        "mul 2, 12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-        
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-        
-        "ld r12, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ld r13, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-        
-        "ld r2, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-        
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r24 \n\t"
-        "st z+, r22 \n\t"
-        
-        "sbiw r26, 4 \n\t"
-        "sbiw r30, 28 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r12, x+ \n\t"
-        "ld r13, x+ \n\t"
-        "ld r14, x+ \n\t"
-        "ld r15, x+ \n\t"
-        "ld r16, x+ \n\t"
-        "ld r17, x+ \n\t"
-        "ld r18, x+ \n\t"
-        "ld r19, x+ \n\t"
-        "ld r20, x+ \n\t"
-        "ld r21, x+ \n\t"
-        
-        "ldi r23, 0 \n\t"
-        "mul r2, r2 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-        
-        "ldi r24, 0 \n\t"
-        "mul r2, r3 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r4, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r10 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r6, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r11 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r7, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r8, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r9, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r10, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r11, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r12, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r3, r2 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r2 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r13, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r4, r3 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r5, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ld r4, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r5, r3 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r6, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r14, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r5, r4 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r6, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r4 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r7, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r15, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r6, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r7, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r7, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r8, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r16, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r8, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r9, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r9, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r10, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r17, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r10, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r11, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r11, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r12, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r18, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r12, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r13, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r13, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r14, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r19, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r14, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r15, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r15, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r16, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r20, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r16, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r17, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r17, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r18, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r21, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r18, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r19, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r19, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r20, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r2, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r20, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r21, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r22, 0 \n\t"
-        "mul r21, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r2, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r29, 0 \n\t"
-        "mul r2, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-        
-        "ldi r23, 0 \n\t"
-        "mul r3, r5 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r4 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r28 \n\t"
-        
-        "ldi r28, 0 \n\t"
-        "mul r4, r5 \n\t"
-        "add r29, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r28, r25 \n\t"
-        "add r29, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r28, r25 \n\t"
-        "st z+, r29 \n\t"
-        
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r28 \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
-    );
-}
-#define asm_square 1
-
-#elif (uECC_BYTES == 28)
-
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_square(uint8_t *result, const uint8_t *left) {
-    __asm__ volatile (
-        "ldi r25, 0 \n\t"
-        "movw r28, r26 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "adiw r28, 20 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "adiw r30, 20 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul 2, 12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r5, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r3, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r4, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r24 \n\t"
-
-        "sbiw r26, 8 \n\t"
-        "sbiw r30, 36 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r12, x+ \n\t"
-        "ld r13, x+ \n\t"
-        "ld r14, x+ \n\t"
-        "ld r15, x+ \n\t"
-        "ld r16, x+ \n\t"
-        "ld r17, x+ \n\t"
-        "ld r18, x+ \n\t"
-        "ld r19, x+ \n\t"
-        "ld r20, x+ \n\t"
-        "ld r21, x+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r2 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r3 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r4, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r10 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r6, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r11 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r7, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r8, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r9, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r10, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r11, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r12, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r3, r2 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r2 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r13, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r4, r3 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r5, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r5, r3 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r6, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r14, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r5, r4 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r6, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r4 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r7, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r15, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r6, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r7, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r8, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r16, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r7, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r8, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r8, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r9, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r17, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r8, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r9, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r9, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r10, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r18, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r9, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r10, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r11, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r19, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r10, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r11, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r11, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r12, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r20, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r12, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r13, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r13, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r14, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r21, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r14, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r15, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r15, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r16, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r2, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r16, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r17, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r17, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r18, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r18, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r19, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r19, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r20, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r4, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r20, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r21, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r3, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r21, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r2, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r3, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r4, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r6, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r4, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r5, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r6, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r7, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r6, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r7, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r7, r9 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r8, r8 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r28 \n\t"
-
-        "ldi r28, 0 \n\t"
-        "mul r8, r9 \n\t"
-        "add r29, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r28, r25 \n\t"
-        "add r29, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r28, r25 \n\t"
-        "st z+, r29 \n\t"
-
-        "mul r9, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r28 \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
-    );
-}
-#define asm_square 1
-
-#elif (uECC_BYTES == 32)
-
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_square(uint8_t *result, const uint8_t *left) {
-    __asm__ volatile (
-        "ldi r25, 0 \n\t"
-        "movw r28, r26 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "adiw r28, 20 \n\t"
-        "ld r12, y+ \n\t"
-        "ld r13, y+ \n\t"
-        "ld r14, y+ \n\t"
-        "ld r15, y+ \n\t"
-        "ld r16, y+ \n\t"
-        "ld r17, y+ \n\t"
-        "adiw r30, 20 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul 2, 12 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r12, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r13, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r14, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r15, y+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r16, y+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r17, y+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r12 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r13 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r23, 0 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r2, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r3, r14 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r24, 0 \n\t"
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r2, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r3, r15 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r3, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r4, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r6, r17 \n\t"
-        "add r24, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r24 \n\t"
-
-        "mul r7, r17 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "st z+, r23 \n\t"
-
-        "sbiw r26, 12 \n\t"
-        "sbiw r30, 44 \n\t"
-        "ld r2, x+ \n\t"
-        "ld r3, x+ \n\t"
-        "ld r4, x+ \n\t"
-        "ld r5, x+ \n\t"
-        "ld r6, x+ \n\t"
-        "ld r7, x+ \n\t"
-        "ld r8, x+ \n\t"
-        "ld r9, x+ \n\t"
-        "ld r10, x+ \n\t"
-        "ld r11, x+ \n\t"
-        "ld r12, x+ \n\t"
-        "ld r13, x+ \n\t"
-        "ld r14, x+ \n\t"
-        "ld r15, x+ \n\t"
-        "ld r16, x+ \n\t"
-        "ld r17, x+ \n\t"
-        "ld r18, x+ \n\t"
-        "ld r19, x+ \n\t"
-        "ld r20, x+ \n\t"
-        "ld r21, x+ \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r2, r2 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-
-        "ldi r24, 0 \n\t"
-        "mul r2, r3 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r24, r25 \n\t"
-        "st z+, r22 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r4, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r10 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r6, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r11 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r12 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r7, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r14 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r8, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r15 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r16 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r9, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r17 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r18 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r10, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r19 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r2, r20 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r3, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r11, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r2, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r3, r21 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r4, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r12, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r3, r2 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r4, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r3, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r4, r2 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r5, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r13, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r4, r3 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r5, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r4, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r5, r3 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r6, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r14, r14 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r5, r4 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r6, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r5, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r6, r4 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r7, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r15, r15 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r6, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r7, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r6, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r7, r5 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r8, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r16, r16 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r7, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r8, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r7, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r8, r6 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r9, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r10, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r17, r17 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r8, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r9, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r8, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r9, r7 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r10, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r11, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r18, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r9, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r10, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r11, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r9, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r10, r8 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r11, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r12, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r19, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r10, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r11, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r12, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r10, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r11, r9 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r12, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r13, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r20, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r11, r10 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r12, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r13, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r11, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r12, r10 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r13, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r14, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r21, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r12, r11 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r13, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r14, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r12, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r13, r11 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r14, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r15, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r2, r2 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r13, r12 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r14, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r15, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ld r13, x+ \n\t"
-        "ldi r22, 0 \n\t"
-        "mul r14, r12 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r15, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r16, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r25 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r3, r3 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r14, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r15, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r16, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r17, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r3, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "ld r0, z \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r25 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r15, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r16, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r17, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r18, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r4, r4 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r16, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r17, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r18, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r19, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r3, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r17, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r18, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r19, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r20, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r5, r5 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r18, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r19, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r20, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r21, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r3, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r19, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r20, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r21, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r2, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r6, r6 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r20, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r21, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r2, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r3, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r21, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r2, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r3, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r4, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r7, r7 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r2, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r3, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r4, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r5, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r3, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r4, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r5, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r6, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r8, r8 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r4, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r5, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r6, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r7, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r5, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r6, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r7, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r8, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r9, r9 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r6, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r7, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r8, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r9, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r7, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r8, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "mul r9, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r10, r10 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r8, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r9, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "mul r10, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r22, 0 \n\t"
-        "mul r9, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r24, r1 \n\t"
-        "mul r10, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r24 \n\t"
-        "rol r22 \n\t"
-        "mul r11, r11 \n\t"
-        "add r23, r0 \n\t"
-        "adc r24, r1 \n\t"
-        "adc r22, r25 \n\t"
-        "add r23, r28 \n\t"
-        "adc r24, r29 \n\t"
-        "adc r22, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r29, 0 \n\t"
-        "mul r10, r13 \n\t"
-        "mov r23, r0 \n\t"
-        "mov r28, r1 \n\t"
-        "mul r11, r12 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "adc r29, r25 \n\t"
-        "lsl r23 \n\t"
-        "rol r28 \n\t"
-        "rol r29 \n\t"
-        "add r23, r24 \n\t"
-        "adc r28, r22 \n\t"
-        "adc r29, r25 \n\t"
-        "st z+, r23 \n\t"
-
-        "ldi r23, 0 \n\t"
-        "mul r11, r13 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "mul r12, r12 \n\t"
-        "add r28, r0 \n\t"
-        "adc r29, r1 \n\t"
-        "adc r23, r25 \n\t"
-        "st z+, r28 \n\t"
-
-        "ldi r28, 0 \n\t"
-        "mul r12, r13 \n\t"
-        "add r29, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r28, r25 \n\t"
-        "add r29, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "adc r28, r25 \n\t"
-        "st z+, r29 \n\t"
-
-        "mul r13, r13 \n\t"
-        "add r23, r0 \n\t"
-        "adc r28, r1 \n\t"
-        "st z+, r23 \n\t"
-        "st z+, r28 \n\t"
-        "eor r1, r1 \n\t"
-        : "+x" (left), "+z" (result)
-        :
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
-    );
-}
-#define asm_square 1
-
-#endif /* uECC_BYTES == xx */
-#endif /* uECC_SQUARE_FUNC */
-
-uECC_VLI_API void uECC_vli_modSub_fast(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    uint8_t t1, t2;
-    __asm__ volatile (
-        "push r28 \n\t" /* Save Y */
-        "push r29 \n\t"
-        
-        "ld %[t1], x+ \n\t"     /* Load left word. */
-        "ld %[t2], y+ \n\t"     /* Load right word. */
-        "sub %[t1], %[t2] \n\t" /* Subtract the first word. */
-        "st z+, %[t1] \n\t"     /* Store the first result word. */
-        
-        /* Now we just do the remaining words with the carry bit (using SBC) */
-        REPEAT(DEC(uECC_BYTES),
-            "ld %[t1], x+ \n\t"
-            "ld %[t2], y+ \n\t"
-            "sbc %[t1], %[t2] \n\t"
-            "st z+, %[t1] \n\t")
-        
-        "brcs 1f \n\t"   /* If borrow is set, then we need to add */
-        "rjmp done \n\t" /* otherwise we are done */
-        "1: \n\t"
-        
-        "sbiw r30, " STR(uECC_BYTES) " \n\t" /* make z point at result again */
-        "ldi r28, lo8(curve_p) \n\t" /* make y point at curve_p */
-    	"ldi r29, hi8(curve_p) \n\t"
-    	
-    	/* do the addition */
-    	"ld %[t1], z \n\t"
-        "ld %[t2], y+ \n\t"
-        "add %[t1], %[t2] \n\t"
-        "st z+, %[t1] \n\t"
-        REPEAT(DEC(uECC_BYTES),
-            "ld %[t1], z \n\t"
-            "ld %[t2], y+ \n\t"
-            "adc %[t1], %[t2] \n\t"
-            "st z+, %[t1] \n\t")
-        
-        "done: \n\t"
-        "pop r29 \n\t" /* Restore Y */
-        "pop r28 \n\t"
-
-        : "+z" (result), "+x" (left),
-          [t1] "=&r" (t1), [t2] "=&r" (t2)
-        : "y" (right)
-        : "cc", "memory"
-    );
-}
-#define asm_modSub_fast 1
-
-#if uECC_CURVE == uECC_secp160r1
-uECC_VLI_API void uECC_vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
+#if uECC_SUPPORTS_secp160r1
+static const struct uECC_Curve_t curve_secp160r1;
+static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
     uint8_t carry = 0;
     __asm__ volatile (
         "in r30, __SP_L__ \n\t"
@@ -21646,22 +357,24 @@
         : "y" (result)
         : "r0", "r18", "r19", "r30", "r31", "cc", "memory"
     );
-    
+
     if (carry > 0) {
         --carry;
-        uECC_vli_sub(result, result, curve_p);
+        uECC_vli_sub(result, result, curve_secp160r1.p, 20);
     }
     if (carry > 0) {
-        uECC_vli_sub(result, result, curve_p);
+        uECC_vli_sub(result, result, curve_secp160r1.p, 20);
     }
-    if (uECC_vli_cmp_unsafe(result, curve_p) > 0) {
-        uECC_vli_sub(result, result, curve_p);
+    if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, 20) > 0) {
+        uECC_vli_sub(result, result, curve_secp160r1.p, 20);
     }
 }
-#define asm_mmod_fast 1
+#define asm_mmod_fast_secp160r1 1
+#endif /* uECC_SUPPORTS_secp160r1 */
 
-#elif (uECC_CURVE == uECC_secp256k1)
-uECC_VLI_API void uECC_vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
+#if uECC_SUPPORTS_secp256k1 /* this asm reduction was written for the secp256k1 prime */
+static const struct uECC_Curve_t curve_secp256k1;
+static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) {
     uint8_t carry = 0;
     __asm__ volatile (
         "in r30, __SP_L__ \n\t"
@@ -21972,46 +685,26 @@
     
     if (carry > 0) {
         --carry;
-        uECC_vli_sub(result, result, curve_p);
+        uECC_vli_sub(result, result, curve_secp256k1.p, 32);
     }
     if (carry > 0) {
-        uECC_vli_sub(result, result, curve_p);
+        uECC_vli_sub(result, result, curve_secp256k1.p, 32);
     }
-    if (uECC_vli_cmp_unsafe(result, curve_p) > 0) {
-        uECC_vli_sub(result, result, curve_p);
+    if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, 32) > 0) {
+        uECC_vli_sub(result, result, curve_secp256k1.p, 32);
     }
 }
-#define asm_mmod_fast 1
+#define asm_mmod_fast_secp256k1 1
+#endif /* uECC_SUPPORTS_secp256k1 */
 
-#endif /* (uECC_CURVE == uECC_secp256k1) */
-
-#endif /* (uECC_ASM == uECC_asm_fast) */
-
-#if !asm_rshift1
-uECC_VLI_API void uECC_vli_rshift1(uint8_t *vli) {
-    uint8_t i = uECC_BYTES;
-    __asm__ volatile (
-        "adiw r30, " STR(uECC_BYTES) " \n\t"
-        "clc \n\t"
-        
-        "1: \n\t"
-        "ld r0, -z \n\t"
-        "ror r0 \n\t"
-        "st z, r0 \n\t"
-        "dec %[i] \n\t"
-        "brne 1b \n\t"
-
-        : "+z" (vli), [i] "+r" (i)
-        : 
-        : "r0", "cc", "memory"
-    );
-}
-#define asm_rshift1 1
-#endif
+#endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
 
 #if !asm_add
-uECC_VLI_API uint8_t uECC_vli_add(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    uint8_t i = uECC_BYTES;
+uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+    volatile uECC_word_t *r = result;
     uint8_t carry = 0;
     uint8_t left_byte;
     uint8_t right_byte;
@@ -22028,12 +721,11 @@
         "brne 1b \n\t"
         
         "adc %[carry], %[carry] \n\t" /* Store carry bit. */
-        "sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
 
-        : "+z" (result), "+x" (left), [i] "+r" (i),
+        : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
             [carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
-        : "y" (right)
-        : "cc", "memory"
+        : 
+        : "cc"
     );
     return carry;
 }
@@ -22041,8 +733,11 @@
 #endif
 
 #if !asm_sub
-uECC_VLI_API uint8_t uECC_vli_sub(uint8_t *result, const uint8_t *left, const uint8_t *right) {
-    uint8_t i = uECC_BYTES;
+uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+    volatile uECC_word_t *r = result;
     uint8_t borrow = 0;
     uint8_t left_byte;
     uint8_t right_byte;
@@ -22059,12 +754,11 @@
         "brne 1b \n\t"
         
         "adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
-        "sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
 
-        : "+z" (result), "+x" (left), [i] "+r" (i),
+        : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words), /* [i]: loop count */
             [borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
-        : "y" (right)
-        : "cc", "memory"
+        :
+        : "cc"
     );
     return borrow;
 }
@@ -22073,7 +767,11 @@
 
 #if !asm_mult
 __attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
+uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
+                                const uECC_word_t *left,
+                                const uECC_word_t *right,
+                                wordcount_t num_words) {
+    volatile uECC_word_t *r = result;
     uint8_t r0 = 0;
     uint8_t r1 = 0;
     uint8_t r2 = 0;
@@ -22081,7 +779,7 @@
     uint8_t k, i;
     
     __asm__ volatile (
-        "ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES; ++k */
+        "ldi %[k], 1 \n\t" /* k = 1; k < num_words; ++k */
         
         "1: \n\t"
         "ldi %[i], 0 \n\t"  /* i = 0; i < k; ++i */
@@ -22111,12 +809,13 @@
         "mov %[r2], %[zero] \n\t"
         
         "inc %[k] \n\t"
-        "cpi %[k], " STR(uECC_BYTES) " \n\t"
-        "brlo 1b \n\t" /* loop if k < uECC_BYTES */
+        "cp %[k], %[num] \n\t"
+        "brlo 1b \n\t" /* loop if k < num_words */
         
         /* second half */
-        "ldi %[k], " STR(uECC_BYTES) " \n\t" /* k = uECC_BYTES; k > 0; --k */
-        "adiw r28, " STR(uECC_BYTES) " \n\t" /* move right ptr to point at the end of right */
+        "mov %[k], %[num] \n\t" /* k = num_words; k > 0; --k */
+        "add r28, %[num] \n\t" /* move right ptr to point at the end of right */
+        "adc r29, %[zero] \n\t"
         
         "1: \n\t"
         "ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
@@ -22147,26 +846,28 @@
                                 we start 1 higher) */
         "sbc r27, %[zero] \n\t"
         
-        "cpi %[k], 0 \n\t"
+        "cp %[k], %[zero] \n\t"
         "brne 1b \n\t" /* loop if k > 0 */
         
         "st z+, %[r0] \n\t"  /* Store last result byte. */
         "eor r1, r1 \n\t" /* fix r1 to be 0 again */
-        "sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
     
-        : "+z" (result), "+x" (left),
-          [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
-          [k] "=&a" (k), [i] "=&a" (i)
-        : "y" (right)
-        : "r0", "cc", "memory"
+        : "+z" (r), "+x" (left), "+y" (right),
+          [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+          [zero] "+r" (zero), [num] "+r" (num_words),
+          [k] "=&d" (k), [i] "=&d" (i) /* LDI targets: must live in r16-r31 */
+        : 
+        : "r0", "cc"
     );
 }
 #define asm_mult 1
 #endif
 
-#if uECC_SQUARE_FUNC
-#if !asm_square
-uECC_VLI_API void uECC_vli_square(uint8_t *result, const uint8_t *left) {
+#if (uECC_SQUARE_FUNC && !asm_square)
+uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
+                                  const uECC_word_t *left,
+                                  wordcount_t num_words) {
+    volatile uECC_word_t *r = result;
     uint8_t r0 = 0;
     uint8_t r1 = 0;
     uint8_t r2 = 0;
@@ -22174,25 +875,26 @@
     uint8_t k;
     
     __asm__ volatile (
-        "ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES * 2; ++k */
+        "ldi %[k], 1 \n\t" /* k = 1; k < num_words * 2; ++k */
         
         "1: \n\t"
         
         "movw r26, %[orig] \n\t"  /* copy orig ptr to 'left' ptr */
         "movw r30, %[orig] \n\t"  /* copy orig ptr to 'right' ptr */
-        "cpi %[k], " STR(uECC_BYTES) " \n\t"
+        "cp %[k], %[num] \n\t"
         "brlo 2f \n\t"
         "breq 2f \n\t"
         
-        /* when k > uECC_BYTES, we start from (k - uECC_BYTES) on the 'left' ptr */
+        /* when k > num_words, we start from (k - num_words) on the 'left' ptr */
         "add r26, %[k] \n\t"
         "adc r27, %[zero] \n\t"
-        "subi r26, " STR(uECC_BYTES) " \n\t"
+        "sub r26, %[num] \n\t"
         "sbc r27, %[zero] \n\t"
-        "adiw r30, " STR(uECC_BYTES) " \n\t" /* move right ptr to point at the end */
+        "add r30, %[num] \n\t" /* move right ptr to point at the end */
+        "adc r31, %[zero] \n\t"
         "rjmp 3f \n\t"
         
-        "2: \n\t" /* when k <= uECC_BYTES, we add k to the 'right' ptr */
+        "2: \n\t" /* when k <= num_words, we add k to the 'right' ptr */
         "add r30, %[k] \n\t" /* pre-add 'right' ptr */
         "adc r31, %[zero] \n\t"
         
@@ -22232,20 +934,20 @@
         "mov %[r2], %[zero] \n\t"
         
         "inc %[k] \n\t"
-        "cpi %[k], %[max] \n\t"
-        "brlo 1b \n\t" /* loop if k < uECC_BYTES */
+        "cp %[k], %[max] \n\t"
+        "brlo 1b \n\t" /* loop if k < num_words * 2 */
         
         "movw r30, %[result] \n\t"  /* make z point to result */
         "st z+, %[r0] \n\t"  /* Store last result byte. */
         "eor r1, r1 \n\t" /* fix r1 to be 0 again */
     
-        : [result] "+r" (result),
+        : [result] "+r" (r),
           [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
           [k] "=&a" (k)
-        : [orig] "r" (left), [max] "M" (2*uECC_BYTES)
-        : "r0", "r26", "r27", "r30", "r31", "cc", "memory"
+        : [orig] "r" (left), [max] "r" ((uint8_t)(2 * num_words)),
+          [num] "r" (num_words)
+        : "r0", "r26", "r27", "r30", "r31", "cc"
     );
 }
 #define asm_square 1
-#endif
-#endif /* uECC_SQUARE_FUNC */
+#endif /* uECC_SQUARE_FUNC && !asm_square */
diff --git a/asm_avr_mult_square.inc b/asm_avr_mult_square.inc
new file mode 100644
index 0000000..5c416d6
--- /dev/null
+++ b/asm_avr_mult_square.inc
@@ -0,0 +1,21179 @@
+#define FAST_MULT_ASM_5    \
+    "adiw r30, 10 \n\t"    \
+    "adiw r28, 10 \n\t"    \
+    "ld r2, x+ \n\t"       \
+    "ld r3, x+ \n\t"       \
+    "ld r4, x+ \n\t"       \
+    "ld r5, x+ \n\t"       \
+    "ld r6, x+ \n\t"       \
+    "ld r7, x+ \n\t"       \
+    "ld r8, x+ \n\t"       \
+    "ld r9, x+ \n\t"       \
+    "ld r10, x+ \n\t"      \
+    "ld r11, x+ \n\t"      \
+    "ld r12, y+ \n\t"      \
+    "ld r13, y+ \n\t"      \
+    "ld r14, y+ \n\t"      \
+    "ld r15, y+ \n\t"      \
+    "ld r16, y+ \n\t"      \
+    "ld r17, y+ \n\t"      \
+    "ld r18, y+ \n\t"      \
+    "ld r19, y+ \n\t"      \
+    "ld r20, y+ \n\t"      \
+    "ld r21, y+ \n\t"      \
+    "ldi r25, 0 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r12 \n\t"     \
+    "st z+, r0 \n\t"       \
+    "mov r22, r1 \n\t"     \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "mul r3, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r3, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r4, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r5, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r6, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r7, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r8, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r9, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "mul r11, r21 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "st z+, r24 \n\t"      \
+    "st z+, r22 \n\t"      \
+                           \
+    "sbiw r30, 30 \n\t"    \
+    "sbiw r28, 20 \n\t"    \
+    "ld r12, y+ \n\t"      \
+    "ld r13, y+ \n\t"      \
+    "ld r14, y+ \n\t"      \
+    "ld r15, y+ \n\t"      \
+    "ld r16, y+ \n\t"      \
+    "ld r17, y+ \n\t"      \
+    "ld r18, y+ \n\t"      \
+    "ld r19, y+ \n\t"      \
+    "ld r20, y+ \n\t"      \
+    "ld r21, y+ \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r12 \n\t"     \
+    "st z+, r0 \n\t"       \
+    "mov r22, r1 \n\t"     \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "mul r3, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r2, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r3, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r3, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r4, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r4, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r5, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r5, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r6, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r6, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r7, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r7, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r8, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r8, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r9, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r9, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r10, x+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r11, r21 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r11, x+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r12, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r13, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r14, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r15, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r16, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r17, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r18, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r19, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r21 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r20, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r21 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r21, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r3, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r4, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r5, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r6, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r7, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r8, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r9, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "mul r11, r21 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "st z+, r23 \n\t"      \
+    "st z+, r24 \n\t"      \
+    "eor r1, r1 \n\t"
+
+#define FAST_MULT_ASM_6        \
+    "adiw r30, 20 \n\t"        \
+    "adiw r28, 20 \n\t"        \
+    "ld r2, x+ \n\t"           \
+    "ld r3, x+ \n\t"           \
+    "ld r4, x+ \n\t"           \
+    "ld r5, x+ \n\t"           \
+    "ld r12, y+ \n\t"          \
+    "ld r13, y+ \n\t"          \
+    "ld r14, y+ \n\t"          \
+    "ld r15, y+ \n\t"          \
+    "ldi r25, 0 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "st z+, r24 \n\t"          \
+    "st z+, r22 \n\t"          \
+                               \
+    "sbiw r30, 18 \n\t"        \
+    "sbiw r28, 14 \n\t"        \
+    "ld r12, y+ \n\t"          \
+    "ld r13, y+ \n\t"          \
+    "ld r14, y+ \n\t"          \
+    "ld r15, y+ \n\t"          \
+    "ld r16, y+ \n\t"          \
+    "ld r17, y+ \n\t"          \
+    "ld r18, y+ \n\t"          \
+    "ld r19, y+ \n\t"          \
+    "ld r20, y+ \n\t"          \
+    "ld r21, y+ \n\t"          \
+    "ld r6, x+ \n\t"           \
+    "ld r7, x+ \n\t"           \
+    "ld r8, x+ \n\t"           \
+    "ld r9, x+ \n\t"           \
+    "ld r10, x+ \n\t"          \
+    "ld r11, x+ \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r4, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r5, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r14, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r15, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r7, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r8, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r9, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r10, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r11, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r3, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "mul r5, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "st z+, r23 \n\t"          \
+    "st z+, r24 \n\t"          \
+                               \
+    "sbiw r30, 38 \n\t"        \
+    "sbiw r28, 24 \n\t"        \
+    "sbiw r26, 14 \n\t"        \
+    "ld r2, x+ \n\t"           \
+    "ld r12, y+ \n\t"          \
+    "ld r3, x+ \n\t"           \
+    "ld r13, y+ \n\t"          \
+    "ld r4, x+ \n\t"           \
+    "ld r14, y+ \n\t"          \
+    "ld r5, x+ \n\t"           \
+    "ld r15, y+ \n\t"          \
+    "ld r6, x+ \n\t"           \
+    "ld r16, y+ \n\t"          \
+    "ld r7, x+ \n\t"           \
+    "ld r17, y+ \n\t"          \
+    "ld r8, x+ \n\t"           \
+    "ld r18, y+ \n\t"          \
+    "ld r9, x+ \n\t"           \
+    "ld r19, y+ \n\t"          \
+    "ld r10, x+ \n\t"          \
+    "ld r20, y+ \n\t"          \
+    "ld r11, x+ \n\t"          \
+    "ld r21, y+ \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r4, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r5, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r6, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r7, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r7, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r8, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r8, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r9, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r9, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r10, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r10, x+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r11, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r11, x+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r4, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r5, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r5, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r14, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r15, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r16, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r17, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r18, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r19, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r20, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r21, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r14, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r15, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "mul r5, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "st z+, r22 \n\t"          \
+    "st z+, r23 \n\t"          \
+    "eor r1, r1 \n\t"
+
+#define FAST_MULT_ASM_7    \
+    "adiw r30, 20 \n\t"    \
+    "adiw r28, 20 \n\t"    \
+    "ld r2, x+ \n\t"       \
+    "ld r3, x+ \n\t"       \
+    "ld r4, x+ \n\t"       \
+    "ld r5, x+ \n\t"       \
+    "ld r6, x+ \n\t"       \
+    "ld r7, x+ \n\t"       \
+    "ld r8, x+ \n\t"       \
+    "ld r9, x+ \n\t"       \
+    "ld r12, y+ \n\t"      \
+    "ld r13, y+ \n\t"      \
+    "ld r14, y+ \n\t"      \
+    "ld r15, y+ \n\t"      \
+    "ld r16, y+ \n\t"      \
+    "ld r17, y+ \n\t"      \
+    "ld r18, y+ \n\t"      \
+    "ld r19, y+ \n\t"      \
+    "ldi r25, 0 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r12 \n\t"     \
+    "st z+, r0 \n\t"       \
+    "mov r22, r1 \n\t"     \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "mul r3, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r3, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r4, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r5, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r6, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r7, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r8, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "mul r9, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "st z+, r23 \n\t"      \
+    "st z+, r24 \n\t"      \
+                           \
+    "sbiw r30, 26 \n\t"    \
+    "sbiw r28, 18 \n\t"    \
+    "ld r12, y+ \n\t"      \
+    "ld r13, y+ \n\t"      \
+    "ld r14, y+ \n\t"      \
+    "ld r15, y+ \n\t"      \
+    "ld r16, y+ \n\t"      \
+    "ld r17, y+ \n\t"      \
+    "ld r18, y+ \n\t"      \
+    "ld r19, y+ \n\t"      \
+    "ld r20, y+ \n\t"      \
+    "ld r21, y+ \n\t"      \
+    "ld r10, x+ \n\t"      \
+    "ld r11, x+ \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r12 \n\t"     \
+    "st z+, r0 \n\t"       \
+    "mov r22, r1 \n\t"     \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "mul r3, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r2, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r3, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r3, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r4, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r4, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r5, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r5, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r6, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r6, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r7, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r7, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r8, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r8, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r9, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r9, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r12, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r21 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r13, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r14, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r15, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r16, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r17, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r18, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r19, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r11, r19 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r3, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r4, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r5, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r6, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r7, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r8, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "mul r9, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "st z+, r22 \n\t"      \
+    "st z+, r23 \n\t"      \
+                           \
+    "sbiw r30, 46 \n\t"    \
+    "sbiw r28, 28 \n\t"    \
+    "sbiw r26, 18 \n\t"    \
+    "ld r2, x+ \n\t"       \
+    "ld r12, y+ \n\t"      \
+    "ld r3, x+ \n\t"       \
+    "ld r13, y+ \n\t"      \
+    "ld r4, x+ \n\t"       \
+    "ld r14, y+ \n\t"      \
+    "ld r5, x+ \n\t"       \
+    "ld r15, y+ \n\t"      \
+    "ld r6, x+ \n\t"       \
+    "ld r16, y+ \n\t"      \
+    "ld r7, x+ \n\t"       \
+    "ld r17, y+ \n\t"      \
+    "ld r8, x+ \n\t"       \
+    "ld r18, y+ \n\t"      \
+    "ld r9, x+ \n\t"       \
+    "ld r19, y+ \n\t"      \
+    "ld r10, x+ \n\t"      \
+    "ld r20, y+ \n\t"      \
+    "ld r11, x+ \n\t"      \
+    "ld r21, y+ \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r12 \n\t"     \
+    "st z+, r0 \n\t"       \
+    "mov r22, r1 \n\t"     \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "mul r3, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r2, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r3, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r3, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r4, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r4, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r5, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r5, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r6, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r6, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r7, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r7, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r8, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r8, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r9, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r9, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r10, x+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r11, r21 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r11, x+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r2, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r2, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r3, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r3, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r4, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r4, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r5, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r5, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r6, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r6, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r7, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r10, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r7, x+ \n\t"       \
+    "ldi r24, 0 \n\t"      \
+    "mul r8, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r10, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r8, x+ \n\t"       \
+    "ldi r22, 0 \n\t"      \
+    "mul r9, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r10, r20 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r9, x+ \n\t"       \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r12, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r12 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r21 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r13, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r13 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r14, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r14 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r15, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r15 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r16, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r16 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r17, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r17 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r18, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r18 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r19, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r19 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r20, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r20 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r21, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r21 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r20 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r12, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r12 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r21 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r13, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r13 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r12 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r14, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r14 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r13 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r15, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r15 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r14 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r16, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r16 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r15 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ld r17, y+ \n\t"      \
+    "ldi r24, 0 \n\t"      \
+    "mul r10, r17 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r11, r16 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r20 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r22, r0 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ld r18, y+ \n\t"      \
+    "ldi r22, 0 \n\t"      \
+    "mul r10, r18 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r11, r17 \n\t"    \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r2, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r21 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r20 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r23, r0 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ld r19, y+ \n\t"      \
+    "ldi r23, 0 \n\t"      \
+    "mul r10, r19 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r11, r18 \n\t"    \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r2, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r3, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r12 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r21 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r20 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "ld r0, z \n\t"        \
+    "add r24, r0 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r11, r19 \n\t"    \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r2, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r3, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r4, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r13 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r12 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r21 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r2, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r3, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r4, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r5, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r14 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r13 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r12 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r3, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r4, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r5, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r6, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r15 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r14 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r13 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r4, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r5, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r6, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r7, r16 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r15 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r14 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r5, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r6, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r7, r17 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r8, r16 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r15 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "ldi r23, 0 \n\t"      \
+    "mul r6, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r7, r18 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r8, r17 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "mul r9, r16 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "adc r23, r25 \n\t"    \
+    "st z+, r24 \n\t"      \
+                           \
+    "ldi r24, 0 \n\t"      \
+    "mul r7, r19 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r8, r18 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "mul r9, r17 \n\t"     \
+    "add r22, r0 \n\t"     \
+    "adc r23, r1 \n\t"     \
+    "adc r24, r25 \n\t"    \
+    "st z+, r22 \n\t"      \
+                           \
+    "ldi r22, 0 \n\t"      \
+    "mul r8, r19 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "mul r9, r18 \n\t"     \
+    "add r23, r0 \n\t"     \
+    "adc r24, r1 \n\t"     \
+    "adc r22, r25 \n\t"    \
+    "st z+, r23 \n\t"      \
+                           \
+    "mul r9, r19 \n\t"     \
+    "add r24, r0 \n\t"     \
+    "adc r22, r1 \n\t"     \
+    "st z+, r24 \n\t"      \
+    "st z+, r22 \n\t"      \
+    "eor r1, r1 \n\t"
+
+#define FAST_MULT_ASM_8        \
+    "adiw r30, 30 \n\t"        \
+    "adiw r28, 30 \n\t"        \
+    "ld r2, x+ \n\t"           \
+    "ld r3, x+ \n\t"           \
+    "ld r12, y+ \n\t"          \
+    "ld r13, y+ \n\t"          \
+    "ldi r25, 0 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "st z+, r23 \n\t"          \
+    "st z+, r24 \n\t"          \
+                               \
+    "sbiw r30, 14 \n\t"        \
+    "sbiw r28, 12 \n\t"        \
+    "ld r12, y+ \n\t"          \
+    "ld r13, y+ \n\t"          \
+    "ld r14, y+ \n\t"          \
+    "ld r15, y+ \n\t"          \
+    "ld r16, y+ \n\t"          \
+    "ld r17, y+ \n\t"          \
+    "ld r18, y+ \n\t"          \
+    "ld r19, y+ \n\t"          \
+    "ld r20, y+ \n\t"          \
+    "ld r21, y+ \n\t"          \
+    "ld r4, x+ \n\t"           \
+    "ld r5, x+ \n\t"           \
+    "ld r6, x+ \n\t"           \
+    "ld r7, x+ \n\t"           \
+    "ld r8, x+ \n\t"           \
+    "ld r9, x+ \n\t"           \
+    "ld r10, x+ \n\t"          \
+    "ld r11, x+ \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r6, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r7, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r8, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r9, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r10, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r11, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "mul r3, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "st z+, r22 \n\t"          \
+    "st z+, r23 \n\t"          \
+                               \
+    "sbiw r30, 34 \n\t"        \
+    "sbiw r28, 22 \n\t"        \
+    "sbiw r26, 12 \n\t"        \
+    "ld r2, x+ \n\t"           \
+    "ld r12, y+ \n\t"          \
+    "ld r3, x+ \n\t"           \
+    "ld r13, y+ \n\t"          \
+    "ld r4, x+ \n\t"           \
+    "ld r14, y+ \n\t"          \
+    "ld r5, x+ \n\t"           \
+    "ld r15, y+ \n\t"          \
+    "ld r6, x+ \n\t"           \
+    "ld r16, y+ \n\t"          \
+    "ld r7, x+ \n\t"           \
+    "ld r17, y+ \n\t"          \
+    "ld r8, x+ \n\t"           \
+    "ld r18, y+ \n\t"          \
+    "ld r9, x+ \n\t"           \
+    "ld r19, y+ \n\t"          \
+    "ld r10, x+ \n\t"          \
+    "ld r20, y+ \n\t"          \
+    "ld r11, x+ \n\t"          \
+    "ld r21, y+ \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r4, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r5, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r6, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r7, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r7, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r8, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r8, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r9, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r9, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r10, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r10, x+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r11, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r11, x+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r14, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r15, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r16, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r17, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r18, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r19, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r20, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r21, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "st z+, r24 \n\t"          \
+    "st z+, r22 \n\t"          \
+                               \
+    "sbiw r30, 54 \n\t"        \
+    "sbiw r28, 32 \n\t"        \
+    "sbiw r26, 22 \n\t"        \
+    "ld r2, x+ \n\t"           \
+    "ld r12, y+ \n\t"          \
+    "ld r3, x+ \n\t"           \
+    "ld r13, y+ \n\t"          \
+    "ld r4, x+ \n\t"           \
+    "ld r14, y+ \n\t"          \
+    "ld r5, x+ \n\t"           \
+    "ld r15, y+ \n\t"          \
+    "ld r6, x+ \n\t"           \
+    "ld r16, y+ \n\t"          \
+    "ld r7, x+ \n\t"           \
+    "ld r17, y+ \n\t"          \
+    "ld r8, x+ \n\t"           \
+    "ld r18, y+ \n\t"          \
+    "ld r9, x+ \n\t"           \
+    "ld r19, y+ \n\t"          \
+    "ld r10, x+ \n\t"          \
+    "ld r20, y+ \n\t"          \
+    "ld r11, x+ \n\t"          \
+    "ld r21, y+ \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r12 \n\t"         \
+    "st z+, r0 \n\t"           \
+    "mov r22, r1 \n\t"         \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r4, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r5, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r6, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r7, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r7, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r8, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r8, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r9, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r9, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r10, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r10, x+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r11, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r11, x+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r4, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r5, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r5, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r6, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r6, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r7, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r7, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r8, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r8, x+ \n\t"           \
+    "ldi r22, 0 \n\t"          \
+    "mul r9, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r9, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r10, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r10, x+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r11, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r4, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r11, x+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r2, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r4, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r2, x+ \n\t"           \
+    "ldi r23, 0 \n\t"          \
+    "mul r3, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r4, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r3, x+ \n\t"           \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r14, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r15, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r16, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r17, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r18, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r19, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r20, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r21, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r14, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r15, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r16, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r17, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r18, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r14 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r19, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r15 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r13 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r20, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r16 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r14 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ld r21, y+ \n\t"          \
+    "ldi r23, 0 \n\t"          \
+    "mul r4, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r5, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r17 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r15 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r14 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r24, r0 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ld r12, y+ \n\t"          \
+    "ldi r24, 0 \n\t"          \
+    "mul r4, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r5, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r6, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r18 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r16 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r15 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r14 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r22, r0 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ld r13, y+ \n\t"          \
+    "ldi r22, 0 \n\t"          \
+    "mul r4, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r5, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r6, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r7, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r19 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r17 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r16 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r15 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r14 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "ld r0, z \n\t"            \
+    "add r23, r0 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r5, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r6, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r7, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r8, r20 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r18 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r17 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r16 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r15 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r6, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r7, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r8, r21 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r9, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r19 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r18 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r17 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r16 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r7, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r8, r12 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r9, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r10, r20 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r19 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r18 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r17 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r8, r13 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r9, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r10, r21 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r11, r20 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r19 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r18 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r9, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r10, r12 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r11, r21 \n\t"        \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r2, r20 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r19 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "ldi r22, 0 \n\t"          \
+    "mul r10, r13 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r11, r12 \n\t"        \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r2, r21 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "mul r3, r20 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "adc r22, r25 \n\t"        \
+    "st z+, r23 \n\t"          \
+                               \
+    "ldi r23, 0 \n\t"          \
+    "mul r11, r13 \n\t"        \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r2, r12 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "mul r3, r21 \n\t"         \
+    "add r24, r0 \n\t"         \
+    "adc r22, r1 \n\t"         \
+    "adc r23, r25 \n\t"        \
+    "st z+, r24 \n\t"          \
+                               \
+    "ldi r24, 0 \n\t"          \
+    "mul r2, r13 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "mul r3, r12 \n\t"         \
+    "add r22, r0 \n\t"         \
+    "adc r23, r1 \n\t"         \
+    "adc r24, r25 \n\t"        \
+    "st z+, r22 \n\t"          \
+                               \
+    "mul r3, r13 \n\t"         \
+    "add r23, r0 \n\t"         \
+    "adc r24, r1 \n\t"         \
+    "st z+, r23 \n\t"          \
+    "st z+, r24 \n\t"          \
+    "eor r1, r1 \n\t"
+
+#define FAST_SQUARE_ASM_5          \
+    "ld r2, x+ \n\t"               \
+    "ld r3, x+ \n\t"               \
+    "ld r4, x+ \n\t"               \
+    "ld r5, x+ \n\t"               \
+    "ld r6, x+ \n\t"               \
+    "ld r7, x+ \n\t"               \
+    "ld r8, x+ \n\t"               \
+    "ld r9, x+ \n\t"               \
+    "ld r10, x+ \n\t"              \
+    "ld r11, x+ \n\t"              \
+    "ld r12, x+ \n\t"              \
+    "ld r13, x+ \n\t"              \
+    "ld r14, x+ \n\t"              \
+    "ld r15, x+ \n\t"              \
+    "ld r16, x+ \n\t"              \
+    "ld r17, x+ \n\t"              \
+    "ld r18, x+ \n\t"              \
+    "ld r19, x+ \n\t"              \
+    "ld r20, x+ \n\t"              \
+    "ld r21, x+ \n\t"              \
+    "ldi r27, 0 \n\t"              \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul r2, r2 \n\t"              \
+    "st z+, r0 \n\t"               \
+    "mov r22, r1 \n\t"             \
+                                   \
+    "ldi r24, 0 \n\t"              \
+    "mul r2, r3 \n\t"              \
+    "lsl r0 \n\t"                  \
+    "rol r1 \n\t"                  \
+    "adc r24, r27 \n\t"            \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r27 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r4 \n\t"              \
+    "lsl r0 \n\t"                  \
+    "rol r1 \n\t"                  \
+    "adc r22, r27 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r3, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r5 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r6 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r4, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r7 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r8 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r5, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r10 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r6, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r11 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r12 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r6, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r7, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r13 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r7, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r14 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r6, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r7, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r8, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r15 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r7, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r8, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r16 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r6, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r7, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r8, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r9, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r17 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r7, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r8, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r9, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r18 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r6, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r7, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r8, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r9, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r10, r10 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r19 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r7, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r8, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r9, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r10, r11 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r20 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r4, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r6, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r7, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r8, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r9, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r10, r12 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r11, r11 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r2, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r3, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r4, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r5, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r7, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r8, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r9, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r10, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r11, r12 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r3, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r4, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r5, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r6, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r7, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r8, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r9, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r10, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r11, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r12, r12 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r4, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r5, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r6, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r7, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r8, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r9, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r10, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r11, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r12, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r5, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r6, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r7, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r8, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r9, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r10, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r11, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r12, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r13, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r6, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r7, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r8, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r9, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r10, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r11, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r12, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r13, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r7, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r8, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r9, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r10, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r11, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r12, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r13, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r14, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r8, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r9, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r10, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r11, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r12, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r13, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r14, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r9, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r10, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r11, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r12, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r13, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r14, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r15, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r10, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r11, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r12, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r13, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r14, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r15, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r11, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r12, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r13, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r14, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r15, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r16, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r12, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r13, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r14, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r15, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r16, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r13, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r14, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r15, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r16, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r17, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r14, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r15, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r16, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r17, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r15, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r16, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "mul r17, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r18, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r16, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r17, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "mul r18, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r17, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r18, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r19, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r27 \n\t"            \
+    "add r23, r25 \n\t"            \
+    "adc r24, r26 \n\t"            \
+    "adc r22, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r26, 0 \n\t"              \
+    "mul r18, r21 \n\t"            \
+    "mov r23, r0 \n\t"             \
+    "mov r25, r1 \n\t"             \
+    "mul r19, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "adc r26, r27 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r25 \n\t"                 \
+    "rol r26 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r25, r22 \n\t"            \
+    "adc r26, r27 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul r19, r21 \n\t"            \
+    "lsl r0 \n\t"                  \
+    "rol r1 \n\t"                  \
+    "adc r23, r27 \n\t"            \
+    "add r25, r0 \n\t"             \
+    "adc r26, r1 \n\t"             \
+    "adc r23, r27 \n\t"            \
+    "mul r20, r20 \n\t"            \
+    "add r25, r0 \n\t"             \
+    "adc r26, r1 \n\t"             \
+    "adc r23, r27 \n\t"            \
+    "st z+, r25 \n\t"              \
+                                   \
+    "ldi r25, 0 \n\t"              \
+    "mul r20, r21 \n\t"            \
+    "lsl r0 \n\t"                  \
+    "rol r1 \n\t"                  \
+    "adc r25, r27 \n\t"            \
+    "add r26, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r25, r27 \n\t"            \
+    "st z+, r26 \n\t"              \
+                                   \
+    "mul r21, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r25, r1 \n\t"             \
+    "st z+, r23 \n\t"              \
+    "st z+, r25 \n\t"              \
+    "eor r1, r1 \n\t"
+
+#define FAST_SQUARE_ASM_6              \
+    "ldi r25, 0 \n\t"                  \
+    "movw r28, r26 \n\t"               \
+    "ld r2, x+ \n\t"                   \
+    "ld r3, x+ \n\t"                   \
+    "adiw r28, 20 \n\t"                \
+    "ld r12, y+ \n\t"                  \
+    "ld r13, y+ \n\t"                  \
+    "adiw r30, 20 \n\t"                \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul 2, 12 \n\t"                   \
+    "st z+, r0 \n\t"                   \
+    "mov r22, r1 \n\t"                 \
+                                       \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ld r12, y+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r13, y+ \n\t"                  \
+    "ldi r23, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r3, r12 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ld r2, x+ \n\t"                   \
+    "ldi r24, 0 \n\t"                  \
+    "mul r3, r13 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r2, r12 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ld r3, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "mul r3, r13 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "st z+, r24 \n\t"                  \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "sbiw r26, 4 \n\t"                 \
+    "sbiw r30, 28 \n\t"                \
+    "ld r2, x+ \n\t"                   \
+    "ld r3, x+ \n\t"                   \
+    "ld r4, x+ \n\t"                   \
+    "ld r5, x+ \n\t"                   \
+    "ld r6, x+ \n\t"                   \
+    "ld r7, x+ \n\t"                   \
+    "ld r8, x+ \n\t"                   \
+    "ld r9, x+ \n\t"                   \
+    "ld r10, x+ \n\t"                  \
+    "ld r11, x+ \n\t"                  \
+    "ld r12, x+ \n\t"                  \
+    "ld r13, x+ \n\t"                  \
+    "ld r14, x+ \n\t"                  \
+    "ld r15, x+ \n\t"                  \
+    "ld r16, x+ \n\t"                  \
+    "ld r17, x+ \n\t"                  \
+    "ld r18, x+ \n\t"                  \
+    "ld r19, x+ \n\t"                  \
+    "ld r20, x+ \n\t"                  \
+    "ld r21, x+ \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r2, r2 \n\t"                  \
+    "st z+, r0 \n\t"                   \
+    "mov r22, r1 \n\t"                 \
+                                       \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r3 \n\t"                  \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r6 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r4, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r7 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r8 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r5, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r9 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r10 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r6, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r11 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r12 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r7, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r14 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r8, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r15 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r16 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r9, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r17 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r18 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r10, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r19 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r20 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r11, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r21 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r2, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r3, r21 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r4, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r12, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r3, r2 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r4, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r3, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r4, r2 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r5, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r13, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r4, r3 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r5, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r4, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r5, r3 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r6, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r14, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r5, r4 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r6, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r5, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r6, r4 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r7, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r15, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r6, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r7, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r7, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r8, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r16, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r8, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r9, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r9, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r10, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r17, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r10, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r11, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r11, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r12, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r18, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r12, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r13, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r13, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r14, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r19, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r14, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r15, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r15, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r16, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r20, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r16, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r17, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r17, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r18, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r20, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r21, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r18, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r19, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r21, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r19, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r20, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r21, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r2, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r20, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r21, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r2, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r21, r5 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r2, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r3, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r3, r5 \n\t"                  \
+    "add r28, r0 \n\t"                 \
+    "adc r29, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "add r28, r0 \n\t"                 \
+    "adc r29, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r4, r4 \n\t"                  \
+    "add r28, r0 \n\t"                 \
+    "adc r29, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r28 \n\t"                  \
+                                       \
+    "ldi r28, 0 \n\t"                  \
+    "mul r4, r5 \n\t"                  \
+    "add r29, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "add r29, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "st z+, r29 \n\t"                  \
+                                       \
+    "mul r5, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "st z+, r23 \n\t"                  \
+    "st z+, r28 \n\t"                  \
+    "eor r1, r1 \n\t"
+
+#define FAST_SQUARE_ASM_7          \
+    "ldi r25, 0 \n\t"              \
+    "movw r28, r26 \n\t"           \
+    "ld r2, x+ \n\t"               \
+    "ld r3, x+ \n\t"               \
+    "ld r4, x+ \n\t"               \
+    "ld r5, x+ \n\t"               \
+    "adiw r28, 20 \n\t"            \
+    "ld r12, y+ \n\t"              \
+    "ld r13, y+ \n\t"              \
+    "ld r14, y+ \n\t"              \
+    "ld r15, y+ \n\t"              \
+    "adiw r30, 20 \n\t"            \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul 2, 12 \n\t"               \
+    "st z+, r0 \n\t"               \
+    "mov r22, r1 \n\t"             \
+                                   \
+    "ldi r24, 0 \n\t"              \
+    "mul r2, r13 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r3, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul r2, r15 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r3, r14 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "st z+, r24 \n\t"              \
+                                   \
+    "ld r12, y+ \n\t"              \
+    "ldi r24, 0 \n\t"              \
+    "mul r2, r12 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r3, r15 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r4, r14 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "ld r13, y+ \n\t"              \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r3, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r14, y+ \n\t"              \
+    "ldi r23, 0 \n\t"              \
+    "mul r2, r14 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r3, r13 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r4, r12 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r5, r15 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "st z+, r24 \n\t"              \
+                                   \
+    "ld r15, y+ \n\t"              \
+    "ldi r24, 0 \n\t"              \
+    "mul r2, r15 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r3, r14 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r4, r13 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r5, r12 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "ld r2, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r3, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r2, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r3, x+ \n\t"               \
+    "ldi r23, 0 \n\t"              \
+    "mul r4, r15 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r5, r14 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r2, r13 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "st z+, r24 \n\t"              \
+                                   \
+    "ld r4, x+ \n\t"               \
+    "ldi r24, 0 \n\t"              \
+    "mul r5, r15 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r2, r14 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "mul r3, r13 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "ld r5, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r3, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul r3, r15 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r4, r14 \n\t"             \
+    "add r24, r0 \n\t"             \
+    "adc r22, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "st z+, r24 \n\t"              \
+                                   \
+    "ldi r24, 0 \n\t"              \
+    "mul r4, r15 \n\t"             \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "mul r5, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "st z+, r23 \n\t"              \
+    "st z+, r24 \n\t"              \
+                                   \
+    "sbiw r26, 8 \n\t"             \
+    "sbiw r30, 36 \n\t"            \
+    "ld r2, x+ \n\t"               \
+    "ld r3, x+ \n\t"               \
+    "ld r4, x+ \n\t"               \
+    "ld r5, x+ \n\t"               \
+    "ld r6, x+ \n\t"               \
+    "ld r7, x+ \n\t"               \
+    "ld r8, x+ \n\t"               \
+    "ld r9, x+ \n\t"               \
+    "ld r10, x+ \n\t"              \
+    "ld r11, x+ \n\t"              \
+    "ld r12, x+ \n\t"              \
+    "ld r13, x+ \n\t"              \
+    "ld r14, x+ \n\t"              \
+    "ld r15, x+ \n\t"              \
+    "ld r16, x+ \n\t"              \
+    "ld r17, x+ \n\t"              \
+    "ld r18, x+ \n\t"              \
+    "ld r19, x+ \n\t"              \
+    "ld r20, x+ \n\t"              \
+    "ld r21, x+ \n\t"              \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul r2, r2 \n\t"              \
+    "st z+, r0 \n\t"               \
+    "mov r22, r1 \n\t"             \
+                                   \
+    "ldi r24, 0 \n\t"              \
+    "mul r2, r3 \n\t"              \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "add r22, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "st z+, r22 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r3, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r5 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r6 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r4, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r7 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r8 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r5, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r10 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r6, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r11 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r12 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r7, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r13 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r14 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r8, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r15 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r16 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r9, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r17 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r10 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r18 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r11 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r10, r10 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r19 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r12 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r11 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r2, r20 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r3, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r13 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r12 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r11, r11 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r14 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r12 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r2, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r3, r21 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r4, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r15 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r12, r12 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r3, r2 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r4, r21 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r16 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r3, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r4, r2 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r5, r21 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r6, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r17 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r13, r13 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r4, r3 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r5, r2 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r21 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r18 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r4, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r5, r3 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r6, r2 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r7, r21 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r19 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r14, r14 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r5, r4 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r6, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r7, r2 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r21 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r20 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r5, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r6, r4 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r7, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r8, r2 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r21 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r14, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r15, r15 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r6, r5 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r7, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r8, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r2 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r15, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r6, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r7, r5 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r8, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r9, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r14, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r15, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r16, r16 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r7, r6 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r8, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r9, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r15, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r16, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r7, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r8, r6 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r9, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r10, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r14, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r15, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r16, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r17, r17 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r8, r7 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r9, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r10, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r15, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r16, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r17, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r8, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r9, r7 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r10, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r11, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r14, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r15, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r16, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r17, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r18, r18 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r9, r8 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r10, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r11, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r15, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r16, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r17, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r18, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ld r9, x+ \n\t"               \
+    "ldi r22, 0 \n\t"              \
+    "mul r10, r8 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r11, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r12, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r14, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r15, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r16, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r17, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r18, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r24, r25 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r19, r19 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r10, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r11, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r12, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r13, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r15, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r16, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r17, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r18, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r19, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "ld r0, z \n\t"                \
+    "add r23, r0 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r11, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r12, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r13, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r14, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r15, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r16, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r17, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r18, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r19, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r20, r20 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r12, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r13, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r14, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r15, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r16, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r17, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r18, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r19, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r20, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r13, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r14, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r15, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r16, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r17, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r18, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r19, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r20, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r21, r21 \n\t"            \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r14, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r15, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r16, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r17, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r18, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r19, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r20, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r21, r2 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r15, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r16, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r17, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r18, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r19, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r20, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r21, r3 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r2, r2 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r16, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r17, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r18, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r19, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r20, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r21, r4 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r2, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r17, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r18, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r19, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r20, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r21, r5 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r2, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r3, r3 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r18, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r19, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r20, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r21, r6 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r2, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r3, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r19, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r20, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r21, r7 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r2, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r3, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r4, r4 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r20, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r21, r8 \n\t"             \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r2, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r3, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r21, r9 \n\t"             \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r2, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r3, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r4, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r5, r5 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r2, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r3, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r4, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r5, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r3, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r4, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "mul r5, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r6, r6 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r4, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r5, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "mul r6, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r22, 0 \n\t"              \
+    "mul r5, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r24, r1 \n\t"             \
+    "mul r6, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r24 \n\t"                 \
+    "rol r22 \n\t"                 \
+    "mul r7, r7 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r24, r1 \n\t"             \
+    "adc r22, r25 \n\t"            \
+    "add r23, r28 \n\t"            \
+    "adc r24, r29 \n\t"            \
+    "adc r22, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r29, 0 \n\t"              \
+    "mul r6, r9 \n\t"              \
+    "mov r23, r0 \n\t"             \
+    "mov r28, r1 \n\t"             \
+    "mul r7, r8 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "adc r29, r25 \n\t"            \
+    "lsl r23 \n\t"                 \
+    "rol r28 \n\t"                 \
+    "rol r29 \n\t"                 \
+    "add r23, r24 \n\t"            \
+    "adc r28, r22 \n\t"            \
+    "adc r29, r25 \n\t"            \
+    "st z+, r23 \n\t"              \
+                                   \
+    "ldi r23, 0 \n\t"              \
+    "mul r7, r9 \n\t"              \
+    "add r28, r0 \n\t"             \
+    "adc r29, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "add r28, r0 \n\t"             \
+    "adc r29, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "mul r8, r8 \n\t"              \
+    "add r28, r0 \n\t"             \
+    "adc r29, r1 \n\t"             \
+    "adc r23, r25 \n\t"            \
+    "st z+, r28 \n\t"              \
+                                   \
+    "ldi r28, 0 \n\t"              \
+    "mul r8, r9 \n\t"              \
+    "add r29, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "add r29, r0 \n\t"             \
+    "adc r23, r1 \n\t"             \
+    "adc r28, r25 \n\t"            \
+    "st z+, r29 \n\t"              \
+                                   \
+    "mul r9, r9 \n\t"              \
+    "add r23, r0 \n\t"             \
+    "adc r28, r1 \n\t"             \
+    "st z+, r23 \n\t"              \
+    "st z+, r28 \n\t"              \
+    "eor r1, r1 \n\t"
+
+#define FAST_SQUARE_ASM_8              \
+    "ldi r25, 0 \n\t"                  \
+    "movw r28, r26 \n\t"               \
+    "ld r2, x+ \n\t"                   \
+    "ld r3, x+ \n\t"                   \
+    "ld r4, x+ \n\t"                   \
+    "ld r5, x+ \n\t"                   \
+    "ld r6, x+ \n\t"                   \
+    "ld r7, x+ \n\t"                   \
+    "adiw r28, 20 \n\t"                \
+    "ld r12, y+ \n\t"                  \
+    "ld r13, y+ \n\t"                  \
+    "ld r14, y+ \n\t"                  \
+    "ld r15, y+ \n\t"                  \
+    "ld r16, y+ \n\t"                  \
+    "ld r17, y+ \n\t"                  \
+    "adiw r30, 20 \n\t"                \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul 2, 12 \n\t"                   \
+    "st z+, r0 \n\t"                   \
+    "mov r22, r1 \n\t"                 \
+                                       \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r2, r15 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r3, r14 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r16 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r3, r15 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r4, r14 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r12, y+ \n\t"                  \
+    "ldi r23, 0 \n\t"                  \
+    "mul r2, r12 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r3, r17 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r4, r16 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r5, r15 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ld r13, y+ \n\t"                  \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r3, r12 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r4, r17 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r5, r16 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ld r14, y+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r15, y+ \n\t"                  \
+    "ldi r23, 0 \n\t"                  \
+    "mul r2, r15 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r3, r14 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r4, r13 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r5, r12 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r6, r17 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ld r16, y+ \n\t"                  \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r16 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r3, r15 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r4, r14 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r5, r13 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r6, r12 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r7, r17 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ld r17, y+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r2, x+ \n\t"                   \
+    "ldi r23, 0 \n\t"                  \
+    "mul r3, r17 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r4, r16 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r5, r15 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r6, r14 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r7, r13 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r2, r12 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ld r3, x+ \n\t"                   \
+    "ldi r24, 0 \n\t"                  \
+    "mul r4, r17 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r5, r16 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r6, r15 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r7, r14 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r2, r13 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ld r4, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r5, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r2, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r5, x+ \n\t"                   \
+    "ldi r23, 0 \n\t"                  \
+    "mul r6, r17 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r7, r16 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r2, r15 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r3, r14 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ld r6, x+ \n\t"                   \
+    "ldi r24, 0 \n\t"                  \
+    "mul r7, r17 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r2, r16 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r3, r15 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r4, r14 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ld r7, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r3, r17 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r4, r16 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r5, r15 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "ldi r24, 0 \n\t"                  \
+    "mul r4, r17 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "mul r5, r16 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r5, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r6, r17 \n\t"                 \
+    "add r24, r0 \n\t"                 \
+    "adc r22, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r24 \n\t"                  \
+                                       \
+    "mul r7, r17 \n\t"                 \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "st z+, r22 \n\t"                  \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "sbiw r26, 12 \n\t"                \
+    "sbiw r30, 44 \n\t"                \
+    "ld r2, x+ \n\t"                   \
+    "ld r3, x+ \n\t"                   \
+    "ld r4, x+ \n\t"                   \
+    "ld r5, x+ \n\t"                   \
+    "ld r6, x+ \n\t"                   \
+    "ld r7, x+ \n\t"                   \
+    "ld r8, x+ \n\t"                   \
+    "ld r9, x+ \n\t"                   \
+    "ld r10, x+ \n\t"                  \
+    "ld r11, x+ \n\t"                  \
+    "ld r12, x+ \n\t"                  \
+    "ld r13, x+ \n\t"                  \
+    "ld r14, x+ \n\t"                  \
+    "ld r15, x+ \n\t"                  \
+    "ld r16, x+ \n\t"                  \
+    "ld r17, x+ \n\t"                  \
+    "ld r18, x+ \n\t"                  \
+    "ld r19, x+ \n\t"                  \
+    "ld r20, x+ \n\t"                  \
+    "ld r21, x+ \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r2, r2 \n\t"                  \
+    "st z+, r0 \n\t"                   \
+    "mov r22, r1 \n\t"                 \
+                                       \
+    "ldi r24, 0 \n\t"                  \
+    "mul r2, r3 \n\t"                  \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "add r22, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "st z+, r22 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r6 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r4, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r7 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r8 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r5, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r9 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r10 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r6, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r11 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r12 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r7, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r14 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r8, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r15 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r16 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r9, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r17 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r18 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r10, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r19 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r2, r20 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r3, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r13 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r11, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r21 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r14 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r2, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r3, r21 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r4, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r15 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r12, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r3, r2 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r4, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r16 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r3, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r4, r2 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r5, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r17 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r13, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r4, r3 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r5, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r18 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r4, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r5, r3 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r6, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r19 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r14, r14 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r5, r4 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r6, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r20 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r5, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r6, r4 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r7, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r21 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r15, r15 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r6, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r7, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r6, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r7, r5 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r8, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r16, r16 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r7, r6 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r8, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r7, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r8, r6 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r9, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r10, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r17, r17 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r8, r7 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r9, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r8, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r9, r7 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r10, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r11, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r18, r18 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r9, r8 \n\t"                  \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r10, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r11, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r9, x+ \n\t"                   \
+    "ldi r22, 0 \n\t"                  \
+    "mul r10, r8 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r11, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r12, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r19, r19 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r10, r9 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r11, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r12, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r10, x+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r11, r9 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r12, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r13, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r20, r20 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r11, r10 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r12, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r13, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r11, x+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r12, r10 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r13, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r14, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r20, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r21, r21 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r12, r11 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r13, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r14, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r21, r2 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r12, x+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r13, r11 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r14, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r15, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r20, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r21, r3 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r2, r2 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r13, r12 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r14, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r15, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r21, r4 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r2, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ld r13, x+ \n\t"                  \
+    "ldi r22, 0 \n\t"                  \
+    "mul r14, r12 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r15, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r16, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r20, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r21, r5 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r2, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r25 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r3, r3 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r14, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r15, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r16, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r17, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r21, r6 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r2, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r3, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "ld r0, z \n\t"                    \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r15, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r16, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r17, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r18, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r20, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r21, r7 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r2, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r4, r4 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r16, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r17, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r18, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r19, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r21, r8 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r2, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r3, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r17, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r18, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r19, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r20, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r21, r9 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r2, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r5, r5 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r18, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r19, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r20, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r21, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r2, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r3, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r19, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r20, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r21, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r2, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r6, r6 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r20, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r21, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r2, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r3, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r21, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r2, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r3, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r4, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r7, r7 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r2, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r3, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r4, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r5, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r3, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r4, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r5, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r6, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r8, r8 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r4, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r5, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r6, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r7, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r5, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r6, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r7, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r8, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r9, r9 \n\t"                  \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r6, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r7, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r8, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r9, r10 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r7, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r8, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "mul r9, r11 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r10, r10 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r8, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r9, r12 \n\t"                 \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "mul r10, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r22, 0 \n\t"                  \
+    "mul r9, r13 \n\t"                 \
+    "mov r23, r0 \n\t"                 \
+    "mov r24, r1 \n\t"                 \
+    "mul r10, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r24 \n\t"                     \
+    "rol r22 \n\t"                     \
+    "mul r11, r11 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r24, r1 \n\t"                 \
+    "adc r22, r25 \n\t"                \
+    "add r23, r28 \n\t"                \
+    "adc r24, r29 \n\t"                \
+    "adc r22, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r29, 0 \n\t"                  \
+    "mul r10, r13 \n\t"                \
+    "mov r23, r0 \n\t"                 \
+    "mov r28, r1 \n\t"                 \
+    "mul r11, r12 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "adc r29, r25 \n\t"                \
+    "lsl r23 \n\t"                     \
+    "rol r28 \n\t"                     \
+    "rol r29 \n\t"                     \
+    "add r23, r24 \n\t"                \
+    "adc r28, r22 \n\t"                \
+    "adc r29, r25 \n\t"                \
+    "st z+, r23 \n\t"                  \
+                                       \
+    "ldi r23, 0 \n\t"                  \
+    "mul r11, r13 \n\t"                \
+    "add r28, r0 \n\t"                 \
+    "adc r29, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "add r28, r0 \n\t"                 \
+    "adc r29, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "mul r12, r12 \n\t"                \
+    "add r28, r0 \n\t"                 \
+    "adc r29, r1 \n\t"                 \
+    "adc r23, r25 \n\t"                \
+    "st z+, r28 \n\t"                  \
+                                       \
+    "ldi r28, 0 \n\t"                  \
+    "mul r12, r13 \n\t"                \
+    "add r29, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "add r29, r0 \n\t"                 \
+    "adc r23, r1 \n\t"                 \
+    "adc r28, r25 \n\t"                \
+    "st z+, r29 \n\t"                  \
+                                       \
+    "mul r13, r13 \n\t"                \
+    "add r23, r0 \n\t"                 \
+    "adc r28, r1 \n\t"                 \
+    "st z+, r23 \n\t"                  \
+    "st z+, r28 \n\t"                  \
+    "eor r1, r1 \n\t"
diff --git a/curve-specific.inc b/curve-specific.inc
index 9d8cd7f..15586c3 100644
--- a/curve-specific.inc
+++ b/curve-specific.inc
@@ -168,7 +168,7 @@
 
 uECC_Curve uECC_secp160r1(void) { return &curve_secp160r1; }
 
-#if (uECC_OPTIMIZATION_LEVEL > 0)
+#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1)
 /* Computes result = product % curve_p
     see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354
     
@@ -271,7 +271,7 @@
     }
 }
 #endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */
+#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) */
 
 #endif /* uECC_SUPPORTS_secp160r1 */
 
@@ -771,7 +771,7 @@
 uECC_Curve uECC_secp256r1(void) { return &curve_secp256r1; }
 
 
-#if (uECC_OPTIMIZATION_LEVEL > 0)
+#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1)
 /* Computes result = product % curve_p
    from http://www.nsa.gov/ia/_files/nist-routines.pdf */
 #if uECC_WORD_SIZE == 1
@@ -1053,7 +1053,7 @@
     }
 }
 #endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */
+#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) */
 
 #endif /* uECC_SUPPORTS_secp256r1 */
 
diff --git a/test/ecc_test/ecc_test.ino b/test/ecc_test/ecc_test.ino
index 8fde301..c3c8900 100644
--- a/test/ecc_test/ecc_test.ino
+++ b/test/ecc_test/ecc_test.ino
@@ -38,27 +38,28 @@
 }
 
 void loop() {
-  uint8_t private1[uECC_BYTES];
-  uint8_t private2[uECC_BYTES];
+  const struct uECC_Curve_t * curve = uECC_secp160r1();
+  uint8_t private1[21];
+  uint8_t private2[21];
   
-  uint8_t public1[uECC_BYTES * 2];
-  uint8_t public2[uECC_BYTES * 2];
+  uint8_t public1[40];
+  uint8_t public2[40];
   
-  uint8_t secret1[uECC_BYTES];
-  uint8_t secret2[uECC_BYTES];
+  uint8_t secret1[20];
+  uint8_t secret2[20];
   
   unsigned long a = millis();
-  uECC_make_key(public1, private1);
+  uECC_make_key(public1, private1, curve);
   unsigned long b = millis();
   
   Serial.print("Made key 1 in "); Serial.println(b-a);
   a = millis();
-  uECC_make_key(public2, private2);
+  uECC_make_key(public2, private2, curve);
   b = millis();
   Serial.print("Made key 2 in "); Serial.println(b-a);
 
   a = millis();
-  int r = uECC_shared_secret(public2, private1, secret1);
+  int r = uECC_shared_secret(public2, private1, secret1, curve);
   b = millis();
   Serial.print("Shared secret 1 in "); Serial.println(b-a);
   if (!r) {
@@ -67,7 +68,7 @@
   }
 
   a = millis();
-  r = uECC_shared_secret(public1, private2, secret2);
+  r = uECC_shared_secret(public1, private2, secret2, curve);
   b = millis();
   Serial.print("Shared secret 2 in "); Serial.println(b-a);
   if (!r) {
@@ -75,7 +76,7 @@
     return;
   }
     
-  if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
+  if (memcmp(secret1, secret2, 20) != 0) {
     Serial.print("Shared secrets are not identical!\n");
   } else {
     Serial.print("Shared secrets are identical\n");
diff --git a/uECC.c b/uECC.c
index 02619e5..73bee2c 100644
--- a/uECC.c
+++ b/uECC.c
@@ -7,36 +7,130 @@
     #define uECC_RNG_MAX_TRIES 64
 #endif
 
-#if uECC_SUPPORTS_secp160r1
-    #define uECC_MAX_BYTES 21 /* Due to the size of curve_n. */
-#endif
-#if uECC_SUPPORTS_secp192r1
-    #undef uECC_MAX_BYTES
-    #define uECC_MAX_BYTES 24
-#endif
-#if uECC_SUPPORTS_secp224r1
-    #undef uECC_MAX_BYTES
-    #define uECC_MAX_BYTES 28
-#endif
-#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-    #undef uECC_MAX_BYTES
-    #define uECC_MAX_BYTES 32
-#endif
-
 #if uECC_ENABLE_VLI_API
     #define uECC_VLI_API
 #else
     #define uECC_VLI_API static
 #endif
 
+#define CONCATX(a, ...) a ## __VA_ARGS__ /* Raw token paste; arguments are not expanded first. */
+#define CONCAT(a, ...) CONCATX(a, __VA_ARGS__) /* Paste after the arguments are expanded. */
+/* STR(a) stringifies a after macro expansion; STRX stringifies its argument as written. */
+#define STRX(a) #a
+#define STR(a) STRX(a)
+/* EVAL forces repeated rescans (4x nesting per level) so DEFER'd recursion below unrolls. */
+#define EVAL(...)  EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__))))
+#define EVAL1(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__))))
+#define EVAL2(...) EVAL3(EVAL3(EVAL3(EVAL3(__VA_ARGS__))))
+#define EVAL3(...) EVAL4(EVAL4(EVAL4(EVAL4(__VA_ARGS__))))
+#define EVAL4(...) __VA_ARGS__
+/* DEC_<n> is n - 1: lookup table used by DEC(); only n in 1..32 is defined. */
+#define DEC_1  0
+#define DEC_2  1
+#define DEC_3  2
+#define DEC_4  3
+#define DEC_5  4
+#define DEC_6  5
+#define DEC_7  6
+#define DEC_8  7
+#define DEC_9  8
+#define DEC_10 9
+#define DEC_11 10
+#define DEC_12 11
+#define DEC_13 12
+#define DEC_14 13
+#define DEC_15 14
+#define DEC_16 15
+#define DEC_17 16
+#define DEC_18 17
+#define DEC_19 18
+#define DEC_20 19
+#define DEC_21 20
+#define DEC_22 21
+#define DEC_23 22
+#define DEC_24 23
+#define DEC_25 24
+#define DEC_26 25
+#define DEC_27 26
+#define DEC_28 27
+#define DEC_29 28
+#define DEC_30 29
+#define DEC_31 30
+#define DEC_32 31
+/* DEC(N) expands N, then selects DEC_<N>, i.e. N - 1. */
+#define DEC(N) CONCAT(DEC_, N)
+/* SOME_OR_0(N) yields 0 when N expands to 0 and SOME otherwise (via the SOME_CHECK_0 probe). */
+#define SECOND_ARG(_, val, ...) val
+#define SOME_CHECK_0 ~, 0
+#define GET_SECOND_ARG(...) SECOND_ARG(__VA_ARGS__, SOME,)
+#define SOME_OR_0(N) GET_SECOND_ARG(CONCAT(SOME_CHECK_, N))
+/* DEFER(name) emits name followed by an empty expansion, postponing its call to a later rescan. */
+#define EMPTY(...)
+#define DEFER(...) __VA_ARGS__ EMPTY()
+/* REPEAT(N, stuff) expands to N copies of stuff (N in 1..32, limited by the DEC_ table). */
+#define REPEAT_NAME_0() REPEAT_0
+#define REPEAT_NAME_SOME() REPEAT_SOME
+#define REPEAT_0(...)
+#define REPEAT_SOME(N, stuff) DEFER(CONCAT(REPEAT_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), stuff) stuff
+#define REPEAT(N, stuff) EVAL(REPEAT_SOME(N, stuff))
+/* REPEATM(N, macro) expands to macro(N) macro(N-1) ... macro(1). */
+#define REPEATM_NAME_0() REPEATM_0
+#define REPEATM_NAME_SOME() REPEATM_SOME
+#define REPEATM_0(...)
+#define REPEATM_SOME(N, macro) macro(N) \
+    DEFER(CONCAT(REPEATM_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), macro)
+#define REPEATM(N, macro) EVAL(REPEATM_SOME(N, macro))
+
 #include "platform-specific.inc"
 
 #if (uECC_WORD_SIZE == 1)
-    #define uECC_MAX_WORDS uECC_MAX_BYTES
+    #if uECC_SUPPORTS_secp160r1 /* Must be plain literals: DEC() token-pastes them. */
+        #define uECC_MAX_WORDS 21 /* Due to the size of curve_n. */
+    #endif
+    #if uECC_SUPPORTS_secp192r1 /* Each later curve overrides the previous maximum. */
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 24
+    #endif
+    #if uECC_SUPPORTS_secp224r1
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 28
+    #endif
+    #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 32
+    #endif
 #elif (uECC_WORD_SIZE == 4)
-    #define uECC_MAX_WORDS ((uECC_MAX_BYTES + 3) / 4)
+    #if uECC_SUPPORTS_secp160r1
+        #define uECC_MAX_WORDS 6 /* Due to the size of curve_n. */
+    #endif
+    #if uECC_SUPPORTS_secp192r1
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 6
+    #endif
+    #if uECC_SUPPORTS_secp224r1
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 7
+    #endif
+    #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 8
+    #endif
 #elif (uECC_WORD_SIZE == 8)
-    #define uECC_MAX_WORDS ((uECC_MAX_BYTES + 7) / 8)
+    #if uECC_SUPPORTS_secp160r1
+        #define uECC_MAX_WORDS 3
+    #endif
+    #if uECC_SUPPORTS_secp192r1
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 3
+    #endif
+    #if uECC_SUPPORTS_secp224r1
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 4
+    #endif
+    #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
+        #undef uECC_MAX_WORDS
+        #define uECC_MAX_WORDS 4
+    #endif
 #endif /* uECC_WORD_SIZE */
 
 #define BITS_TO_WORDS(num_bits) ((num_bits + ((uECC_WORD_SIZE * 8) - 1)) / (uECC_WORD_SIZE * 8))
@@ -63,11 +157,19 @@
 #endif
 };
 
+static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
+                                       const uECC_word_t *right,
+                                       wordcount_t num_words);
+/* Declared early (defined later), presumably so the asm includes below can use it. */
 #if (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
         uECC_PLATFORM == uECC_arm_thumb2)
     #include "asm_arm.inc"
 #endif
 
+#if (uECC_PLATFORM == uECC_avr)
+    #include "asm_avr.inc"
+#endif
+
 #if default_RNG_defined
 static uECC_RNG_Function g_rng_function = &default_RNG;
 #else 
@@ -78,12 +180,14 @@
     g_rng_function = rng_function;
 }
 
+#if !asm_clear /* Portable C version (zeroes every word); omitted when asm_clear is nonzero. */
 uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) {
     wordcount_t i;
     for (i = 0; i < num_words; ++i) {
         vli[i] = 0;
     }
 }
+#endif /* !asm_clear */
 
 /* Constant-time comparison to zero - secure way to compare long integers */
 /* Returns 1 if vli == 0, 0 otherwise. */
@@ -131,12 +235,14 @@
 }
 
 /* Sets dest = src. */
+#if !asm_set /* Portable C version (word-by-word copy); omitted when asm_set is nonzero. */
 uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) {
     wordcount_t i;
     for (i = 0; i < num_words; ++i) {
         dest[i] = src[i];
     }
 }
+#endif /* !asm_set */
 
 /* Returns sign of left - right. */
 static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
@@ -182,6 +288,7 @@
 }
 
 /* Computes vli = vli >> 1. */
+#if !asm_rshift1
 uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) {
     uECC_word_t *end = vli;
     uECC_word_t carry = 0;
@@ -193,6 +300,7 @@
         carry = temp << (uECC_WORD_BITS - 1);
     }
 }
+#endif /* !asm_rshift1 */
 
 /* Computes result = left + right, returning carry. Can modify in place. */
 #if !asm_add