#!/usr/bin/env python

import sys

# The single required argument is the operand size in bytes.  It is used
# further down to size the pointer rewinds and the number of generated rows.
if len(sys.argv) < 2:
    # sys.exit(<str>) writes the text to stderr and exits with status 1, so
    # the usage message never ends up inside the generated output on stdout.
    sys.exit("Provide the integer size in bytes")

try:
    size = int(sys.argv[1])
except ValueError:
    # Report a bad argument the same way as a missing one instead of
    # dumping a traceback.
    sys.exit("Provide the integer size in bytes")

def lhi(i):
    """Return the register number used for high byte ``i`` of the left operand.

    The mapping is fixed: index 0 is r2, index 1 is r3, and so on.
    """
    left_high_base = 2
    return left_high_base + i

def rhi(i):
    """Return the register number used for high byte ``i`` of the right operand.

    The mapping is fixed: index 0 is r6, index 1 is r7, and so on.
    """
    right_high_base = 6
    return right_high_base + i

# Register numbers currently assigned to the four low bytes of each operand.
# update_low() rebinds these lists in rotated order, so an index does not map
# to a fixed register for the whole run.
left_lo = list(range(10, 14))
right_lo = list(range(14, 18))


def llo(i):
    """Return the register currently assigned to low byte ``i`` of the left operand."""
    return left_lo[i]


def rlo(i):
    """Return the register currently assigned to low byte ``i`` of the right operand."""
    return right_lo[i]

def emit(line, *args):
    r"""Print one instruction wrapped as a double-quoted string fragment.

    ``line`` is a %-format template and ``args`` fill its placeholders.  The
    template is wrapped as ``"<line> \n\t"`` before formatting; the ``\n\t``
    is written out as literal backslash escapes, not as control characters
    (presumably so the output can be pasted into an inline-assembly string).

    Raises ``TypeError`` if ``args`` does not match the placeholders in
    ``line``.
    """
    template = '"' + line + r' \n\t"'
    # A parenthesised single-argument print behaves the same on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print(template % args)

def update_low():
    """Slide the low-byte register window forward by one byte on both operands.

    Each register list is rotated left by one, which moves the register that
    was at index 0 to index 3.  A load is then emitted into that register from
    the x pointer (left operand) and the y pointer (right operand).
    """
    global left_lo, right_lo

    def rotated(regs):
        # Rotate left by one: first element goes to the back.
        return regs[1:] + regs[:1]

    left_lo, right_lo = rotated(left_lo), rotated(right_lo)
    emit("ld r%s, x+", left_lo[3])
    emit("ld r%s, y+", right_lo[3])

# Register numbers that make up the three-byte running accumulator.
# rotate_acc() rebinds this list in rotated order after each stored byte.
accum = [19, 20, 21]


def acc(i):
    """Return the register currently serving as accumulator byte ``i``."""
    return accum[i]


def rotate_acc():
    """Rotate the accumulator register roles left by one position.

    The register that was byte 0 becomes byte 2, and bytes 1 and 2 move down
    to become bytes 0 and 1.
    """
    global accum
    head, tail = accum[:1], accum[1:]
    accum = tail + head

# Load high values: four bytes of each operand into the fixed lhi/rhi
# registers, read through the x and y pointers with post-increment.
# range() is used instead of xrange() so the script runs on Python 2 and 3.
for i in range(4):
    emit("ld r%s, x+", lhi(i))
    emit("ld r%s, y+", rhi(i))

# Step the pointers back: r26 and r28 by size + 4, r30 by size.
# NOTE(review): r26/r28/r30 are presumably the low halves of the X/Y/Z
# pointer pairs, which would rewind x/y to just before the low bytes read
# next -- confirm against the surrounding assembly.
emit("sbiw r26, %s", size + 4)
emit("sbiw r28, %s", size + 4)
emit("sbiw r30, %s", size)

# Load low values: four bytes of each operand into the rotating
# left_lo/right_lo registers.
for i in range(4):
    emit("ld r%s, x+", llo(i))
    emit("ld r%s, y+", rlo(i))
# Blank line separates this section from the next in the generated output.
print("")

# Compute initial triangles: first output byte.
# The first product seeds the accumulator directly from r1:r0 (no add
# needed), and the top accumulator byte is cleared.
emit("mul r%s, r%s", lhi(0), rlo(0))
emit("mov r%s, r0", acc(0))
emit("mov r%s, r1", acc(1))
emit("ldi r%s, 0", acc(2))
# Fold in the byte already stored at z.
# NOTE(review): r25 is used as the carry-only addend throughout, so it is
# presumably held at zero by code outside this script -- confirm.
emit("ld r0, z")
emit("add r%s, r0", acc(0))
emit("adc r%s, r25", acc(1))
# Second cross product for the same output byte.
emit("mul r%s, r%s", rhi(0), llo(0))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
# Store the finished byte, then shift accumulator roles for the next one.
emit("st z+, r%s", acc(0))
# print("") is valid on both Python 2 and 3 and still emits a blank line.
print("")
rotate_acc()

# Remaining rows of the initial triangles: output bytes 1..3.
# range() is used instead of xrange() so the script runs on Python 2 and 3.
for i in range(1, 4):
    # Clear the new top accumulator byte and fold in the byte stored at z.
    emit("ldi r%s, 0", acc(2))
    emit("ld r0, z")
    emit("add r%s, r0", acc(0))
    emit("adc r%s, r25", acc(1))
    # Row i sums every pair whose indices add up to i, for both cross terms
    # (left-high * right-low and right-high * left-low).
    for j in range(i + 1):
        emit("mul r%s, r%s", lhi(j), rlo(i-j))
        emit("add r%s, r0", acc(0))
        emit("adc r%s, r1", acc(1))
        emit("adc r%s, r25", acc(2))
        emit("mul r%s, r%s", rhi(j), llo(i-j))
        emit("add r%s, r0", acc(0))
        emit("adc r%s, r1", acc(1))
        emit("adc r%s, r25", acc(2))
    # Store the finished byte, then shift accumulator roles for the next row.
    emit("st z+, r%s", acc(0))
    print("")
    rotate_acc()

# Compute rows overlapping old block: output bytes 4..size-1.
# range() is used instead of xrange() so the script runs on Python 2 and 3.
for i in range(4, size):
    # Clear the new top accumulator byte and fold in the byte stored at z.
    emit("ldi r%s, 0", acc(2))
    emit("ld r0, z")
    emit("add r%s, r0", acc(0))
    emit("adc r%s, r25", acc(1))
    # Slide the low-byte window forward by one; after this, llo(3)/rlo(3)
    # refer to the registers that were just reloaded.
    update_low()
    # Each row is a full set of four pairs per cross term: high byte j
    # against low-window slot 3-j.
    for j in range(4):
        emit("mul r%s, r%s", lhi(j), rlo(3-j))
        emit("add r%s, r0", acc(0))
        emit("adc r%s, r1", acc(1))
        emit("adc r%s, r25", acc(2))
        emit("mul r%s, r%s", rhi(j), llo(3-j))
        emit("add r%s, r0", acc(0))
        emit("adc r%s, r1", acc(1))
        emit("adc r%s, r25", acc(2))
    # Store the finished byte, then shift accumulator roles for the next row.
    emit("st z+, r%s", acc(0))
    print("")
    rotate_acc()

# Compute new triangle
# Snapshot seven registers per operand: the last three low-window slots (in
# their current rotation) followed by the four high registers.  Slot 0 of
# each low window is not included.
left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)]
right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)]


def left(i):
    """Return entry ``i`` of the seven-register left operand snapshot."""
    return left_combined[i]


def right(i):
    """Return entry ``i`` of the seven-register right operand snapshot."""
    return right_combined[i]


# Row i sums every pair whose snapshot indices add up to i + 6, so each row
# has one product fewer than the previous one (7, 6, ..., 2).
# range() is used instead of xrange() so the script runs on Python 2 and 3.
for i in range(6):
    # Only the top accumulator byte is cleared here; unlike the earlier
    # sections, nothing is read back from z.
    emit("ldi r%s, 0", acc(2))
    for j in range(7 - i):
        emit("mul r%s, r%s", left(i+j), right(6-j))
        emit("add r%s, r0", acc(0))
        emit("adc r%s, r1", acc(1))
        emit("adc r%s, r25", acc(2))
    # Store the finished byte, then shift accumulator roles for the next row.
    emit("st z+, r%s", acc(0))
    print("")
    rotate_acc()

# Final product of the last snapshot entries (left(6) * right(6)), added into
# the two remaining accumulator bytes, which are both stored.  The last two
# instructions add 4 to r26 and r28.
for template, regs in (
    ("mul r%s, r%s", (left(6), right(6))),
    ("add r%s, r0", (acc(0),)),
    ("adc r%s, r1", (acc(1),)),
    ("st z+, r%s", (acc(0),)),
    ("st z+, r%s", (acc(1),)),
    ("adiw r26, 4", ()),
    ("adiw r28, 4", ()),
):
    emit(template, *regs)