blob: f6e654f6ad87b7fa60acf3c1152781feae375a75 [file] [log] [blame]
#!/usr/bin/env python
import sys
if len(sys.argv) < 2:
print "Provide the integer size in bytes"
sys.exit(1)
size = int(sys.argv[1])
def lhi(i):
return i + 2
def rhi(i):
return i + 6
left_lo = [10, 11, 12, 13]
right_lo = [14, 15, 16, 17]
def llo(i):
return left_lo[i]
def rlo(i):
return right_lo[i]
def emit(line, *args):
s = '"' + line + r' \n\t"'
print s % args
def update_low():
global left_lo
global right_lo
left_lo = left_lo[1:] + left_lo[:1]
right_lo = right_lo[1:] + right_lo[:1]
emit("ld r%s, x+", left_lo[3])
emit("ld r%s, y+", right_lo[3])
accum = [19, 20, 21]
def acc(i):
return accum[i]
def rotate_acc():
global accum
accum = accum[1:] + accum[:1]
# Load high values
for i in xrange(4):
emit("ld r%s, x+", lhi(i))
emit("ld r%s, y+", rhi(i))
emit("sbiw r26, %s", size + 4)
emit("sbiw r28, %s", size + 4)
emit("sbiw r30, %s", size)
# Load low values
for i in xrange(4):
emit("ld r%s, x+", llo(i))
emit("ld r%s, y+", rlo(i))
print ""
# Compute initial triangles
emit("mul r%s, r%s", lhi(0), rlo(0))
emit("mov r%s, r0", acc(0))
emit("mov r%s, r1", acc(1))
emit("ldi r%s, 0", acc(2))
emit("ld r0, z")
emit("add r%s, r0", acc(0))
emit("adc r%s, r25", acc(1))
emit("mul r%s, r%s", rhi(0), llo(0))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
emit("st z+, r%s", acc(0))
print ""
rotate_acc()
for i in xrange(1, 4):
emit("ldi r%s, 0", acc(2))
emit("ld r0, z")
emit("add r%s, r0", acc(0))
emit("adc r%s, r25", acc(1))
for j in xrange(i + 1):
emit("mul r%s, r%s", lhi(j), rlo(i-j))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
emit("mul r%s, r%s", rhi(j), llo(i-j))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
emit("st z+, r%s", acc(0))
print ""
rotate_acc()
# Compute rows overlapping old block
for i in xrange(4, size):
emit("ldi r%s, 0", acc(2))
emit("ld r0, z")
emit("add r%s, r0", acc(0))
emit("adc r%s, r25", acc(1))
update_low()
for j in xrange(4):
emit("mul r%s, r%s", lhi(j), rlo(3-j))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
emit("mul r%s, r%s", rhi(j), llo(3-j))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
emit("st z+, r%s", acc(0))
print ""
rotate_acc()
# Compute new triangle
left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)]
right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)]
def left(i):
return left_combined[i]
def right(i):
return right_combined[i]
for i in xrange(6):
emit("ldi r%s, 0", acc(2))
for j in xrange(7 - i):
emit("mul r%s, r%s", left(i+j), right(6-j))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("adc r%s, r25", acc(2))
emit("st z+, r%s", acc(0))
print ""
rotate_acc()
emit("mul r%s, r%s", left(6), right(6))
emit("add r%s, r0", acc(0))
emit("adc r%s, r1", acc(1))
emit("st z+, r%s", acc(0))
emit("st z+, r%s", acc(1))
emit("adiw r26, 4")
emit("adiw r28, 4")