/* blob: dd47a939e1f28c58c7b72f5691a6b4897e44f06c [file] [log] [blame] */
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#if !PICO_RP2040
#include "pico/asm_helper.S"
pico_default_asm_setup
@ float_section: switch to the section that holds the routine group called name.
@ When PICO_FLOAT_IN_RAM is zero or undefined the section name is built with
@ SECTION_NAME, otherwise with RAM_SECTION_NAME. Both variants use the
@ section flags "ax".
.macro float_section name
#if !PICO_FLOAT_IN_RAM
.section SECTION_NAME(\name), "ax"
#else
.section RAM_SECTION_NAME(\name), "ax"
#endif
.endm
@ float_wrapper_section: open the section for a wrapped routine.
@ The argument func is passed through WRAPPER_FUNC_NAME and the result is
@ handed to float_section, which emits the actual .section directive.
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
float_wrapper_section conv_tof
@ convert int64 to float, rounding
@ In:  r0 = low word, r1 = high word of the signed 64-bit integer
@ Out: r0 = float bit pattern
@ Clears r2 (zero fractional bits) and continues into fix642float below.
wrapper_func __aeabi_l2f
regular_func int642float
movs r2,#0 @ fall through
@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
@ In:  r0 = low word, r1 = high word, r2 = number of fractional bits
@ Out: r0 = float bit pattern
@ Scratch: r3 and r12 (r1 and r2 are also overwritten); no stack use in this body.
@ Non-negative inputs branch to label 10 inside ufix642float. Negative inputs
@ are negated here, converted as a magnitude, and get the sign bit set.
@ Rounding: the first discarded bit is added through the carry flag. When all
@ lower discarded bits are zero and that bit was set (a tie), bit 0 of the
@ result is cleared so that the mantissa ends up even.
@ Range: when the exponent value in r2 is not below 0xfe (unsigned compare)
@ the result is replaced by 0x80000000 if r2 is negative, else by 0xff800000.
regular_func fix642float
cmp r1,#0
bge 10f @ positive? use unsigned code
rsbs r0,#0 @ negate low word; carry is set only when r0 was zero
sbc r1,r1,r1,lsl#1 @ make positive
cbz r1,7f @ high word is zero?
clz r3,r1
subs r3,#8 @ r3 = left shift that moves the leading 1 of r1 to bit 23
bmi 2f @ fewer than 8 leading zeros: a right shift is needed instead
lsls r1,r3
lsls r12,r0,r3 @ bits that will be lost
rsb r3,#32
lsr r0,r3 @ keep the top bits of the low word that move into the mantissa
orr r0,r0,r1 @ r0 = mantissa with its leading 1 at bit 23
sub r2,r2,r3
rsb r2,#149 @ r2 = exponent value to insert; the leading 1 at bit 23 adds one more
adds r12,r12,r12 @ rounding bit into carry
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
orr r0,r0,#0x80000000 @ set the sign bit (flags are left untouched)
beq 4f @ potential rounding tie?
cmp r2,#0xfe
bhs 3f @ over/underflow?
bx r14
2:
@ right-shift path: the leading 1 of the high word is above bit 23
add r3,#33 @ r3 = clz + 25
lsls r12,r1,r3 @ rounding bit in carry, sticky bits in r12
orrs r12,r12,r0 @ all of low word into sticky bits: affects Z but not C
rsb r3,#33 @ r3 = 8 - clz, the right shift applied to the high word
lsr r0,r1,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r2
add r2,#22+127+32
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
orr r0,r0,#0x80000000 @ set the sign bit (flags are left untouched)
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo @ exponent in range: return the rounded result
bxlo r14
@ over/underflow?
3:
@ flags here still come from the preceding cmp r2,#0xfe
mov r0,#0x80000000 @ underflow
it ge
movtge r0,#0xff80 @ overflow
1:
@ NOTE(review): no branch to this label is visible in this routine
bx r14
7:
@ magnitude fits in r0 alone: finish in the 32-bit negative path
mov r1,r2 @ fix2float_neg reads the fractional bit count from r1
b fix2float_neg
4:
@ Z was set by the sticky-bit computation: no discarded bits below the rounding bit
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo @ exponent in range: return
bxlo r14
b 3b
@ convert signed 32-bit fix to float, rounding; number of r0 bits after point in r1
@ In:  r0 = signed value, r1 = number of fractional bits
@ Out: r0 = float bit pattern
@ Scratch: r2, r3, r12; no stack use in this body.
@ Non-negative inputs are passed to ufix2float. Negative inputs are negated
@ and converted here with the sign bit set.
@ fix2float_neg is a second entry point: fix642float branches to it with the
@ magnitude in r0 and the fractional bit count in r1.
@ Range: when the exponent value in r2 is not below 0xfe (unsigned compare)
@ the result is replaced by 0x80000000 if r2 is negative, else by 0xff800000.
.thumb_func
regular_func fix2float
cmp r0,#0
bge ufix2float @ positive? can use unsigned code
rsbs r0,#0 @ make positive
fix2float_neg:
clz r3,r0
subs r3,#8 @ r3 = left shift that moves the leading 1 of r0 to bit 23
bmi 2f @ fewer than 8 leading zeros: shift right and round instead
lsls r0,r3 @ exact case: no bits are discarded
add r2,r1,r3
rsb r2,#149 @ r2 = exponent value to insert; the leading 1 at bit 23 adds one more
add r0,r0,r2,lsl#23 @ insert exponent
orr r0,#0x80000000 @ set the sign bit
cmp r2,#0xfe
it lo @ over/underflow?
bxlo r14
b 3f
2:
add r3,#33 @ r3 = clz + 25
lsls r12,r0,r3 @ rounding bit in carry, sticky bits in r12
rsb r3,#33 @ r3 = 8 - clz, the right shift amount
lsr r0,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r1
add r2,#22+127
adc r0,r0,r2,lsl#23 @ insert exponent
orr r0,#0x80000000 @ set the sign bit (flags are left untouched)
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo @ exponent in range: return the rounded result
bxlo r14
@ over/underflow?
3:
@ flags here still come from the preceding cmp r2,#0xfe
mov r0,#0x80000000 @ underflow
it ge
orrge r0,#0x7f800000 @ overflow
1:
@ NOTE(review): no branch to this label is visible in this routine
bx r14
4:
@ Z was set by the lsls above: no discarded bits below the rounding bit
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo @ exponent in range: return
bxlo r14
b 3b
@ ufix2float: unsigned 32-bit fixed point to float, round to nearest with ties to even
@   in:   r0 = value, r1 = number of fractional bits in r0
@   out:  r0 = float bit pattern
@   uses: r2 (exponent), r3 (shift count), ip (discarded bits); no stack accesses
@   An input of zero is returned unchanged. When the exponent value in r2 is
@   not below 0xfe (unsigned compare) the result becomes 0 if r2 is negative
@   and 0x7f800000 otherwise.
regular_func ufix2float
    cbz     r0, .Lufix2float_done           @ zero input is returned as it is
    clz     r3, r0
    subs    r3, r3, #8                      @ shift that puts the leading 1 on bit 23
    bmi     .Lufix2float_round              @ negative: value is wider than 24 bits

    @ value fits in 24 bits: the conversion is exact
    lsls    r0, r0, r3
    add     r2, r1, r3
    rsb     r2, r2, #149                    @ exponent to insert; bit 23 adds one more
    add     r0, r0, r2, lsl #23
    cmp     r2, #0xfe
    it      lo
    bxlo    lr                              @ exponent in range
    b       .Lufix2float_clamp

.Lufix2float_round:
    add     r3, r3, #33                     @ clz + 25
    lsls    ip, r0, r3                      @ C = rounding bit, ip = bits below it (Z when none)
    rsb     r3, r3, #33                     @ 8 - clz: right shift for the mantissa
    lsr     r0, r0, r3
    sub     r2, r3, r1
    add     r2, r2, #22+127
    adc     r0, r0, r2, lsl #23             @ insert exponent and add the rounding bit
    beq     .Lufix2float_tie                @ nothing below the rounding bit: maybe a tie
    cmp     r2, #0xfe
    it      lo
    bxlo    lr                              @ exponent in range

.Lufix2float_clamp:
    @ flags come from cmp r2, #0xfe
    ite     ge
    movge   r0, #0x7f800000                 @ too large
    movlt   r0, #0x00000000                 @ too small
.Lufix2float_done:
    bx      lr

.Lufix2float_tie:
    it      cs                              @ rounding bit was set, so this is a tie
    biccs   r0, r0, #1                      @ make the rounded-up result even
    cmp     r2, #0xfe
    it      lo
    bxlo    lr                              @ exponent in range
    b       .Lufix2float_clamp
@ convert uint64 to float, rounding
@ In:  r0 = low word, r1 = high word of the unsigned 64-bit integer
@ Out: r0 = float bit pattern
@ Clears r2 (zero fractional bits) and continues into ufix642float below.
wrapper_func __aeabi_ul2f
regular_func uint642float
movs r2,#0 @ fall through
@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
@ In:  r0 = low word, r1 = high word, r2 = number of fractional bits
@ Out: r0 = float bit pattern
@ Scratch: r3 and r12 (r1 and r2 are also overwritten); no stack use in this body.
@ When the high word is zero the work is handed to ufix2float.
@ Rounding: the first discarded bit is added through the carry flag. When all
@ lower discarded bits are zero and that bit was set (a tie), bit 0 of the
@ result is cleared so that the mantissa ends up even.
@ Range: when the exponent value in r2 is not below 0xfe (unsigned compare)
@ the result is replaced by 0 if r2 is negative, else by 0x7f800000.
regular_func ufix642float
10: @ fix642float also branches here for non-negative inputs
cbz r1,7f @ high word is zero?
clz r3,r1
subs r3,#8 @ r3 = left shift that moves the leading 1 of r1 to bit 23
bmi 2f @ fewer than 8 leading zeros: a right shift is needed instead
lsls r1,r3
lsls r12,r0,r3 @ bits that will be lost
rsb r3,#32
lsr r0,r3 @ keep the top bits of the low word that move into the mantissa
orr r0,r0,r1 @ r0 = mantissa with its leading 1 at bit 23
sub r2,r2,r3
rsb r2,#149 @ r2 = exponent value to insert; the leading 1 at bit 23 adds one more
adds r12,r12,r12 @ rounding bit into carry
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
beq 4f @ potential rounding tie?
cmp r2,#0xfe
bhs 3f @ over/underflow?
bx r14
2:
@ right-shift path: the leading 1 of the high word is above bit 23
add r3,#33 @ r3 = clz + 25
lsls r12,r1,r3 @ rounding bit in carry, sticky bits in r12
orrs r12,r12,r0 @ all of low word into sticky bits: affects Z but not C
rsb r3,#33 @ r3 = 8 - clz, the right shift applied to the high word
lsr r0,r1,r3
@ push {r14}
@ bl dumpreg
@ pop {r14}
sub r2,r3,r2
add r2,#22+127+32
adc r0,r0,r2,lsl#23 @ insert exponent, add rounding
beq 4f @ potential rounding tie?
cmp r2,#0xfe
it lo @ exponent in range: return the rounded result
bxlo r14
@ over/underflow?
3:
@ flags here still come from the preceding cmp r2,#0xfe
ite ge
movge r0,#0x7f800000 @ overflow
movlt r0,#0x00000000 @ underflow
1:
@ NOTE(review): no branch to this label is visible in this routine
bx r14
7:
@ value fits in r0 alone: finish in the 32-bit unsigned routine
mov r1,r2 @ ufix2float reads the fractional bit count from r1
b ufix2float
4:
@ Z was set by the sticky-bit computation: no discarded bits below the rounding bit
it cs @ rounding tie?
biccs r0,r0,#1 @ force to even if we rounded up
cmp r2,#0xfe
it lo @ exponent in range: return
bxlo r14
b 3b
float_wrapper_section conv_ftoi64

@ float2int64_z / __aeabi_f2lz: float to signed 64-bit integer, truncating
@ toward zero and clamping. Sets the fractional bit count in r1 to zero and
@ continues into float2fix64_z.
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    movs    r1, #0                          @ integer result: no fractional bits

@ float2fix64_z: float to signed 64-bit fixed point, truncating toward zero, clamping
@   in:   r0 = float bit pattern, r1 = number of fractional bits in the result
@   out:  r0 = low word, r1 = high word
@   uses: r2 (sign and exponent), r3 (scratch); no stack accesses
@   Special inputs: an exponent field of 0 gives 0; NaN is handled like +Inf.
@   Positive overflow and +Inf give r1:r0 = 0x7fffffff:0xffffffff,
@   negative overflow and -Inf give r1:r0 = 0x80000000:0x00000000.
regular_func float2fix64_z
    subs    r1, r1, #0x95                   @ fold exponent bias and mantissa length into r1
    asrs    r2, r0, #23                     @ r2 = sign and exponent, sign-extended
    sub     r3, r2, #1
    sub     r0, r0, r3, lsl #23             @ r0 = mantissa with the implied 1 at bit 23
    uxtb    r3, r3                          @ r3 = exponent field minus 1, as a byte
    cmp     r3, #0xfe
    bhs     .Lf2fix64_special               @ exponent field was 0 or 0xff
    adds    r1, r1, r3                      @ r1 = left shift to apply to the mantissa
    bmi     .Lf2fix64_shift_right           @ negative: it is really a right shift
    subs    r3, r1, #32
    bge     .Lf2fix64_high_word             @ shift of 32 or more: low word is zero
    subs    r3, r1, #8
    ble     .Lf2fix64_low_word              @ shift of 8 or less: magnitude fits in r0

    @ shift of 9..31: the magnitude straddles both words
    lsls    r0, r0, #8                      @ left-align the 24-bit mantissa in r0
    rsbs    r1, r3, #32
    lsrs    r1, r0, r1                      @ high word = bits shifted out of the top
    lsls    r0, r0, r3                      @ low word
    cmp     r2, #0
    it      ge
    bxge    lr                              @ positive input: done
    rsbs    r0, r0, #0                      @ negative input: negate r1:r0
    sbcs    r1, r1, r1, lsl #1
    bx      lr

.Lf2fix64_low_word:
    lsls    r0, r0, r1
    movs    r1, r2, asr #31                 @ high word = 0 or all ones, from the sign
    eors    r0, r0, r1                      @ with the subtract below: negate r0 when negative
    subs    r0, r0, r1
    bx      lr

.Lf2fix64_high_word:
    cmp     r3, #8                          @ mantissa would reach bit 31 of the high word
    bge     .Lf2fix64_clamp
    lsls    r0, r0, r3
    eor     r1, r0, r2, asr #31             @ with the add below: negate when negative
    add     r1, r1, r2, lsr #31
    movs    r0, #0
    bx      lr

.Lf2fix64_special:
    @ flags come from cmp r3, #0xfe
    bhi     .Lf2fix64_zero                  @ exponent field 0: result is 0
    lsls    r1, r0, #9                      @ isolate the mantissa field
    it      ne
    movne   r2, #0                          @ NaN: clear the sign so it clamps like +Inf
.Lf2fix64_clamp:
    mvn     r1, #0x80000000                 @ r1 = 0x7fffffff
    add     r1, r1, r2, lsr #31             @ becomes 0x80000000 when the sign is set
    mvn     r0, r2, asr #31                 @ low word: all ones if positive, zero if negative
    bx      lr

.Lf2fix64_zero:
    movs    r0, #0
    movs    r1, #0
    bx      lr

.Lf2fix64_shift_right:
    rsbs    r1, r1, #0                      @ r1 = right shift amount
    usat    r1, #5, r1                      @ cap at 31; the 24-bit mantissa then shifts to 0
    lsrs    r0, r0, r1
    eors    r0, r0, r2, asr #31             @ with the add below: negate when negative
    adds    r0, r0, r2, lsr #31
    movs    r1, r0, asr #31                 @ sign-extend into the high word
    bx      lr
float_wrapper_section conv_ftoui64

@ float2uint64 / float2uint64_z / __aeabi_f2ulz: float to unsigned 64-bit
@ integer with clamping. All three names share one entry point, which sets the
@ fractional bit count in r1 to zero and continues into float2ufix64.
wrapper_func __aeabi_f2ulz
regular_func float2uint64
regular_func float2uint64_z
    movs    r1, #0                          @ integer result: no fractional bits

@ float2ufix64: float to unsigned 64-bit fixed point, clamping
@   in:   r0 = float bit pattern, r1 = number of fractional bits in the result
@   out:  r0 = low word, r1 = high word
@   uses: r2, r3; no stack accesses
@   Bits below the result precision are discarded by a logical right shift.
@   Inputs with the sign bit set (other than NaN) and inputs with an exponent
@   field of 0 give 0. Overflow, +Inf and NaN of either sign give all ones in
@   both words.
regular_func float2ufix64
//regular_func float2ufix64_z
    subs    r1, r1, #0x96                   @ fold exponent bias and mantissa length into r1
    asrs    r2, r0, #23                     @ r2 = sign and exponent, sign-extended
    sub     r3, r2, #1
    cmp     r3, #0xfe
    bhs     .Lf2ufix64_special              @ sign set, exponent field 0, or exponent field 0xff
    sub     r0, r0, r3, lsl #23             @ r0 = mantissa with the implied 1 at bit 23
    adds    r1, r1, r2                      @ r1 = left shift to apply to the mantissa
    bmi     .Lf2ufix64_shift_right          @ negative: it is really a right shift
    subs    r2, r1, #7
    ble     .Lf2ufix64_low_word             @ shift of 7 or less: result fits in r0
    subs    r3, r1, #32
    bge     .Lf2ufix64_high_word            @ shift of 32 or more: low word is zero

    @ shift of 8..31: the result straddles both words
    lsls    r0, r0, #7                      @ pre-shift so that r2 holds the remaining shift
    rsbs    r3, r2, #32
    lsrs    r1, r0, r3                      @ high word = bits shifted out of the top
    lsls    r0, r0, r2                      @ low word
    bx      lr

.Lf2ufix64_low_word:
    lsls    r0, r0, r1
    movs    r1, #0
    bx      lr

.Lf2ufix64_high_word:
    cmp     r1, #32+9                       @ mantissa would no longer fit in 64 bits
    bge     .Lf2ufix64_all_ones
    lsls    r1, r0, r3
    movs    r0, #0
    bx      lr

.Lf2ufix64_all_ones:
    mvn     r0, #0                          @ r0 = 0xffffffff
    mvn     r1, #0                          @ r1 = 0xffffffff
    bx      lr

.Lf2ufix64_shift_right:
    rsbs    r1, r1, #0                      @ r1 = right shift amount
    usat    r1, #5, r1                      @ cap at 31; the 24-bit mantissa then shifts to 0
    lsrs    r0, r0, r1
    movs    r1, #0
    bx      lr

.Lf2ufix64_special:
    @ r0 still holds the unmodified input bit pattern here
    cmp     r0, #0xff800000
    bhi     .Lf2ufix64_all_ones             @ above the -Inf pattern: NaN with sign set
    cmp     r0, #0x00800000
    bgt     .Lf2ufix64_all_ones             @ signed greater: +Inf or +NaN
    movs    r0, #0                          @ everything else here returns 0
    movs    r1, #0
    bx      lr
#endif