// src/rp2_common/pico_float/float_aeabi.S - third_party/github/raspberrypi/pico-sdk - Git at Google

 /*
  * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */

 #include "pico/asm_helper.S"
 #include "pico/bootrom/sf_table.h"

 __pre_init __aeabi_float_init, 00020

 .syntax unified
 .cpu cortex-m0plus
 .thumb

 // Switch to the section that holds the float routine called name: a RAM section
 // when PICO_FLOAT_IN_RAM is set, the default section otherwise. The section is
 // flagged allocatable and executable ("ax").
 // NOTE(review): RAM_SECTION_NAME / SECTION_NAME are not defined in this file;
 // presumably they come from pico/asm_helper.S - confirm.
 .macro float_section name
 #if PICO_FLOAT_IN_RAM
 .section RAM_SECTION_NAME(\name), "ax"
 #else
 .section SECTION_NAME(\name), "ax"
 #endif
 .endm

 // Same as float_section, but the section name is built from the wrapper symbol
 // name WRAPPER_FUNC_NAME(func).
 .macro float_wrapper_section func
 float_section WRAPPER_FUNC_NAME(\func)
 .endm

 // Thin alias for wrapper_func; the two NaN-checking variants that follow use it.
 .macro _float_wrapper_func x
     wrapper_func \x
 .endm

 // Open wrapper function x taking one float argument in r0.
 // With PICO_FLOAT_PROPAGATE_NANS the prologue parks the caller return address
 // in ip and calls __check_nan_f1. That helper either comes back here (lr is then
 // restored from ip) or, for a NaN argument, returns straight to the caller via
 // ip. The helper writes r2, r3 and the flags.
 .macro wrapper_func_f1 x
    _float_wrapper_func \x
 #if PICO_FLOAT_PROPAGATE_NANS
     mov ip, lr
     bl __check_nan_f1
     mov lr, ip
 #endif
 .endm

 // Open wrapper function x taking two float arguments in r0 and r1.
 // Same scheme as wrapper_func_f1 but using __check_nan_f2, which tests both
 // arguments and leaves the NaN operand in r0 when it returns to the caller.
 .macro wrapper_func_f2 x
    _float_wrapper_func \x
 #if PICO_FLOAT_PROPAGATE_NANS
     mov ip, lr
     bl __check_nan_f2
     mov lr, ip
 #endif
 .endm

 .section .text

 #if PICO_FLOAT_PROPAGATE_NANS
 // NaN filter for one-argument wrappers (called from the wrapper_func_f1 prologue).
 // In:  r0 = float bits
 //      ip = return address of the caller of the wrapper
 //      lr = return address inside the wrapper
 // Out: returns via lr when r0 is not a NaN; returns via ip (directly to the
 //      caller of the wrapper, NaN still in r0) when it is.
 // Clobbers r2, r3 and the flags.
 .thumb_func
 __check_nan_f1:
    movs r3, #1
    lsls r3, #24        // r3 = 0x01000000
    lsls r2, r0, #1     // drop the sign: exponent in bits 31..24, mantissa in bits 23..1
    adds r2, r3         // carries out only for exponent 0xff; sum is zero only if the mantissa is zero too
    bhi 1f              // C set and Z clear: exponent all ones with non-zero mantissa, i.e. NaN
    bx lr
 1:
    bx ip

 // NaN filter for two-argument wrappers (called from the wrapper_func_f2 prologue).
 // In:  r0, r1 = float bits; ip and lr as for __check_nan_f1
 // Out: returns via lr when neither argument is a NaN. Otherwise returns via ip
 //      with the NaN in r0 (r0 is tested first; a NaN in r1 is copied into r0).
 // Clobbers r2, r3 and the flags.
 .thumb_func
 __check_nan_f2:
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1f              // r0 is a NaN: return it as is
    lsls r2, r1, #1
    adds r2, r3
    bhi 2f              // r1 is a NaN
    bx lr
 2:
    mov r0, r1
 1:
    bx ip
 #endif

 // Tail-call the routine whose address is stored at byte offset SF_TABLE_OFFSET
 // from the symbol sf_table. lr is not modified, so the callee returns directly
 // to whoever lr points at when the macro is reached. Clobbers r3.
 // With PICO_FLOAT_SUPPORT_ROM_V1 and NDEBUG undefined, ip is zeroed first.
 // NOTE(review): presumably so that code looking for a shim descriptor in ip never
 // sees a stale value - the consumer is not in this file, confirm.
 .macro table_tail_call SF_TABLE_OFFSET
 #if PICO_FLOAT_SUPPORT_ROM_V1
 #ifndef NDEBUG
     movs r3, #0
     mov ip, r3
 #endif
 #endif
     ldr r3, =sf_table
     ldr r3, [r3, #\SF_TABLE_OFFSET]
     bx r3
 .endm

 // Same as table_tail_call, but with PICO_FLOAT_SUPPORT_ROM_V1 the call site also
 // carries inline data and ip is pointed at it: in Thumb state reading pc yields
 // the address of the current instruction plus 4, which here is the first byte
 // after the 2-byte bx r3. The data is the byte SF_TABLE_OFFSET, the byte 0xdf,
 // then the 32-bit address of the symbol passed as shim.
 // NOTE(review): presumably read by whatever handles routines absent from the V1
 // ROM - that code is not in this file, confirm.
 .macro shimmable_table_tail_call SF_TABLE_OFFSET shim
     ldr r3, =sf_table
     ldr r3, [r3, #\SF_TABLE_OFFSET]
 #if PICO_FLOAT_SUPPORT_ROM_V1
     mov ip, pc
 #endif
     bx r3
 #if PICO_FLOAT_SUPPORT_ROM_V1
 .byte \SF_TABLE_OFFSET, 0xdf
 .word \shim
 #endif
 .endm


 # note generally each function is in a separate section unless there is fall thru or branching between them
 # note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool

 # note functions are word aligned except where they are an odd number of linear instructions

 // float FUNC_NAME(__aeabi_fadd)(float, float)         single-precision addition
 float_wrapper_section __aeabi_farithmetic
 // float FUNC_NAME(__aeabi_frsub)(float x, float y)    single-precision reverse subtraction, y - x
 // In: r0 = x, r1 = y. Swaps the two arguments and falls through into __aeabi_fsub.

 # frsub first because it is the only one that needs alignment
 .align 2
 wrapper_func __aeabi_frsub
     // three XORs exchange r0 and r1 without a scratch register
     eors r0, r1
     eors r1, r0
     eors r0, r1
     // fall thru

 // float FUNC_NAME(__aeabi_fsub)(float x, float y)     single-precision subtraction, x - y
 // In: r0 = x, r1 = y. Out: r0 = result of the sf_table routine at SF_TABLE_FSUB.
 // With PICO_FLOAT_PROPAGATE_NANS: a NaN operand is returned by the prologue; for
 // operands of equal sign the table routine is called as a subroutine (bl to the
 // local label 1, whose tail call returns to the instruction after the bl) and
 // the result is post-processed by fdiv_fsub_nan_helper, which pops the saved
 // operands and lr.
 wrapper_func_f2 __aeabi_fsub
 #if PICO_FLOAT_PROPAGATE_NANS
     // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
     mov r2, r0
     eors r2, r1         // N = sign(x) xor sign(y)
     bmi 1f // different signs
     push {r0, r1, lr}   // keep the operands for the helper, and the real return address
     bl 1f
     b fdiv_fsub_nan_helper
 1:
 #endif
     table_tail_call SF_TABLE_FSUB

 // __aeabi_fadd: r0 = x, r1 = y; tail-calls the sf_table routine at SF_TABLE_FADD
 // (after the NaN prologue when PICO_FLOAT_PROPAGATE_NANS is set).
 wrapper_func_f2 __aeabi_fadd
     table_tail_call SF_TABLE_FADD

 // float FUNC_NAME(__aeabi_fdiv)(float n, float d)     single-precision division, n / d
 // In: r0 = n, r1 = d. Out: r0 = result of the sf_table routine at SF_TABLE_FDIV.
 // With PICO_FLOAT_PROPAGATE_NANS the table routine is called as a subroutine
 // with the operands and lr saved on the stack, then fdiv_fsub_nan_helper
 // post-processes the result and returns to the caller.
 wrapper_func_f2 __aeabi_fdiv
 #if PICO_FLOAT_PROPAGATE_NANS
     push {r0, r1, lr}
     bl 1f
     b fdiv_fsub_nan_helper
 1:
 #endif
     table_tail_call SF_TABLE_FDIV

 // Shared epilogue for fsub and fdiv when PICO_FLOAT_PROPAGATE_NANS is set.
 // In:  r0 = result; stack (top first) = first operand, second operand, return address.
 // Out: r0 = result, with bit 22 additionally set (turning an infinity into a NaN)
 //      when the result exponent is all ones and both operand exponents are all ones.
 // Returns by popping the saved return address into pc. Clobbers r1-r3 and flags.
 // The label is defined even when the body is compiled out.
 fdiv_fsub_nan_helper:
 #if PICO_FLOAT_PROPAGATE_NANS
     pop {r1, r2}

     // check for infinite op infinite (or rather check for infinite result with both
     // operands being infinite)
     lsls r3, r0, #1
     asrs r3, r3, #24    // r3 = result exponent, sign extended: -1 when it is 0xff
     adds r3, #1
     beq 2f
     pop {pc}
 2:
     lsls r1, #1
     asrs r1, r1, #24    // same extraction for the first operand
     lsls r2, #1
     asrs r2, r2, #24    // and for the second operand
     ands r1, r2
     adds r1, #1         // zero only when both exponents were 0xff
     bne 3f
     // infinite to nan
     movs r1, #1
     lsls r1, #22
     orrs r0, r1         // set the top mantissa bit
 3:
     pop {pc}
 #endif

 // float FUNC_NAME(__aeabi_fmul)(float, float)         single-precision multiplication
 // In: r0 = x, r1 = y. Out: r0 = result of the sf_table routine at SF_TABLE_FMUL.
 // With PICO_FLOAT_PROPAGATE_NANS: a NaN operand is returned by the prologue; the
 // table routine is called as a subroutine (bl to local label 1) with the
 // operands and lr saved, and an infinite result that came from infinity times
 // zero is converted to a NaN by setting mantissa bit 22.
 // Clobbers r1-r3 and flags.
 wrapper_func_f2 __aeabi_fmul
 #if PICO_FLOAT_PROPAGATE_NANS
     push {r0, r1, lr}
     bl 1f
     pop {r1, r2}

     // check for multiplication of infinite by zero (or rather check for infinite result with either
     // operand 0)
     lsls r3, r0, #1
     asrs r3, r3, #24    // r3 = result exponent, sign extended: -1 when it is 0xff
     adds r3, #1
     beq 2f
     pop {pc}
 2:
     // Compare the operands with their sign bits shifted out, so that a zero of
     // either sign is recognised. ANDing the raw words missed -inf * -0, where
     // the two sign bits made the AND non-zero and the infinity was returned.
     lsls r1, #1
     lsls r2, #1
     ands r1, r2         // zero when the operands share no exponent/mantissa bit
     bne 3f
     // infinite to nan
     movs r1, #1
     lsls r1, #22
     orrs r0, r1
 3:
     pop {pc}
 1:
 #endif
     table_tail_call SF_TABLE_FMUL

 // void FUNC_NAME(__aeabi_cfrcmple)(float, float)         reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
 // In: r0 = x, r1 = y. Compares y against x by swapping the two (after saving
 // them) and joining the shared code. r0-r2 are preserved; only flags are returned.
 float_wrapper_section __aeabi_cfcmple
 .align 2
 wrapper_func __aeabi_cfrcmple
     push {r0-r2, lr}
     eors r0, r1         // XOR swap of r0 and r1
     eors r1, r0
     eors r0, r1
     b __aeabi_cfcmple_guts

 // NOTE these share an implementation as we have no excepting NaNs.
 // void FUNC_NAME(__aeabi_cfcmple)(float, float)         3-way (<, =, ?>) compare [1], result in PSR ZC flags
 // void FUNC_NAME(__aeabi_cfcmpeq)(float, float)         non-excepting equality comparison [1], result in PSR ZC flags
 // In: r0 = x, r1 = y. r0-r2 are preserved (restored by the final pop).
 // Flags on return, as produced by the code below:
 //   x == y (including +0 vs -0)  Z = 1, C = 1
 //   x <  y                       C = 0
 //   x >  y, or either is a NaN   Z = 0, C = 1
 // Zero-exponent operands have their mantissa cleared first, so denormals compare
 // as zeros of the same sign.
 .align 2
 wrapper_func __aeabi_cfcmple
 wrapper_func __aeabi_cfcmpeq
     push {r0-r2, lr}

 __aeabi_cfcmple_guts:
     lsls r2,r0,#1
     lsrs r2,#24     @ r2 = exponent of x
     beq 1f          @ zero or denormal
     cmp r2,#0xff
     bne 2f          @ ordinary number: use as is
     lsls r2, r0, #9 @ exponent 0xff: C = low exponent bit (1), Z = mantissa is zero
     bhi 3f          @ NaN: leave with C = 1, Z = 0
 1:
     lsrs r0,#23     @ clear mantissa if denormal or infinite
     lsls r0,#23
 2:
     lsls r2,r1,#1
     lsrs r2,#24     @ same classification for y
     beq 1f
     cmp r2,#0xff
     bne 2f
     lsls r2, r1, #9
     bhi 3f
 1:
     lsrs r1,#23     @ clear mantissa if denormal or infinite
     lsls r1,#23
 2:
     movs r2,#1      @ initialise result
     eors r1,r0
     bmi 2f          @ opposite signs? then can proceed on basis of sign of x
     eors r1,r0      @ restore y
     bpl 1f
     cmp r1,r0       @ both negative: larger bit pattern is the smaller value, so compare reversed
     pop {r0-r2, pc}
 1:
     cmp r0,r1       @ both non-negative: unsigned order of the bit patterns
     pop {r0-r2, pc}
 2:
     orrs r1, r0     @ handle 0/-0
     adds r1, r1     @ note this always sets C
     beq 3f          @ nothing but the sign bit set: +0 against -0, report equal
     mvns r0, r0     @ carry inverse of r0 sign
     adds r0, r0
 3:
     pop {r0-r2, pc}


 // int FUNC_NAME(__aeabi_fcmpeq)(float, float)         result (1, 0) denotes (=, ?<>) [2], use for C == and !=
 // In: r0 = x, r1 = y. Out: r0 = 1 when __aeabi_cfcmpeq returns with Z set, else 0.
 float_wrapper_section __aeabi_fcmpeq
 .align 2
 wrapper_func __aeabi_fcmpeq
     push {lr}
     bl __aeabi_cfcmpeq
     beq 1f
     movs r0, #0
     pop {pc}
 1:
     movs r0, #1
     pop {pc}

 // int FUNC_NAME(__aeabi_fcmplt)(float, float)         result (1, 0) denotes (<, ?>=) [2], use for C <
 // In: r0 = x, r1 = y. Out: r0 = 0 when __aeabi_cfcmple returns with C set,
 // 0xffffffff when it returns with C clear.
 // NOTE: the true value produced here is all ones, not 1.
 float_wrapper_section __aeabi_fcmplt
 .align 2
 wrapper_func __aeabi_fcmplt
     push {lr}
     bl __aeabi_cfcmple
     sbcs r0, r0         // r0 = r0 - r0 - (1 - C)
     pop {pc}

 // int FUNC_NAME(__aeabi_fcmple)(float, float)         result (1, 0) denotes (<=, ?>) [2], use for C <=
 // In: r0 = x, r1 = y. Out: r0 = 1 when __aeabi_cfcmple returns with C clear or
 // Z set (the LS condition), else 0.
 float_wrapper_section __aeabi_fcmple
 .align 2
 wrapper_func __aeabi_fcmple
     push {lr}
     bl __aeabi_cfcmple
     bls 1f
     movs r0, #0
     pop {pc}
 1:
     movs r0, #1
     pop {pc}

 // int FUNC_NAME(__aeabi_fcmpge)(float, float)         result (1, 0) denotes (>=, ?<) [2], use for C >=
 // In: r0 = x, r1 = y. Out: r0 = 1 when the reversed compare (y against x)
 // returns with C clear or Z set, else 0.
 float_wrapper_section __aeabi_fcmpge
 .align 2
 wrapper_func __aeabi_fcmpge
     push {lr}
     // because of NaNs it is better to reverse the args than the result
     bl __aeabi_cfrcmple
     bls 1f
     movs r0, #0
     pop {pc}
 1:
     movs r0, #1
     pop {pc}

 // int FUNC_NAME(__aeabi_fcmpgt)(float, float)         result (1, 0) denotes (>, ?<=) [2], use for C >
 // In: r0 = x, r1 = y. Out: r0 = 0 when the reversed compare (y against x)
 // returns with C set, 0xffffffff when it returns with C clear.
 // NOTE: the true value produced here is all ones, not 1.
 float_wrapper_section __aeabi_fcmpgt
 wrapper_func __aeabi_fcmpgt
     push {lr}
     // because of NaNs it is better to reverse the args than the result
     bl __aeabi_cfrcmple
     sbcs r0, r0         // r0 = r0 - r0 - (1 - C)
     pop {pc}

 // int FUNC_NAME(__aeabi_fcmpun)(float, float)         result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
 // In: r0 = x, r1 = y. Out: r0 = 1 when either argument is a NaN, else 0.
 // Clobbers r2, r3 and flags.
 float_wrapper_section __aeabi_fcmpun
 wrapper_func __aeabi_fcmpun
    movs r3, #1
    lsls r3, #24        // r3 = 0x01000000
    lsls r2, r0, #1     // drop the sign of x
    adds r2, r3         // carry out only for exponent 0xff; zero only if the mantissa is zero too
    bhi 1f              // x is a NaN
    lsls r2, r1, #1
    adds r2, r3
    bhi 1f              // y is a NaN
    movs r0, #0
    bx lr
 1:
    movs r0, #1
    bx lr


 // float FUNC_NAME(__aeabi_ui2f)(unsigned)             unsigned to float (single precision) conversion
 // In: r0 = unsigned value. Out: r0 = float bits.
 // Zero is returned as 0; any other value joins the shared conversion code at
 // __aeabi_i2f_main with r1 = 0 as the sign word.
 float_wrapper_section __aeabi_ui2f
 wrapper_func __aeabi_ui2f
         subs r1, r1
         cmp r0, #0
         bne __aeabi_i2f_main
         mov r0, r1
         bx lr

 float_wrapper_section __aeabi_i2f
 // float FUNC_NAME(__aeabi_i2f)(int)                     integer to float (single precision) conversion
 // In: r0 = signed value. Out: r0 = float bits, rounded to nearest with ties to even.
 // Clobbers r1-r3, ip and flags. The shared code returns through ip because lr
 // is overwritten by the blx.
 // NOTE(review): the routine fetched through sf_clz_func is presumably a
 // count-leading-zeros helper (its result is used as the normalising shift count
 // and in the exponent); it must leave ip intact - confirm.
 wrapper_func __aeabi_i2f
         lsrs r1, r0, #31
         lsls r1, #31   @ r1 = sign bit only; N = sign
         bpl 1f
         rsbs r0, #0    @ negative input: r0 = magnitude
 1:
         cmp r0, #0
         beq 7f         @ zero: r0 is already the result
 __aeabi_i2f_main:

         mov ip, lr
         push {r0, r1}
         ldr r3, =sf_clz_func
         ldr r3, [r3]
         blx r3
         pop {r1, r2}   @ r1 = magnitude, r2 = sign word
         lsls r1, r0    @ shift the magnitude left by the returned count
         subs r0, #158
         rsbs r0, #0    @ r0 = 158 - count, used as the exponent field

         adds r1,#0x80  @ rounding
         bcs 5f         @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

         lsls r3,r1,#24 @ check bottom 8 bits of r1
         beq 6f         @ in rounding-tie case?
         lsls r1,#1     @ remove leading 1
 3:
         lsrs r1,#9     @ align mantissa
         lsls r0,#23    @ align exponent
         orrs r0,r2     @ apply sign (r2 holds the sign word)
 4:
         orrs r0,r1     @ merge mantissa
 1:
         bx ip
 5:
         adds r0,#1     @ correct exponent offset
         b 3b
 6:
         lsrs r1,#9     @ ensure even result
         lsls r1,#10
         b 3b
 7:
         bx lr


 // int FUNC_NAME(__aeabi_f2iz)(float)                     float (single precision) to integer C-style conversion [3]
 // In: r0 = float bits. Out: r0 = value truncated toward zero.
 //   exponent <= 126 (magnitude below 1, zero, denormal)  -> 0
 //   exponent >= 158 (magnitude >= 2^31, infinity, NaN)    -> 0x7fffffff for sign 0, 0x80000000 for sign 1
 // Leaf routine; clobbers r1-r3 and flags.
 // (The unreachable remains of an earlier implementation that followed the last
 // bx lr have been removed.)
 float_wrapper_section __aeabi_f2iz
 wrapper_func __aeabi_f2iz
 regular_func float2int_z
     lsls r1, r0, #1
     lsrs r2, r1, #24    // r2 = biased exponent
     movs r3, #0x80
     lsls r3, #24        // r3 = 0x80000000
     cmp r2, #126
     ble 1f
     subs r2, #158       // r2 = exponent - 158
     bge 2f
     asrs r1, r0, #31    // r1 = 0 for sign 0, -1 for sign 1
     lsls r0, #9
     lsrs r0, #1
     orrs r0, r3         // r0 = mantissa with the implicit one at bit 31
     negs r2, r2         // shift count 158 - exponent, in 1..31
     lsrs r0, r2         // integer magnitude
     lsls r1, #1
     adds r1, #1         // r1 = +1 or -1
     muls r0, r1         // apply the sign
     bx lr
 1:
     movs r0, #0
     bx lr
 2:
     lsrs r0, #31        // 0 or 1 from the sign bit
     adds r0, r3
     subs r0, #1         // 0x7fffffff or 0x80000000
     bx lr

 // The routines in this group are single tail calls into sf_table; arguments and
 // results pass through unchanged and only r3 (plus ip in the ROM V1
 // configurations, see the macros) is modified here.
 // NOTE(review): what each table routine computes is defined outside this file.

 // float2int: table entry SF_TABLE_FLOAT2INT, shim float2int_shim.
 float_section float2int
 regular_func float2int
     shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim

 // float2fix: table entry SF_TABLE_FLOAT2FIX, shim float2fix_shim.
 float_section float2fix
 regular_func float2fix
     shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim

 // float2ufix: table entry SF_TABLE_FLOAT2UFIX.
 float_section float2ufix
 regular_func float2ufix
     table_tail_call SF_TABLE_FLOAT2UFIX

 // unsigned FUNC_NAME(__aeabi_f2uiz)(float)             float (single precision) to unsigned C-style conversion [3]
 // Table entry SF_TABLE_FLOAT2UINT.
 float_wrapper_section __aeabi_f2uiz
 wrapper_func __aeabi_f2uiz
     table_tail_call SF_TABLE_FLOAT2UINT

 // fix2float: table entry SF_TABLE_FIX2FLOAT.
 float_section fix2float
 regular_func fix2float
     table_tail_call SF_TABLE_FIX2FLOAT

 // ufix2float: table entry SF_TABLE_UFIX2FLOAT.
 float_section ufix2float
 regular_func ufix2float
     table_tail_call SF_TABLE_UFIX2FLOAT

 // fix642float: table entry SF_TABLE_FIX642FLOAT, shim fix642float_shim.
 float_section fix642float
 regular_func fix642float
     shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim

 // ufix642float: table entry SF_TABLE_UFIX642FLOAT, shim ufix642float_shim.
 float_section ufix642float
 regular_func ufix642float
     shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim

 // float FUNC_NAME(__aeabi_l2f)(long long)             long long to float (single precision) conversion
 // In: r1:r0 = 64-bit signed value (r0 = low word). Out: r0 = float bits.
 // When the high word equals the sign extension of the low word the value fits
 // in 32 bits and __aeabi_i2f is used (via the stub at local label 1, placed
 // ahead of the entry point); otherwise the sf_table routine at
 // SF_TABLE_INT642FLOAT is tail-called.
 float_wrapper_section __aeabi_l2f
 1:
     ldr r2, =__aeabi_i2f
     bx r2
 wrapper_func __aeabi_l2f
     asrs r2, r0, #31    // r2 = 0 or -1 from the sign of the low word
     cmp r1, r2
     beq 1b
     shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim

 // float FUNC_NAME(__aeabi_ul2f)(unsigned long long)   unsigned long long to float (single precision) conversion
 // In: r1:r0 = 64-bit unsigned value (r0 = low word). Out: r0 = float bits.
 // When the high word is zero __aeabi_ui2f is used (via the stub at local label
 // 1, placed ahead of the entry point); otherwise the sf_table routine at
 // SF_TABLE_UINT642FLOAT is tail-called.
 float_wrapper_section __aeabi_ul2f
 1:
     ldr r2, =__aeabi_ui2f
     bx r2
 wrapper_func __aeabi_ul2f
     cmp r1, #0
     beq 1b
     shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim

 // long long FUNC_NAME(__aeabi_f2lz)(float)             float (single precision) to long long C-style conversion [3]
 // In: r0 = float bits. Out: r1:r0 = 64-bit result (r0 = low word).
 // Sign bit clear: handed to float2int64 unchanged.
 // Sign bit set: the magnitude is converted with float2ufix64 and the result is
 // negated; a converted magnitude with bit 63 set yields 0x8000000000000000.
 // NOTE(review): r1 = 0 passed to float2ufix64 is presumably the number of
 // fractional bits - confirm against the table routine.
 float_wrapper_section __aeabi_f2lz
 wrapper_func __aeabi_f2lz
 regular_func float2int64_z
     cmn r0, r0          // flags of r0 + r0: C = sign bit
     bcc float2int64
     push {lr}
     lsls r0, #1
     lsrs r0, #1         // clear the sign bit
     movs r1, #0
     bl float2ufix64
     cmp r1, #0
     bmi 1f              // bit 63 set: magnitude too large
     movs r2, #0
     rsbs r0, #0         // low word = 0 - low word, borrow in C
     sbcs r2, r1         // high word = 0 - high word - borrow
     mov r1, r2
     pop {pc}
 1:
     movs r1, #128
     lsls r1, #24        // r1:r0 = 0x80000000:00000000
     movs r0, #0
     pop {pc}

 // Single tail calls into sf_table; arguments and results pass through
 // unchanged and only r3 (plus ip when PICO_FLOAT_SUPPORT_ROM_V1 is set) is
 // modified here.
 // NOTE(review): what each table routine computes is defined outside this file.

 // float2int64: table entry SF_TABLE_FLOAT2INT64, shim float2int64_shim.
 float_section float2int64
 regular_func float2int64
     shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim

 // float2fix64: table entry SF_TABLE_FLOAT2FIX64, shim float2fix64_shim.
 float_section float2fix64
 regular_func float2fix64
     shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim

 // unsigned long long FUNC_NAME(__aeabi_f2ulz)(float)     float to unsigned long long C-style conversion [3]
 // Table entry SF_TABLE_FLOAT2UINT64, shim float2uint64_shim.
 float_wrapper_section __aeabi_f2ulz
 wrapper_func __aeabi_f2ulz
     shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim

 // float2ufix64: table entry SF_TABLE_FLOAT2UFIX64, shim float2ufix64_shim.
 float_section float2ufix64
 regular_func float2ufix64
     shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim

 // __aeabi_f2d: float to double conversion.
 // In: r0 = float bits. Out: r1:r0 = double bits (r1 = high word).
 // With PICO_FLOAT_PROPAGATE_NANS a NaN input is converted by the stub at local
 // label 1 (placed ahead of the entry point); everything else is tail-called to
 // the sf_table routine at SF_TABLE_FLOAT2DOUBLE.
 float_wrapper_section __aeabi_f2d
 1:
 #if PICO_FLOAT_PROPAGATE_NANS
     // What the instructions below produce:
     //   high word: arithmetic shift right by 3 keeps the sign in bit 31 and moves the
     //              float exponent to bits 27..20 and the top 20 mantissa bits to bits
     //              19..0; OR-ing 0xf << 27 then sets bits 30..27, so bits 30..20 are
     //              all ones
     //   low word:  the float bits shifted left by 25 (float bits 6..0 in bits 31..25)
     // NOTE(review): the original comment, kept on the next line, mentions 25 id bits and
     // setting the high id bit; that does not match the instructions exactly - confirm intent.
     // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
     asrs r1, r0, #3
     movs r2, #0xf
     lsls r2, #27
     orrs r1, r2
     lsls r0, #25
     bx lr
 #endif
 wrapper_func __aeabi_f2d
 #if PICO_FLOAT_PROPAGATE_NANS
     movs r3, #1
     lsls r3, #24
     lsls r2, r0, #1     // drop the sign
     adds r2, r3         // carry out only for exponent 0xff; zero only if the mantissa is zero too
     bhi 1b              // NaN
 #endif
     shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim

 // float sqrtf(float x)
 // In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FSQRT.
 // With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned by the prologue.
 // With PICO_FLOAT_SUPPORT_ROM_V1 arguments with the sign bit set are answered
 // here: zero exponent returns 0x80000000, anything else returns 0xff800000.
 // (Section name corrected from the misspelt srqtf.)
 float_wrapper_section sqrtf
 wrapper_func_f1 sqrtf
 #if PICO_FLOAT_SUPPORT_ROM_V1
     // check for negative
     asrs r1, r0, #23    // r1 = sign-extended sign and exponent; N = sign
     bmi 1f
 #endif
     table_tail_call SF_TABLE_FSQRT
 #if PICO_FLOAT_SUPPORT_ROM_V1
 1:
     mvns r0, r1         // for a negative input this is 255 - exponent
     cmp r0, #255
     bne 2f
     // -0 or -Denormal return -0 (0x80000000)
     lsls r0, #31
     bx lr
 2:
     // return -Inf (0xff800000)
     asrs r0, r1, #31
     lsls r0, #23
     bx lr
 #endif

 // float cosf(float x)
 // In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FCOS.
 // Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
 // With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 (infinity or NaN) instead
 // returns x with mantissa bit 22 set, i.e. a NaN.
 // The external call is made with two words pushed so that sp stays 8-byte
 // aligned, as the procedure call standard requires at a public interface.
 float_wrapper_section cosf
 // note we don't use _f1 since we do an infinity/nan check for outside of range
 wrapper_func cosf
     // rom version only works for -128 < angle < 128
     lsls r1, r0, #1
     lsrs r1, #24        // r1 = biased exponent
     cmp r1, #127 + 7
     bge 1f
 2:
     table_tail_call SF_TABLE_FCOS
 1:
 #if PICO_FLOAT_PROPAGATE_NANS
     // also check for infinites
     cmp r1, #255
     bne 3f
     // infinite to nan
     movs r1, #1
     lsls r1, #22
     orrs r0, r1
     bx lr
 3:
 #endif
     ldr r1, =0x40c90fdb // 2 * M_PI
     push {r2, lr}       // r2 is padding only: keeps sp 8-byte aligned across the call
     bl remainderf
     pop {r1, r2}        // r2 = saved lr, r1 = discarded padding word
     mov lr, r2
     b 2b

 // float sinf(float x)
 // In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FSIN.
 // Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
 // With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 (infinity or NaN) instead
 // returns x with mantissa bit 22 set, i.e. a NaN.
 // The external call is made with two words pushed so that sp stays 8-byte
 // aligned, as the procedure call standard requires at a public interface.
 float_wrapper_section sinf
 // note we don't use _f1 since we do an infinity/nan check for outside of range
 wrapper_func sinf
     // rom version only works for -128 < angle < 128
     lsls r1, r0, #1
     lsrs r1, #24        // r1 = biased exponent
     cmp r1, #127 + 7
     bge 1f
 2:
     table_tail_call SF_TABLE_FSIN
 1:
 #if PICO_FLOAT_PROPAGATE_NANS
     // also check for infinites
     cmp r1, #255
     bne 3f
     // infinite to nan
     movs r1, #1
     lsls r1, #22
     orrs r0, r1
     bx lr
 3:
 #endif
     ldr r1, =0x40c90fdb // 2 * M_PI
     push {r2, lr}       // r2 is padding only: keeps sp 8-byte aligned across the call
     bl remainderf
     pop {r1, r2}        // r2 = saved lr, r1 = discarded padding word
     mov lr, r2
     b 2b

 // sincosf: In: r0 = x, r1 and r2 = two output pointers.
 // Calls the sf_table routine at SF_TABLE_FSIN and stores its r0 result through
 // the pointer passed in r1 and its r1 result through the pointer passed in r2.
 // NOTE(review): presumably the table routine returns the sine in r0 and the
 // cosine in r1 - confirm against the ROM documentation.
 // Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
 // With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 stores x with mantissa bit
 // 22 set (a NaN) through both pointers instead.
 float_wrapper_section sincosf
 // note we don't use _f1 since we do an infinity/nan check for outside of range
 wrapper_func sincosf
     push {r1, r2, lr}   // keep both output pointers and the return address
     // rom version only works for -128 < angle < 128
     lsls r3, r0, #1
     lsrs r3, #24        // r3 = biased exponent
     cmp r3, #127 + 7
     bge 3f
 2:
     ldr r3, =sf_table
     ldr r3, [r3, #SF_TABLE_FSIN]
     blx r3
     pop {r2, r3}        // r2 = pointer passed in r1, r3 = pointer passed in r2
     str r0, [r2]
     str r1, [r3]
     pop {pc}
 #if PICO_FLOAT_PROPAGATE_NANS
 .align 2
     pop {pc}            // never branched to; follows the alignment directive
 #endif
 3:
 #if PICO_FLOAT_PROPAGATE_NANS
     // also check for infinites
     cmp r3, #255
     bne 4f
     // infinite to nan
     movs r3, #1
     lsls r3, #22
     orrs r0, r3
     str r0, [r1]        // r1 and r2 still hold the output pointers here
     str r0, [r2]
     add sp, #12         // drop the three words pushed on entry
     bx lr
 4:
 #endif
     ldr r1, =0x40c90fdb // 2 * M_PI
     push {lr}           // fourth word on the stack
     bl remainderf
     pop {r1}
     mov lr, r1
     b 2b

 // float tanf(float x)
 // In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FTAN.
 // Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
 // With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 (infinity or NaN) instead
 // returns x with mantissa bit 22 set, i.e. a NaN.
 // The external call is made with two words pushed so that sp stays 8-byte
 // aligned, as the procedure call standard requires at a public interface.
 float_wrapper_section tanf
 // note we don't use _f1 since we do an infinity/nan check for outside of range
 wrapper_func tanf
     // rom version only works for -128 < angle < 128
     lsls r1, r0, #1
     lsrs r1, #24        // r1 = biased exponent
     cmp r1, #127 + 7
     bge 1f
 2:
     table_tail_call SF_TABLE_FTAN
 1:
 #if PICO_FLOAT_PROPAGATE_NANS
     // also check for infinites
     cmp r1, #255
     bne 3f
     // infinite to nan
     movs r1, #1
     lsls r1, #22
     orrs r0, r1
     bx lr
 3:
 #endif
     ldr r1, =0x40c90fdb // 2 * M_PI
     push {r2, lr}       // r2 is padding only: keeps sp 8-byte aligned across the call
     bl remainderf
     pop {r1, r2}        // r2 = saved lr, r1 = discarded padding word
     mov lr, r2
     b 2b

 // atan2f: r0, r1 = arguments. After the two-argument NaN prologue (when
 // PICO_FLOAT_PROPAGATE_NANS is set) tail-calls the sf_table routine at
 // SF_TABLE_FATAN2, naming fatan2_shim as the shim.
 float_wrapper_section atan2f
 wrapper_func_f2 atan2f
     shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim

 // expf: r0 = argument. After the one-argument NaN prologue tail-calls the
 // sf_table routine at SF_TABLE_FEXP.
 float_wrapper_section expf
 wrapper_func_f1 expf
     table_tail_call SF_TABLE_FEXP

 // logf: r0 = argument. After the one-argument NaN prologue tail-calls the
 // sf_table routine at SF_TABLE_FLN.
 float_wrapper_section logf
 wrapper_func_f1 logf
     table_tail_call SF_TABLE_FLN
	/*
	* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
	*
	* SPDX-License-Identifier: BSD-3-Clause
	*/

	#include "pico/asm_helper.S"
	#include "pico/bootrom/sf_table.h"

	__pre_init __aeabi_float_init, 00020

	.syntax unified
	.cpu cortex-m0plus
	.thumb

	// Switch to the section that holds the float routine called name: a RAM section
	// when PICO_FLOAT_IN_RAM is set, the default section otherwise. The section is
	// flagged allocatable and executable ("ax").
	// NOTE(review): RAM_SECTION_NAME / SECTION_NAME are not defined in this file;
	// presumably they come from pico/asm_helper.S - confirm.
	.macro float_section name
	#if PICO_FLOAT_IN_RAM
	.section RAM_SECTION_NAME(\name), "ax"
	#else
	.section SECTION_NAME(\name), "ax"
	#endif
	.endm

	// Same as float_section, but the section name is built from the wrapper symbol
	// name WRAPPER_FUNC_NAME(func).
	.macro float_wrapper_section func
	float_section WRAPPER_FUNC_NAME(\func)
	.endm

	// Thin alias for wrapper_func; the two NaN-checking variants that follow use it.
	.macro _float_wrapper_func x
	wrapper_func \x
	.endm

	// Open wrapper function x taking one float argument in r0.
	// With PICO_FLOAT_PROPAGATE_NANS the prologue parks the caller return address
	// in ip and calls __check_nan_f1. That helper either comes back here (lr is then
	// restored from ip) or, for a NaN argument, returns straight to the caller via
	// ip. The helper writes r2, r3 and the flags.
	.macro wrapper_func_f1 x
	_float_wrapper_func \x
	#if PICO_FLOAT_PROPAGATE_NANS
	mov ip, lr
	bl __check_nan_f1
	mov lr, ip
	#endif
	.endm

	// Open wrapper function x taking two float arguments in r0 and r1.
	// Same scheme as wrapper_func_f1 but using __check_nan_f2, which tests both
	// arguments and leaves the NaN operand in r0 when it returns to the caller.
	.macro wrapper_func_f2 x
	_float_wrapper_func \x
	#if PICO_FLOAT_PROPAGATE_NANS
	mov ip, lr
	bl __check_nan_f2
	mov lr, ip
	#endif
	.endm

	.section .text

	#if PICO_FLOAT_PROPAGATE_NANS
	// NaN filter for one-argument wrappers (called from the wrapper_func_f1 prologue).
	// In:  r0 = float bits
	//      ip = return address of the caller of the wrapper
	//      lr = return address inside the wrapper
	// Out: returns via lr when r0 is not a NaN; returns via ip (directly to the
	//      caller of the wrapper, NaN still in r0) when it is.
	// Clobbers r2, r3 and the flags.
	.thumb_func
	__check_nan_f1:
	movs r3, #1
	lsls r3, #24 // r3 = 0x01000000
	lsls r2, r0, #1 // drop the sign: exponent in bits 31..24, mantissa in bits 23..1
	adds r2, r3 // carries out only for exponent 0xff; sum is zero only if the mantissa is zero too
	bhi 1f // C set and Z clear: exponent all ones with non-zero mantissa, i.e. NaN
	bx lr
	1:
	bx ip

	// NaN filter for two-argument wrappers (called from the wrapper_func_f2 prologue).
	// In:  r0, r1 = float bits; ip and lr as for __check_nan_f1
	// Out: returns via lr when neither argument is a NaN. Otherwise returns via ip
	//      with the NaN in r0 (r0 is tested first; a NaN in r1 is copied into r0).
	// Clobbers r2, r3 and the flags.
	.thumb_func
	__check_nan_f2:
	movs r3, #1
	lsls r3, #24
	lsls r2, r0, #1
	adds r2, r3
	bhi 1f // r0 is a NaN: return it as is
	lsls r2, r1, #1
	adds r2, r3
	bhi 2f // r1 is a NaN
	bx lr
	2:
	mov r0, r1
	1:
	bx ip
	#endif

	// Tail-call the routine whose address is stored at byte offset SF_TABLE_OFFSET
	// from the symbol sf_table. lr is not modified, so the callee returns directly
	// to whoever lr points at when the macro is reached. Clobbers r3.
	// With PICO_FLOAT_SUPPORT_ROM_V1 and NDEBUG undefined, ip is zeroed first.
	// NOTE(review): presumably so that code looking for a shim descriptor in ip never
	// sees a stale value - the consumer is not in this file, confirm.
	.macro table_tail_call SF_TABLE_OFFSET
	#if PICO_FLOAT_SUPPORT_ROM_V1
	#ifndef NDEBUG
	movs r3, #0
	mov ip, r3
	#endif
	#endif
	ldr r3, =sf_table
	ldr r3, [r3, #\SF_TABLE_OFFSET]
	bx r3
	.endm

	// Same as table_tail_call, but with PICO_FLOAT_SUPPORT_ROM_V1 the call site also
	// carries inline data and ip is pointed at it: in Thumb state reading pc yields
	// the address of the current instruction plus 4, which here is the first byte
	// after the 2-byte bx r3. The data is the byte SF_TABLE_OFFSET, the byte 0xdf,
	// then the 32-bit address of the symbol passed as shim.
	// NOTE(review): presumably read by whatever handles routines absent from the V1
	// ROM - that code is not in this file, confirm.
	.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
	ldr r3, =sf_table
	ldr r3, [r3, #\SF_TABLE_OFFSET]
	#if PICO_FLOAT_SUPPORT_ROM_V1
	mov ip, pc
	#endif
	bx r3
	#if PICO_FLOAT_SUPPORT_ROM_V1
	.byte \SF_TABLE_OFFSET, 0xdf
	.word \shim
	#endif
	.endm


	# note generally each function is in a separate section unless there is fall thru or branching between them
	# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool

	# note functions are word aligned except where they are an odd number of linear instructions

	// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
	float_wrapper_section __aeabi_farithmetic
	// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
	// In: r0 = x, r1 = y. Swaps the two arguments and falls through into __aeabi_fsub.

	# frsub first because it is the only one that needs alignment
	.align 2
	wrapper_func __aeabi_frsub
	// three XORs exchange r0 and r1 without a scratch register
	eors r0, r1
	eors r1, r0
	eors r0, r1
	// fall thru

	// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
	// In: r0 = x, r1 = y. Out: r0 = result of the sf_table routine at SF_TABLE_FSUB.
	// With PICO_FLOAT_PROPAGATE_NANS: a NaN operand is returned by the prologue; for
	// operands of equal sign the table routine is called as a subroutine (bl to the
	// local label 1, whose tail call returns to the instruction after the bl) and
	// the result is post-processed by fdiv_fsub_nan_helper, which pops the saved
	// operands and lr.
	wrapper_func_f2 __aeabi_fsub
	#if PICO_FLOAT_PROPAGATE_NANS
	// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
	mov r2, r0
	eors r2, r1 // N = sign(x) xor sign(y)
	bmi 1f // different signs
	push {r0, r1, lr} // keep the operands for the helper, and the real return address
	bl 1f
	b fdiv_fsub_nan_helper
	1:
	#endif
	table_tail_call SF_TABLE_FSUB

	// __aeabi_fadd: r0 = x, r1 = y; tail-calls the sf_table routine at SF_TABLE_FADD
	// (after the NaN prologue when PICO_FLOAT_PROPAGATE_NANS is set).
	wrapper_func_f2 __aeabi_fadd
	table_tail_call SF_TABLE_FADD

	// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
	// In: r0 = n, r1 = d. Out: r0 = result of the sf_table routine at SF_TABLE_FDIV.
	// With PICO_FLOAT_PROPAGATE_NANS the table routine is called as a subroutine
	// with the operands and lr saved on the stack, then fdiv_fsub_nan_helper
	// post-processes the result and returns to the caller.
	wrapper_func_f2 __aeabi_fdiv
	#if PICO_FLOAT_PROPAGATE_NANS
	push {r0, r1, lr}
	bl 1f
	b fdiv_fsub_nan_helper
	1:
	#endif
	table_tail_call SF_TABLE_FDIV

	// Shared epilogue for fsub and fdiv when PICO_FLOAT_PROPAGATE_NANS is set.
	// In:  r0 = result; stack (top first) = first operand, second operand, return address.
	// Out: r0 = result, with bit 22 additionally set (turning an infinity into a NaN)
	//      when the result exponent is all ones and both operand exponents are all ones.
	// Returns by popping the saved return address into pc. Clobbers r1-r3 and flags.
	// The label is defined even when the body is compiled out.
	fdiv_fsub_nan_helper:
	#if PICO_FLOAT_PROPAGATE_NANS
	pop {r1, r2}

	// check for infinite op infinite (or rather check for infinite result with both
	// operands being infinite)
	lsls r3, r0, #1
	asrs r3, r3, #24 // r3 = result exponent, sign extended: -1 when it is 0xff
	adds r3, #1
	beq 2f
	pop {pc}
	2:
	lsls r1, #1
	asrs r1, r1, #24 // same extraction for the first operand
	lsls r2, #1
	asrs r2, r2, #24 // and for the second operand
	ands r1, r2
	adds r1, #1 // zero only when both exponents were 0xff
	bne 3f
	// infinite to nan
	movs r1, #1
	lsls r1, #22
	orrs r0, r1 // set the top mantissa bit
	3:
	pop {pc}
	#endif

	// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
	// In: r0 = x, r1 = y. Out: r0 = result of the sf_table routine at SF_TABLE_FMUL.
	// With PICO_FLOAT_PROPAGATE_NANS: a NaN operand is returned by the prologue; the
	// table routine is called as a subroutine (bl to local label 1) with the
	// operands and lr saved, and an infinite result that came from infinity times
	// zero is converted to a NaN by setting mantissa bit 22.
	// Clobbers r1-r3 and flags.
	wrapper_func_f2 __aeabi_fmul
	#if PICO_FLOAT_PROPAGATE_NANS
	push {r0, r1, lr}
	bl 1f
	pop {r1, r2}

	// check for multiplication of infinite by zero (or rather check for infinite result with either
	// operand 0)
	lsls r3, r0, #1
	asrs r3, r3, #24 // r3 = result exponent, sign extended: -1 when it is 0xff
	adds r3, #1
	beq 2f
	pop {pc}
	2:
	// Compare the operands with their sign bits shifted out, so that a zero of
	// either sign is recognised. ANDing the raw words missed -inf * -0, where
	// the two sign bits made the AND non-zero and the infinity was returned.
	lsls r1, #1
	lsls r2, #1
	ands r1, r2 // zero when the operands share no exponent/mantissa bit
	bne 3f
	// infinite to nan
	movs r1, #1
	lsls r1, #22
	orrs r0, r1
	3:
	pop {pc}
	1:
	#endif
	table_tail_call SF_TABLE_FMUL

	// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
	// In: r0 = x, r1 = y. Compares y against x by swapping the two (after saving
	// them) and joining the shared code. r0-r2 are preserved; only flags are returned.
	float_wrapper_section __aeabi_cfcmple
	.align 2
	wrapper_func __aeabi_cfrcmple
	push {r0-r2, lr}
	eors r0, r1 // XOR swap of r0 and r1
	eors r1, r0
	eors r0, r1
	b __aeabi_cfcmple_guts

	// NOTE these share an implementation as we have no excepting NaNs.
	// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
	// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
	// In: r0 = x, r1 = y. r0-r2 are preserved (restored by the final pop).
	// Flags on return, as produced by the code below:
	//   x == y (including +0 vs -0)  Z = 1, C = 1
	//   x <  y                       C = 0
	//   x >  y, or either is a NaN   Z = 0, C = 1
	// Zero-exponent operands have their mantissa cleared first, so denormals compare
	// as zeros of the same sign.
	.align 2
	wrapper_func __aeabi_cfcmple
	wrapper_func __aeabi_cfcmpeq
	push {r0-r2, lr}

	__aeabi_cfcmple_guts:
	lsls r2,r0,#1
	lsrs r2,#24 @ r2 = exponent of x
	beq 1f @ zero or denormal
	cmp r2,#0xff
	bne 2f @ ordinary number: use as is
	lsls r2, r0, #9 @ exponent 0xff: C = low exponent bit (1), Z = mantissa is zero
	bhi 3f @ NaN: leave with C = 1, Z = 0
	1:
	lsrs r0,#23 @ clear mantissa if denormal or infinite
	lsls r0,#23
	2:
	lsls r2,r1,#1
	lsrs r2,#24 @ same classification for y
	beq 1f
	cmp r2,#0xff
	bne 2f
	lsls r2, r1, #9
	bhi 3f
	1:
	lsrs r1,#23 @ clear mantissa if denormal or infinite
	lsls r1,#23
	2:
	movs r2,#1 @ initialise result
	eors r1,r0
	bmi 2f @ opposite signs? then can proceed on basis of sign of x
	eors r1,r0 @ restore y
	bpl 1f
	cmp r1,r0 @ both negative: larger bit pattern is the smaller value, so compare reversed
	pop {r0-r2, pc}
	1:
	cmp r0,r1 @ both non-negative: unsigned order of the bit patterns
	pop {r0-r2, pc}
	2:
	orrs r1, r0 @ handle 0/-0
	adds r1, r1 @ note this always sets C
	beq 3f @ nothing but the sign bit set: +0 against -0, report equal
	mvns r0, r0 @ carry inverse of r0 sign
	adds r0, r0
	3:
	pop {r0-r2, pc}


	// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
	// In: r0 = x, r1 = y. Out: r0 = 1 when __aeabi_cfcmpeq returns with Z set, else 0.
	float_wrapper_section __aeabi_fcmpeq
	.align 2
	wrapper_func __aeabi_fcmpeq
	push {lr}
	bl __aeabi_cfcmpeq
	beq 1f
	movs r0, #0
	pop {pc}
	1:
	movs r0, #1
	pop {pc}

	// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
	// In: r0 = x, r1 = y. Out: r0 = 0 when __aeabi_cfcmple returns with C set,
	// 0xffffffff when it returns with C clear.
	// NOTE: the true value produced here is all ones, not 1.
	float_wrapper_section __aeabi_fcmplt
	.align 2
	wrapper_func __aeabi_fcmplt
	push {lr}
	bl __aeabi_cfcmple
	sbcs r0, r0 // r0 = r0 - r0 - (1 - C)
	pop {pc}

	// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
	// In: r0 = x, r1 = y. Out: r0 = 1 when __aeabi_cfcmple returns with C clear or
	// Z set (the LS condition), else 0.
	float_wrapper_section __aeabi_fcmple
	.align 2
	wrapper_func __aeabi_fcmple
	push {lr}
	bl __aeabi_cfcmple
	bls 1f
	movs r0, #0
	pop {pc}
	1:
	movs r0, #1
	pop {pc}

	// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
	// In: r0 = x, r1 = y. Out: r0 = 1 when the reversed compare (y against x)
	// returns with C clear or Z set, else 0.
	float_wrapper_section __aeabi_fcmpge
	.align 2
	wrapper_func __aeabi_fcmpge
	push {lr}
	// because of NaNs it is better to reverse the args than the result
	bl __aeabi_cfrcmple
	bls 1f
	movs r0, #0
	pop {pc}
	1:
	movs r0, #1
	pop {pc}

	// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
	// In: r0 = x, r1 = y. Out: r0 = 0 when the reversed compare (y against x)
	// returns with C set, 0xffffffff when it returns with C clear.
	// NOTE: the true value produced here is all ones, not 1.
	float_wrapper_section __aeabi_fcmpgt
	wrapper_func __aeabi_fcmpgt
	push {lr}
	// because of NaNs it is better to reverse the args than the result
	bl __aeabi_cfrcmple
	sbcs r0, r0 // r0 = r0 - r0 - (1 - C)
	pop {pc}

	// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
	// In: r0 = x, r1 = y. Out: r0 = 1 when either argument is a NaN, else 0.
	// Clobbers r2, r3 and flags.
	float_wrapper_section __aeabi_fcmpun
	wrapper_func __aeabi_fcmpun
	movs r3, #1
	lsls r3, #24 // r3 = 0x01000000
	lsls r2, r0, #1 // drop the sign of x
	adds r2, r3 // carry out only for exponent 0xff; zero only if the mantissa is zero too
	bhi 1f // x is a NaN
	lsls r2, r1, #1
	adds r2, r3
	bhi 1f // y is a NaN
	movs r0, #0
	bx lr
	1:
	movs r0, #1
	bx lr


	// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
	// In: r0 = unsigned value. Out: r0 = float bits.
	// Zero is returned as 0; any other value joins the shared conversion code at
	// __aeabi_i2f_main with r1 = 0 as the sign word.
	float_wrapper_section __aeabi_ui2f
	wrapper_func __aeabi_ui2f
	subs r1, r1
	cmp r0, #0
	bne __aeabi_i2f_main
	mov r0, r1
	bx lr

	float_wrapper_section __aeabi_i2f
	// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
	// In: r0 = signed value. Out: r0 = float bits, rounded to nearest with ties to even.
	// Clobbers r1-r3, ip and flags. The shared code returns through ip because lr
	// is overwritten by the blx.
	// NOTE(review): the routine fetched through sf_clz_func is presumably a
	// count-leading-zeros helper (its result is used as the normalising shift count
	// and in the exponent); it must leave ip intact - confirm.
	wrapper_func __aeabi_i2f
	lsrs r1, r0, #31
	lsls r1, #31 @ r1 = sign bit only; N = sign
	bpl 1f
	rsbs r0, #0 @ negative input: r0 = magnitude
	1:
	cmp r0, #0
	beq 7f @ zero: r0 is already the result
	__aeabi_i2f_main:

	mov ip, lr
	push {r0, r1}
	ldr r3, =sf_clz_func
	ldr r3, [r3]
	blx r3
	pop {r1, r2} @ r1 = magnitude, r2 = sign word
	lsls r1, r0 @ shift the magnitude left by the returned count
	subs r0, #158
	rsbs r0, #0 @ r0 = 158 - count, used as the exponent field

	adds r1,#0x80 @ rounding
	bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

	lsls r3,r1,#24 @ check bottom 8 bits of r1
	beq 6f @ in rounding-tie case?
	lsls r1,#1 @ remove leading 1
	3:
	lsrs r1,#9 @ align mantissa
	lsls r0,#23 @ align exponent
	orrs r0,r2 @ apply sign (r2 holds the sign word)
	4:
	orrs r0,r1 @ merge mantissa
	1:
	bx ip
	5:
	adds r0,#1 @ correct exponent offset
	b 3b
	6:
	lsrs r1,#9 @ ensure even result
	lsls r1,#10
	b 3b
	7:
	bx lr


	// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
	// In: r0 = float bits. Out: r0 = value truncated toward zero.
	//   exponent <= 126 (magnitude below 1, zero, denormal)  -> 0
	//   exponent >= 158 (magnitude >= 2^31, infinity, NaN)    -> 0x7fffffff for sign 0, 0x80000000 for sign 1
	// Leaf routine; clobbers r1-r3 and flags.
	// (The unreachable remains of an earlier implementation that followed the last
	// bx lr have been removed.)
	float_wrapper_section __aeabi_f2iz
	wrapper_func __aeabi_f2iz
	regular_func float2int_z
	lsls r1, r0, #1
	lsrs r2, r1, #24 // r2 = biased exponent
	movs r3, #0x80
	lsls r3, #24 // r3 = 0x80000000
	cmp r2, #126
	ble 1f
	subs r2, #158 // r2 = exponent - 158
	bge 2f
	asrs r1, r0, #31 // r1 = 0 for sign 0, -1 for sign 1
	lsls r0, #9
	lsrs r0, #1
	orrs r0, r3 // r0 = mantissa with the implicit one at bit 31
	negs r2, r2 // shift count 158 - exponent, in 1..31
	lsrs r0, r2 // integer magnitude
	lsls r1, #1
	adds r1, #1 // r1 = +1 or -1
	muls r0, r1 // apply the sign
	bx lr
	1:
	movs r0, #0
	bx lr
	2:
	lsrs r0, #31 // 0 or 1 from the sign bit
	adds r0, r3
	subs r0, #1 // 0x7fffffff or 0x80000000
	bx lr

	// The routines in this group are single tail calls into sf_table; arguments and
	// results pass through unchanged and only r3 (plus ip in the ROM V1
	// configurations, see the macros) is modified here.
	// NOTE(review): what each table routine computes is defined outside this file.

	// float2int: table entry SF_TABLE_FLOAT2INT, shim float2int_shim.
	float_section float2int
	regular_func float2int
	shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim

	// float2fix: table entry SF_TABLE_FLOAT2FIX, shim float2fix_shim.
	float_section float2fix
	regular_func float2fix
	shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim

	// float2ufix: table entry SF_TABLE_FLOAT2UFIX.
	float_section float2ufix
	regular_func float2ufix
	table_tail_call SF_TABLE_FLOAT2UFIX

	// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
	// Table entry SF_TABLE_FLOAT2UINT.
	float_wrapper_section __aeabi_f2uiz
	wrapper_func __aeabi_f2uiz
	table_tail_call SF_TABLE_FLOAT2UINT

	// fix2float: table entry SF_TABLE_FIX2FLOAT.
	float_section fix2float
	regular_func fix2float
	table_tail_call SF_TABLE_FIX2FLOAT

	// ufix2float: table entry SF_TABLE_UFIX2FLOAT.
	float_section ufix2float
	regular_func ufix2float
	table_tail_call SF_TABLE_UFIX2FLOAT

	// fix642float: table entry SF_TABLE_FIX642FLOAT, shim fix642float_shim.
	float_section fix642float
	regular_func fix642float
	shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim

	// ufix642float: table entry SF_TABLE_UFIX642FLOAT, shim ufix642float_shim.
	float_section ufix642float
	regular_func ufix642float
	shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim

	// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
	// In: r1:r0 = 64-bit signed value (r0 = low word). Out: r0 = float bits.
	// When the high word equals the sign extension of the low word the value fits
	// in 32 bits and __aeabi_i2f is used (via the stub at local label 1, placed
	// ahead of the entry point); otherwise the sf_table routine at
	// SF_TABLE_INT642FLOAT is tail-called.
	float_wrapper_section __aeabi_l2f
	1:
	ldr r2, =__aeabi_i2f
	bx r2
	wrapper_func __aeabi_l2f
	asrs r2, r0, #31 // r2 = 0 or -1 from the sign of the low word
	cmp r1, r2
	beq 1b
	shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim

	// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion
	// In: r1:r0 = 64-bit unsigned value (r0 = low word). Out: r0 = float bits.
	// When the high word is zero __aeabi_ui2f is used (via the stub at local label
	// 1, placed ahead of the entry point); otherwise the sf_table routine at
	// SF_TABLE_UINT642FLOAT is tail-called.
	float_wrapper_section __aeabi_ul2f
	1:
	ldr r2, =__aeabi_ui2f
	bx r2
	wrapper_func __aeabi_ul2f
	cmp r1, #0
	beq 1b
	shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim

	// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
	// In: r0 = float bits. Out: r1:r0 = 64-bit result (r0 = low word).
	// Sign bit clear: handed to float2int64 unchanged.
	// Sign bit set: the magnitude is converted with float2ufix64 and the result is
	// negated; a converted magnitude with bit 63 set yields 0x8000000000000000.
	// NOTE(review): r1 = 0 passed to float2ufix64 is presumably the number of
	// fractional bits - confirm against the table routine.
	float_wrapper_section __aeabi_f2lz
	wrapper_func __aeabi_f2lz
	regular_func float2int64_z
	cmn r0, r0 // flags of r0 + r0: C = sign bit
	bcc float2int64
	push {lr}
	lsls r0, #1
	lsrs r0, #1 // clear the sign bit
	movs r1, #0
	bl float2ufix64
	cmp r1, #0
	bmi 1f // bit 63 set: magnitude too large
	movs r2, #0
	rsbs r0, #0 // low word = 0 - low word, borrow in C
	sbcs r2, r1 // high word = 0 - high word - borrow
	mov r1, r2
	pop {pc}
	1:
	movs r1, #128
	lsls r1, #24 // r1:r0 = 0x80000000:00000000
	movs r0, #0
	pop {pc}

	// Single tail calls into sf_table; arguments and results pass through
	// unchanged and only r3 (plus ip when PICO_FLOAT_SUPPORT_ROM_V1 is set) is
	// modified here.
	// NOTE(review): what each table routine computes is defined outside this file.

	// float2int64: table entry SF_TABLE_FLOAT2INT64, shim float2int64_shim.
	float_section float2int64
	regular_func float2int64
	shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim

	// float2fix64: table entry SF_TABLE_FLOAT2FIX64, shim float2fix64_shim.
	float_section float2fix64
	regular_func float2fix64
	shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim

	// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
	// Table entry SF_TABLE_FLOAT2UINT64, shim float2uint64_shim.
	float_wrapper_section __aeabi_f2ulz
	wrapper_func __aeabi_f2ulz
	shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim

	// float2ufix64: table entry SF_TABLE_FLOAT2UFIX64, shim float2ufix64_shim.
	float_section float2ufix64
	regular_func float2ufix64
	shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim

	// __aeabi_f2d: float to double conversion.
	// In: r0 = float bits. Out: r1:r0 = double bits (r1 = high word).
	// With PICO_FLOAT_PROPAGATE_NANS a NaN input is converted by the stub at local
	// label 1 (placed ahead of the entry point); everything else is tail-called to
	// the sf_table routine at SF_TABLE_FLOAT2DOUBLE.
	float_wrapper_section __aeabi_f2d
	1:
	#if PICO_FLOAT_PROPAGATE_NANS
	// What the instructions below produce:
	//   high word: arithmetic shift right by 3 keeps the sign in bit 31 and moves the
	//              float exponent to bits 27..20 and the top 20 mantissa bits to bits
	//              19..0; OR-ing 0xf << 27 then sets bits 30..27, so bits 30..20 are
	//              all ones
	//   low word:  the float bits shifted left by 25 (float bits 6..0 in bits 31..25)
	// NOTE(review): the original comment, kept on the next line, mentions 25 id bits and
	// setting the high id bit; that does not match the instructions exactly - confirm intent.
	// copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
	asrs r1, r0, #3
	movs r2, #0xf
	lsls r2, #27
	orrs r1, r2
	lsls r0, #25
	bx lr
	#endif
	wrapper_func __aeabi_f2d
	#if PICO_FLOAT_PROPAGATE_NANS
	movs r3, #1
	lsls r3, #24
	lsls r2, r0, #1 // drop the sign
	adds r2, r3 // carry out only for exponent 0xff; zero only if the mantissa is zero too
	bhi 1b // NaN
	#endif
	shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim

	// float sqrtf(float x)
	// In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FSQRT.
	// With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned by the prologue.
	// With PICO_FLOAT_SUPPORT_ROM_V1 arguments with the sign bit set are answered
	// here: zero exponent returns 0x80000000, anything else returns 0xff800000.
	// (Section name corrected from the misspelt srqtf.)
	float_wrapper_section sqrtf
	wrapper_func_f1 sqrtf
	#if PICO_FLOAT_SUPPORT_ROM_V1
	// check for negative
	asrs r1, r0, #23 // r1 = sign-extended sign and exponent; N = sign
	bmi 1f
	#endif
	table_tail_call SF_TABLE_FSQRT
	#if PICO_FLOAT_SUPPORT_ROM_V1
	1:
	mvns r0, r1 // for a negative input this is 255 - exponent
	cmp r0, #255
	bne 2f
	// -0 or -Denormal return -0 (0x80000000)
	lsls r0, #31
	bx lr
	2:
	// return -Inf (0xff800000)
	asrs r0, r1, #31
	lsls r0, #23
	bx lr
	#endif

	// float cosf(float x)
	// In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FCOS.
	// Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
	// With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 (infinity or NaN) instead
	// returns x with mantissa bit 22 set, i.e. a NaN.
	// The external call is made with two words pushed so that sp stays 8-byte
	// aligned, as the procedure call standard requires at a public interface.
	float_wrapper_section cosf
	// note we don't use _f1 since we do an infinity/nan check for outside of range
	wrapper_func cosf
	// rom version only works for -128 < angle < 128
	lsls r1, r0, #1
	lsrs r1, #24 // r1 = biased exponent
	cmp r1, #127 + 7
	bge 1f
	2:
	table_tail_call SF_TABLE_FCOS
	1:
	#if PICO_FLOAT_PROPAGATE_NANS
	// also check for infinites
	cmp r1, #255
	bne 3f
	// infinite to nan
	movs r1, #1
	lsls r1, #22
	orrs r0, r1
	bx lr
	3:
	#endif
	ldr r1, =0x40c90fdb // 2 * M_PI
	push {r2, lr} // r2 is padding only: keeps sp 8-byte aligned across the call
	bl remainderf
	pop {r1, r2} // r2 = saved lr, r1 = discarded padding word
	mov lr, r2
	b 2b

	// float sinf(float x)
	// In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FSIN.
	// Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
	// With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 (infinity or NaN) instead
	// returns x with mantissa bit 22 set, i.e. a NaN.
	// The external call is made with two words pushed so that sp stays 8-byte
	// aligned, as the procedure call standard requires at a public interface.
	float_wrapper_section sinf
	// note we don't use _f1 since we do an infinity/nan check for outside of range
	wrapper_func sinf
	// rom version only works for -128 < angle < 128
	lsls r1, r0, #1
	lsrs r1, #24 // r1 = biased exponent
	cmp r1, #127 + 7
	bge 1f
	2:
	table_tail_call SF_TABLE_FSIN
	1:
	#if PICO_FLOAT_PROPAGATE_NANS
	// also check for infinites
	cmp r1, #255
	bne 3f
	// infinite to nan
	movs r1, #1
	lsls r1, #22
	orrs r0, r1
	bx lr
	3:
	#endif
	ldr r1, =0x40c90fdb // 2 * M_PI
	push {r2, lr} // r2 is padding only: keeps sp 8-byte aligned across the call
	bl remainderf
	pop {r1, r2} // r2 = saved lr, r1 = discarded padding word
	mov lr, r2
	b 2b

	// sincosf: In: r0 = x, r1 and r2 = two output pointers.
	// Calls the sf_table routine at SF_TABLE_FSIN and stores its r0 result through
	// the pointer passed in r1 and its r1 result through the pointer passed in r2.
	// NOTE(review): presumably the table routine returns the sine in r0 and the
	// cosine in r1 - confirm against the ROM documentation.
	// Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
	// With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 stores x with mantissa bit
	// 22 set (a NaN) through both pointers instead.
	float_wrapper_section sincosf
	// note we don't use _f1 since we do an infinity/nan check for outside of range
	wrapper_func sincosf
	push {r1, r2, lr} // keep both output pointers and the return address
	// rom version only works for -128 < angle < 128
	lsls r3, r0, #1
	lsrs r3, #24 // r3 = biased exponent
	cmp r3, #127 + 7
	bge 3f
	2:
	ldr r3, =sf_table
	ldr r3, [r3, #SF_TABLE_FSIN]
	blx r3
	pop {r2, r3} // r2 = pointer passed in r1, r3 = pointer passed in r2
	str r0, [r2]
	str r1, [r3]
	pop {pc}
	#if PICO_FLOAT_PROPAGATE_NANS
	.align 2
	pop {pc} // never branched to; follows the alignment directive
	#endif
	3:
	#if PICO_FLOAT_PROPAGATE_NANS
	// also check for infinites
	cmp r3, #255
	bne 4f
	// infinite to nan
	movs r3, #1
	lsls r3, #22
	orrs r0, r3
	str r0, [r1] // r1 and r2 still hold the output pointers here
	str r0, [r2]
	add sp, #12 // drop the three words pushed on entry
	bx lr
	4:
	#endif
	ldr r1, =0x40c90fdb // 2 * M_PI
	push {lr} // fourth word on the stack
	bl remainderf
	pop {r1}
	mov lr, r1
	b 2b

	// float tanf(float x)
	// In: r0 = x. Out: r0 = result of the sf_table routine at SF_TABLE_FTAN.
	// Arguments with exponent >= 134 are first reduced with remainderf(x, 2 * pi).
	// With PICO_FLOAT_PROPAGATE_NANS an exponent of 255 (infinity or NaN) instead
	// returns x with mantissa bit 22 set, i.e. a NaN.
	// The external call is made with two words pushed so that sp stays 8-byte
	// aligned, as the procedure call standard requires at a public interface.
	float_wrapper_section tanf
	// note we don't use _f1 since we do an infinity/nan check for outside of range
	wrapper_func tanf
	// rom version only works for -128 < angle < 128
	lsls r1, r0, #1
	lsrs r1, #24 // r1 = biased exponent
	cmp r1, #127 + 7
	bge 1f
	2:
	table_tail_call SF_TABLE_FTAN
	1:
	#if PICO_FLOAT_PROPAGATE_NANS
	// also check for infinites
	cmp r1, #255
	bne 3f
	// infinite to nan
	movs r1, #1
	lsls r1, #22
	orrs r0, r1
	bx lr
	3:
	#endif
	ldr r1, =0x40c90fdb // 2 * M_PI
	push {r2, lr} // r2 is padding only: keeps sp 8-byte aligned across the call
	bl remainderf
	pop {r1, r2} // r2 = saved lr, r1 = discarded padding word
	mov lr, r2
	b 2b

	// atan2f: r0, r1 = arguments. After the two-argument NaN prologue (when
	// PICO_FLOAT_PROPAGATE_NANS is set) tail-calls the sf_table routine at
	// SF_TABLE_FATAN2, naming fatan2_shim as the shim.
	float_wrapper_section atan2f
	wrapper_func_f2 atan2f
	shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim

	// expf: r0 = argument. After the one-argument NaN prologue tail-calls the
	// sf_table routine at SF_TABLE_FEXP.
	float_wrapper_section expf
	wrapper_func_f1 expf
	table_tail_call SF_TABLE_FEXP

	// logf: r0 = argument. After the one-argument NaN prologue tail-calls the
	// sf_table routine at SF_TABLE_FLN.
	float_wrapper_section logf
	wrapper_func_f1 logf
	table_tail_call SF_TABLE_FLN