| /* |
| * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. |
| * |
| * SPDX-License-Identifier: BSD-3-Clause |
| */ |
| |
| #include "pico/asm_helper.S" |
| |
| #if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED |
| |
| #ifndef PICO_FLOAT_IN_RAM |
| #define PICO_FLOAT_IN_RAM 0 |
| #endif |
| |
| pico_default_asm_setup |
| |
// float_section <name>
// Switches assembly output to a section for <name> with flags "ax"
// (allocatable + executable). When PICO_FLOAT_IN_RAM is non-zero the section
// name is produced by RAM_SECTION_NAME, otherwise by SECTION_NAME; both of
// those macros are defined outside this file.
| .macro float_section name |
| // todo separate flag for shims? |
| #if PICO_FLOAT_IN_RAM |
| .section RAM_SECTION_NAME(\name), "ax" |
| #else |
| .section SECTION_NAME(\name), "ax" |
| #endif |
| .endm |
| |
| float_section float_table_shim_on_use_helper |
// float_table_shim_on_use_helper
//
// In:  ip = a code address (debug builds trap with bkpt if it is 0).
// Starting at ip, memory is scanned one halfword at a time for a halfword
// whose top byte is 0xdf. The low byte of that halfword is used as a byte
// offset into sf_table. The 32-bit word that immediately follows it is then
//   - stored into sf_table at that offset, and
//   - written over the saved lr on the stack, so the closing pop branches to it.
// r0-r2 are restored by that same pop, and lr itself is never written, so the
// branch target sees r0-r2 and lr exactly as they were on entry.
// NOTE(review): 0xdfNN is the Thumb encoding of `svc #NN`; presumably the
// table_tail_call macro mentioned below plants it as an inline marker -
// confirm against that macro's definition.
| regular_func float_table_shim_on_use_helper |
| push {r0-r2, lr} |
| mov r0, ip |
| #ifndef NDEBUG |
| // sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro |
| cmp r0, #0 |
| bne 1f |
| bkpt #0 |
| #endif |
// scan loop: r1 = current halfword, r2 = its top byte, r0 advances by 2 each pass
| 1: |
| ldrh r1, [r0] |
| lsrs r2, r1, #8 |
| adds r0, #2 |
| cmp r2, #0xdf |
| bne 1b |
| uxtb r1, r1 // r1 holds table offset |
// r0 now points just past the marker halfword. Shifting right by 2 leaves
// bit 1 of r0 in the carry flag: carry clear means r0 is word aligned.
| lsrs r2, r0, #2 |
| bcc 1f |
| // unaligned |
// build the word from two halfword loads (low half first) instead of one ldr
| ldrh r2, [r0, #0] |
| ldrh r0, [r0, #2] |
| lsls r0, #16 |
| orrs r0, r2 |
| b 2f |
| 1: |
| ldr r0, [r0] |
| 2: |
| ldr r2, =sf_table |
| str r0, [r2, r1] |
// [sp, #12] is the slot holding the pushed lr (after r0, r1 and r2), so the
// pop below loads the word just fetched into pc
| str r0, [sp, #12] |
| pop {r0-r2, pc} |
| |
| float_section 642float_shims |
// 64-bit integer / fixed-point to float conversions.
// In:  r0 = low word, r1 = high word of the 64-bit input;
//      r2 = number of bits after the binary point (fixed-point entries only).
// The four entry points share one body: the integer entries set r2 = 0 and
// fall into the fixed-point entries; the unsigned entry deals with inputs that
// have bit 63 set itself and otherwise joins the signed path at local label 3.
// A zero input returns immediately with r0 unchanged (i.e. 0).
// Every other path ends by calling the ROM routine at 0x29ef (packx) with the
// working value in r0, an exponent in r2 and the leftover low-order bits in r5.
// NOTE(review): the register contract of packx is inferred from how the
// registers are prepared here - confirm against the ROM source.
| |
| @ convert uint64 to float, rounding |
| regular_func uint642float_shim |
| movs r2,#0 @ fall through |
| |
| @ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2 |
| regular_func ufix642float_shim |
| push {r4,r5,r14} |
| cmp r1,#0 |
| bpl 3f @ positive? we can use signed code |
// bit 63 is set: the working value becomes the high word shifted down one
// place; the bit shifted out plus the whole low word are kept in r5, and r2 is
// reduced by one to account for the shift
| lsls r5,r1,#31 @ contribution to sticky bits |
| orrs r5,r0 |
| lsrs r0,r1,#1 |
| subs r2,#1 |
| b 4f |
| |
| @ convert int64 to float, rounding |
| regular_func int642float_shim |
| movs r2,#0 @ fall through |
| |
| @ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2 |
| regular_func fix642float_shim |
| push {r4,r5,r14} |
| 3: |
| movs r5,r0 |
| orrs r5,r1 |
| beq ret_pop45 @ zero? return +0 |
| asrs r5,r1,#31 @ sign bits |
// normalisation loop: while the top 8 bits of r1 all equal the sign, shift
// r1:r0 left by 7 places and add 7 to r2
| 2: |
| asrs r4,r1,#24 @ try shifting 7 bits at a time |
| cmp r4,r5 |
| bne 1f @ next shift will overflow? |
| lsls r1,#7 |
| lsrs r4,r0,#25 |
| orrs r1,r4 |
| lsls r0,#7 |
| adds r2,#7 |
| b 2b |
| 1: |
// high word becomes the working value in r0, low word moves to r5
| movs r5,r0 |
| movs r0,r1 |
| 4: |
// r2 = 61 - r2
// NOTE(review): 61 presumably matches the exponent convention packx expects - confirm
| negs r2,r2 |
| adds r2,#32+29 |
| |
| // bl packx |
| ldr r1, =0x29ef // packx |
| blx r1 |
| ret_pop45: |
| pop {r4,r5,r15} |
| |
| float_section fatan2_shim |
// fatan2_shim: two-argument arctangent built from ROM helper routines.
// In:  r0, r1 = the two float arguments. The comments below call the value
//      arriving in r0 "y" and the one arriving in r1 "x".
// The ROM routine at 0x29c1 (unpackx) is called once per argument, with
// r0<->r1 and r2<->r3 swapped in between, so after the second call r0/r2
// belong to x and r1/r3 belong to y.
// NOTE(review): unpackx is assumed to turn the float in r0 into a mantissa in
// r0 and an exponent in r2 while leaving r1/r3 alone - confirm against the ROM
// source.
// All paths leave through local label 7, which sets r2 = 0 and jumps to ROM
// address 0x2b19 with r4, r5 and lr still on the stack.
// NOTE(review): 0x2b19 is presumably a pack-and-return tail that pops them -
// confirm.
| regular_func fatan2_shim |
| push {r4,r5,r14} |
| |
| ldr r4, =0x29c1 // unpackx |
| mov ip, r4 |
| @ unpack arguments and shift one down to have common exponent |
| blx ip |
// swap (r0,r2) with (r1,r3) so the second call works on the other argument
| mov r4,r0 |
| mov r0,r1 |
| mov r1,r4 |
| mov r4,r2 |
| mov r2,r3 |
| mov r3,r4 |
| blx ip |
| lsls r0,r0,#5 @ Q28 |
| lsls r1,r1,#5 @ Q28 |
| adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise |
| asrs r4,#9 |
| adds r4,#1 |
| bmi 2f @ force y to 0 proper, so result will be zero |
| subs r4,r2,r3 @ calculate shift |
| bge 1f @ ex>=ey? |
// ex < ey: shift x down by the exponent difference; for a difference of 28 or
// more x is reduced to its sign (0 or -1)
| negs r4,r4 @ make shift positive |
| asrs r0,r4 |
| cmp r4,#28 |
| blo 3f |
| asrs r0,#31 |
| b 3f |
| 1: |
// ex >= ey: shift y down by the exponent difference; a difference of 28 or
// more falls through to the special case at label 2
| asrs r1,r4 |
| cmp r4,#28 |
| blo 3f |
| 2: |
| @ here |x|>>|y| or both x and y are ±0 |
| cmp r0,#0 |
| bge 4f @ x positive, return signed 0 |
| ldr r3, =0x2cfc @ &pi_q29, circular coefficients |
| ldr r0,[r3] @ x negative, return +/- pi |
| asrs r1,#31 |
| eors r0,r1 |
| b 7f |
| 4: |
| asrs r0,r1,#31 |
| b 7f |
| 3: |
// general case: set up r0 = x, r1 = y, r2 = starting angle, r3 = coefficient
// table, r4 = 1, then call the ROM routine at 0x2b97 (cordic_vec)
| movs r2,#0 @ initial angle |
| ldr r3, =0x2cfc @ &pi_q29, circular coefficients |
| cmp r0,#0 @ x negative |
| bge 5f |
| negs r0,r0 @ rotate to 1st/4th quadrants |
| negs r1,r1 |
| ldr r2,[r3] @ pi Q29 |
| 5: |
| movs r4,#1 @ m=1 |
| ldr r5, =0x2b97 @ cordic_vec |
| blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded |
| mov r0,r2 @ result here is -pi/2..3pi/2 Q29 |
| @ asrs r2,#29 |
| @ subs r0,r2 |
| ldr r3, =0x2cfc @ &pi_q29, circular coefficients |
| ldr r2,[r3] @ pi Q29 |
| adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case |
| bcs 6f @ -pi/2..0? leave result as is |
| subs r4,r0,r2 @ <pi? leave as is |
| bmi 6f |
| subs r0,r4,r2 @ >pi: take off 2pi |
| 6: |
| subs r0,#1 @ fiddle factor so atan2(0,1)==0 |
| 7: |
| movs r2,#0 @ exponent for pack |
| ldr r3, =0x2b19 |
| bx r3 |
| |
| float_section float232_shims |
| |
| @ float2int_shim: clear r1, then continue straight into float2fix_shim |
| regular_func float2int_shim |
| movs r1,#0 |
| |
| @ float2fix_shim: an input with the sign bit set and an all-zero exponent field |
| @ (-0 or a negative denormal) returns 0 here; anything else is handed to the |
| @ original routine at 0x2acd, which returns directly to our caller |
| regular_func float2fix_shim |
| asrs r2,r0,#23 @ sign-extended sign:exponent; equals -256 only for sign=1, exponent=0 |
| adds r2,r2,#128 |
| adds r2,r2,#128 @ +256 in two imm8-sized steps; zero result flags the special case |
| bne 2f @ ordinary input? use the original code |
| movs r0,#0 |
| bx lr |
| 2: |
| ldr r2,=0x2acd |
| bx r2 |
| |
| float_section float264_shims |
// float -> 64-bit integer / fixed-point conversions.
// float2int64_shim and float2uint64_shim set r1 = 0 and fall into the
// corresponding fixed-point entry.
// float2fix64_shim pushes lr, calls f2fix, then branches to d2f64_a, which
// checks the result against the sign-extension word in r3 and returns by
// popping that lr.
// float2ufix64_shim returns 0 (via ret_dzero) when the input's sign bit is
// set; otherwise it falls straight into f2fix without pushing anything, so
// f2fix's own exit returns directly to the caller.
| |
| regular_func float2int64_shim |
| movs r1,#0 @ and fall through |
| regular_func float2fix64_shim |
| push {r14} |
| bl f2fix |
| b d2f64_a |
| |
| regular_func float2uint64_shim |
| movs r1,#0 @ and fall through |
| regular_func float2ufix64_shim |
| asrs r3,r0,#23 @ negative? return 0 |
| bmi ret_dzero |
| @ and fall through |
| |
| @ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf |
| @ result clamped so that r3 can only be 0 or -1 |
| @ trashes r12 |
// Register use: r12 = copy of the incoming r1, r3 = sign mask (0 or -1),
// r2 = exponent field, later with the bias of 0x7f removed.
// Only the exponent field is tested for the special cases, so any input with
// exponent 0 returns zero and any input with exponent 0xff takes the max/min
// path at label 2.
// The normal path jumps to d2fix_a with r4 and lr still pushed; every exit of
// d2fix_a pops {r4,r15} to return on f2fix's behalf.
| .thumb_func |
| f2fix: |
| push {r4,r14} |
| mov r12,r1 |
| asrs r3,r0,#31 |
| lsls r0,#1 |
| lsrs r2,r0,#24 |
| beq 1f @ zero? |
| cmp r2,#0xff @ Inf? |
| beq 2f |
// r0 has the exponent e in its top 8 bits; subtracting (e-1)<<24 leaves a
// single 1 in bit 24, directly above the mantissa bits
| subs r1,r2,#1 |
| subs r2,#0x7f @ remove exponent bias |
| lsls r1,#24 |
| subs r0,r1 @ insert implied 1 |
// (x ^ mask) - mask negates x when mask is -1 and leaves it alone when mask is 0
| eors r0,r3 |
| subs r0,r3 @ to two's complement |
| asrs r1,r0,#4 @ convert to double format |
| lsls r0,#28 |
| ldr r4, =d2fix_a |
| bx r4 |
| 1: |
| movs r0,#0 |
| movs r1,r0 |
| movs r3,r0 |
| pop {r4,r15} |
| 2: |
| mvns r0,r3 @ return max/min value |
| mvns r1,r3 |
| pop {r4,r15} |
| |
| ret_dzero: |
| movs r0,#0 |
| movs r1,#0 |
| bx r14 |
| |
| float_section d2fix_a_float |
| |
// d2fix_a: shift the 64-bit two's complement mantissa in r0:r1 (r0 = low word,
// r1 = high word) into its final fixed-point position.
// In:  r2 = unbiased exponent, r12 = offset added to it, r3 = sign-extension
//      word; r4 and a return address must already be on the stack, because
//      every exit is `pop {r4,r15}`.
// The required shift is r2 + r12 - 52:
//   >= 12       : both result words are set to ~r3 (clamp)
//   0 .. 11     : shift left
//   -32 .. -1   : shift right across the two words
//   -64 .. -33  : shift right taken from the high word only
//   < -64       : both result words are set to r3
| .weak d2fix_a // weak because it exists in float shims too |
| .thumb_func |
| d2fix_a: |
| @ here |
| @ r0:r1 two's complement mantissa |
| @ r2 unbiased exponent |
| @ r3 mantissa sign extension bits |
| add r2,r12 @ exponent plus offset for required binary point position |
| subs r2,#52 @ required shift |
| bmi 1f @ shift down? |
| @ here a shift up by r2 places |
| cmp r2,#12 @ will clamp? |
| bge 2f |
// r4 keeps the old low word so the bits that cross into the high word can be
// recovered with the complementary right shift
| movs r4,r0 |
| lsls r1,r2 |
| lsls r0,r2 |
| negs r2,r2 |
| adds r2,#32 @ complementary shift |
| lsrs r4,r2 |
| orrs r1,r4 |
| pop {r4,r15} |
| 2: |
| mvns r0,r3 |
| mvns r1,r3 @ overflow: clamp to extreme fixed-point values |
| pop {r4,r15} |
| 1: |
| @ here a shift down by -r2 places |
| adds r2,#32 |
| bmi 1f @ long shift? |
// r4 collects the bits of the high word that drop into the low word
| mov r4,r1 |
| lsls r4,r2 |
| negs r2,r2 |
| adds r2,#32 @ complementary shift |
| asrs r1,r2 |
| lsrs r0,r2 |
| orrs r0,r4 |
| pop {r4,r15} |
| 1: |
| @ here a long shift down |
| movs r0,r1 |
| asrs r1,#31 @ shift down 32 places |
| adds r2,#32 |
| bmi 1f @ very long shift? |
| negs r2,r2 |
| adds r2,#32 |
| asrs r0,r2 |
| pop {r4,r15} |
| 1: |
| movs r0,r3 @ result very near zero: use sign extension bits |
| movs r1,r3 |
| pop {r4,r15} |
// d2f64_a: final range check for the signed 64-bit result in r0:r1.
// In:  r3 = sign-extension word; a return address must be on the stack
//      (both exits are `pop {r15}`).
// If the sign of r1 agrees with r3 the result is returned unchanged. Otherwise
// r0 = ~r3 and r1 = 0x80000000 ^ ~r3, i.e. 0x7fffffff:0xffffffff when r3 is 0
// and 0x80000000:0x00000000 when r3 is -1.
| d2f64_a: |
| asrs r2,r1,#31 |
| cmp r2,r3 |
| bne 1f @ sign extension bits fail to match sign of result? |
| pop {r15} |
| 1: |
| mvns r0,r3 |
| movs r1,#1 |
| lsls r1,#31 |
| eors r1,r1,r0 @ generate extreme fixed-point values |
| pop {r15} |
| |
| float_section float2double_shim |
| @ float2double_shim: widen the float in r0 to a double in r0 (low word) : r1 (high word) |
| @ an all-zero exponent field gives a signed zero, an all-ones one a signed infinity |
| regular_func float2double_shim |
| lsrs r3,r0,#31 |
| lsls r3,r3,#31 @ r3 = sign bit on its own |
| lsls r1,r0,#1 @ r1 = input with the sign shifted out |
| lsrs r2,r1,#24 @ r2 = 8-bit exponent field |
| beq 7f @ exponent field zero |
| cmp r2,#0xff |
| beq 8f @ exponent field all ones |
| lsrs r1,r1,#4 @ exponent plus upper 20 mantissa bits, in double position |
| ldr r2,=(0x3ff-0x7f)<<20 @ rebias exponent from float to double |
| adds r1,r1,r2 |
| orrs r1,r1,r3 @ put the sign back |
| lsls r0,r0,#29 @ remaining 3 mantissa bits head the low word |
| bx lr |
| 8: |
| ldr r1,=0x7ff00000 |
| adds r1,r1,r3 @ infinity carrying the input's sign |
| b 9f |
| 7: |
| movs r1,r3 @ high word is just the sign |
| 9: |
| movs r0,#0 @ low word is zero for both special cases |
| bx lr |
| |
| #endif |