// src/rp2_common/pico_float/float_v1_rom_shim.S - third_party/github/raspberrypi/pico-sdk - Git at Google

 /*
  * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */

 #include "pico/asm_helper.S"

 #if PICO_FLOAT_SUPPORT_ROM_V1
 .syntax unified
 .cpu cortex-m0plus
 .thumb

 #ifndef PICO_FLOAT_IN_RAM
 #define PICO_FLOAT_IN_RAM 0
 #endif

 // float_section <name>
 // Switches to the executable ("ax") section for <name>. When
 // PICO_FLOAT_IN_RAM is non-zero the RAM_SECTION_NAME() form is used,
 // otherwise the SECTION_NAME() form.
 // todo separate flag for shims?
 .macro float_section name
 #if !PICO_FLOAT_IN_RAM
 .section SECTION_NAME(\name), "ax"
 #else
 .section RAM_SECTION_NAME(\name), "ax"
 #endif
 .endm

 // float_table_shim_on_use_helper
 //
 // Entered with ip (r12) holding an address to start scanning from. Scans
 // forward from ip for a halfword whose top byte is 0xdf; the low byte of that
 // halfword is used as a byte offset into sf_table, and the 32-bit word that
 // follows the halfword is the address to install. That address is written to
 // sf_table + offset and also over the saved lr slot on the stack, so the final
 // pop restores r0-r2 and jumps straight to the installed address.
 // NOTE(review): 0xdf is the top byte of a Thumb svc encoding, so the marker is
 // presumably emitted by the (shimmable_) table_tail_call macro - confirm
 // against pico/asm_helper.S.
 float_section float_table_shim_on_use_helper
 regular_func float_table_shim_on_use_helper
     push {r0-r2, lr}          // four words: r0,r1,r2,lr -> saved lr is at [sp, #12]
     mov r0, ip                // r0 = scan pointer taken from ip (r12)
 #ifndef NDEBUG
     // sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
     cmp r0, #0
     bne 1f
     bkpt #0
 #endif
 1:
     // step one halfword at a time until the top byte of the halfword is 0xdf
     ldrh r1, [r0]
     lsrs r2, r1, #8
     adds r0, #2
     cmp r2, #0xdf
     bne 1b
     uxtb r1, r1 // r1 holds table offset
     lsrs r2, r0, #2           // carry out = bit 1 of r0: set when r0 is not word aligned
     bcc 1f
     // unaligned
     ldrh r2, [r0, #0]         // low half of the word
     ldrh r0, [r0, #2]         // high half of the word
     lsls r0, #16
     orrs r0, r2
     b 2f
 1:
     ldr r0, [r0]              // aligned: single word load
 2:
     ldr r2, =sf_table
     str r0, [r2, r1]          // store the address into the table entry
     str r0, [sp, #12]         // replace saved lr so the pop below branches to it
     pop {r0-r2, pc}

 float_section 642float_shims

 // 64-bit integer / fixed-point to float conversions.
 // The four entry points below share one body:
 //   r0:r1 = 64-bit input (r0 low word, r1 high word)
 //   r2    = number of fraction bits (forced to 0 by the two integer entries)
 // The signed path shifts the value left 7 bits at a time while the top byte of
 // r1 is still pure sign extension, adding 7 to r2 for each step. The unsigned
 // path with the top bit set instead shifts right by one and keeps the lost bit
 // in r5. The ROM routine at 0x29ef (packx) is then called with
 //   r0 = high word, r5 = low word / sticky bits, r2 = 61 - r2.
 // NOTE(review): the register interface of packx is inferred from how it is
 // called here - confirm against the ROM float library source.

 @ convert uint64 to float, rounding
 regular_func uint642float_shim
  movs r2,#0       @ fall through

 @ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
 regular_func ufix642float_shim
  push {r4,r5,r14}
  cmp r1,#0
  bpl 3f          @ positive? we can use signed code
  lsls r5,r1,#31  @ contribution to sticky bits
  orrs r5,r0
  lsrs r0,r1,#1   @ r0 = high word shifted down one place
  subs r2,#1      @ account for the one place shift
  b 4f

 @ convert int64 to float, rounding
 regular_func int642float_shim
  movs r2,#0       @ fall through

 @ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
 regular_func fix642float_shim
  push {r4,r5,r14}
 3:
  movs r5,r0
  orrs r5,r1
  beq ret_pop45   @ zero? return +0
  asrs r5,r1,#31  @ sign bits
 2:
  asrs r4,r1,#24  @ try shifting 7 bits at a time
  cmp r4,r5
  bne 1f          @ next shift will overflow?
  lsls r1,#7
  lsrs r4,r0,#25  @ top 7 bits of the low word move into the high word
  orrs r1,r4
  lsls r0,#7
  adds r2,#7
  b 2b
 1:
  movs r5,r0      @ r5 = low word
  movs r0,r1      @ r0 = high word
 4:
  rsbs r2,#0
  adds r2,#32+29  @ r2 = 61 - r2

  // bl packx
  ldr r1, =0x29ef // packx
  blx r1
 ret_pop45:
  pop {r4,r5,r15}

 // fatan2_shim: float atan2 built on ROM helpers.
 // Going by the x/y naming in the comments below, the first argument (r0) is y
 // and the second (r1) is x. Both are unpacked with the ROM routine at 0x29c1
 // (unpackx), brought to a common exponent as Q28 values and passed to the ROM
 // CORDIC routine at 0x2b97 (cordic_vec). The angle is then folded back into
 // range and the function ends by jumping to ROM address 0x2b19.
 // NOTE(review): r4, r5 and lr are pushed on entry and never popped in this
 // file, so the ROM code at 0x2b19 presumably packs the result and pops
 // {r4,r5,pc} - confirm against the bootrom source.
 // NOTE(review): unpackx appears to take a float in r0 and return the mantissa
 // in r0 and the exponent in r2 (that is how its results are used here) - confirm.
 float_section fatan2_shim
 regular_func fatan2_shim
  push {r4,r5,r14}

  ldr r4, =0x29c1 // unpackx
  mov ip, r4
 @ unpack arguments and shift one down to have common exponent
  blx ip
  mov r4,r0      @ swap r0 with r1 ...
  mov r0,r1
  mov r1,r4
  mov r4,r2      @ ... and r2 with r3, then unpack the other argument
  mov r2,r3
  mov r3,r4
  blx ip
  lsls r0,r0,#5  @ Q28
  lsls r1,r1,#5  @ Q28
  adds r4,r2,r3  @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
  asrs r4,#9
  adds r4,#1
  bmi 2f         @ force y to 0 proper, so result will be zero
  subs r4,r2,r3  @ calculate shift
  bge 1f         @ ex>=ey?
  rsbs r4,#0     @ make shift positive
  asrs r0,r4
  cmp r4,#28
  blo 3f
  asrs r0,#31    @ shift of 28 or more: reduce x to its sign bits
  b 3f
 1:
  asrs r1,r4
  cmp r4,#28
  blo 3f
 2:
 @ here |x|>>|y| or both x and y are ±0
  cmp r0,#0
  bge 4f         @ x positive, return signed 0
  ldr r3, =0x2cfc         @ &pi_q29, circular coefficients
  ldr r0,[r3]    @ x negative, return +/- pi
  asrs r1,#31
  eors r0,r1
  b 7f
 4:
  asrs r0,r1,#31
  b 7f
 3:
  movs r2,#0              @ initial angle
  ldr r3, =0x2cfc         @ &pi_q29, circular coefficients
  cmp r0,#0               @ x negative
  bge 5f
  rsbs r0,#0              @ rotate to 1st/4th quadrants
  rsbs r1,#0
  ldr r2,[r3]             @ pi Q29
 5:
  movs r4,#1              @ m=1
  ldr r5, =0x2b97         @ cordic_vec
  blx r5                  @ also produces magnitude (with scaling factor 1.646760119), which is discarded
  mov r0,r2               @ result here is -pi/2..3pi/2 Q29
 @ asrs r2,#29
 @ subs r0,r2
  ldr r3, =0x2cfc         @ &pi_q29, circular coefficients
  ldr r2,[r3]             @ pi Q29
  adds r4,r0,r2           @ attempt to fix -3pi/2..-pi case
  bcs 6f                  @ -pi/2..0? leave result as is
  subs r4,r0,r2           @ <pi? leave as is
  bmi 6f
  subs r0,r4,r2           @ >pi: take off 2pi
 6:
  subs r0,#1              @ fiddle factor so atan2(0,1)==0
 7:
  movs r2,#0              @ exponent for pack
  ldr r3, =0x2b19         @ continue in ROM; the frame pushed on entry is still on the stack
  bx r3

 float_section float232_shims

 // float -> 32-bit integer / fixed-point.
 //   r0 = float input
 //   r1 = second argument, passed through untouched to the ROM routine
 //        (float2int_shim sets it to 0)
 // An input with the sign bit set and an all-zero exponent field (-0 or a
 // negative denormal) returns 0 from here; every other input is handed to the
 // ROM routine at 0x2acd by a tail jump.
 regular_func float2int_shim
      movs r1,#0                    @ fall through
 regular_func float2fix_shim
      // check for -0 or -denormal upfront
      asrs r2, r0, #23              // r2 = sign:exponent, sign extended (-256..255)
      adds r2, #128                 // add 256 in two steps (immediate is limited to 8 bits)
      adds r2, #128
      beq 1f                        // zero only when r2 was -256: sign set, exponent 0
      // call original
      ldr r2, =0x2acd
      bx r2
      1:
      movs r0, #0
      bx lr

 float_section float264_shims

 // float -> 64-bit integer / fixed-point.
 //   r0 = float input, r1 = number of places after the point (0 for the
 //   integer entries); result is returned in r0:r1.
 // float2int64_shim / float2fix64_shim: signed. Calls f2fix, then branches to
 //   d2f64_a which returns through the lr pushed here.
 // float2uint64_shim / float2ufix64_shim: unsigned. Inputs with the sign bit set
 //   return 0; anything else falls into f2fix, which returns to the caller.
 regular_func float2int64_shim
  movs r1,#0                    @ and fall through
 regular_func float2fix64_shim
  push {r14}                    @ popped by d2f64_a
  bl f2fix
  b d2f64_a

 regular_func float2uint64_shim
  movs r1,#0                    @ and fall through
 regular_func float2ufix64_shim
  asrs r3,r0,#23                @ negative? return 0
  bmi ret_dzero
 @ and fall through

 @ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
 @ result clamped so that r3 can only be 0 or -1
 @ trashes r12
 .thumb_func
 f2fix:
  push {r4,r14}                 @ popped by d2fix_a or by the early exits below
  mov r12,r1                    @ r12 = places after point, consumed by d2fix_a
  asrs r3,r0,#31                @ r3 = sign extension bits (0 or -1)
  lsls r0,#1                    @ drop the sign bit
  lsrs r2,r0,#24                @ r2 = biased exponent
  beq 1f                        @ zero?
  cmp r2,#0xff                  @ Inf?
  beq 2f
  subs r1,r2,#1
  subs r2,#0x7f                 @ remove exponent bias
  lsls r1,#24
  subs r0,r1                    @ insert implied 1
  eors r0,r3
  subs r0,r3                    @ top two's complement
  asrs r1,r0,#4                 @ convert to double format
  lsls r0,#28
  ldr r4, =d2fix_a              @ r4 is free to use: it was saved above
  bx r4
 1:
  movs r0,#0
  movs r1,r0
  movs r3,r0
  pop {r4,r15}
 2:
  mvns r0,r3                    @ return max/min value
  mvns r1,r3
  pop {r4,r15}

 ret_dzero:
  movs r0,#0
  movs r1,#0
  bx r14

 float_section d2fix_a_float

 // d2fix_a: shift a 64-bit two's complement mantissa into fixed-point position.
 // Reached by a jump from f2fix, which has already pushed {r4,lr}; every exit
 // here pops {r4,pc}. r12 holds the requested number of places after the point.
 // d2f64_a (further down) is the tail of float2fix64_shim: it checks the
 // 64-bit result against the sign extension bits in r3 and saturates on
 // mismatch, returning through the lr that float2fix64_shim pushed.
 .weak d2fix_a // weak because it exists in float shims too
 .thumb_func
 d2fix_a:
 @ here
 @ r0:r1 two's complement mantissa
 @ r2    unbiased exponent
 @ r3    mantissa sign extension bits
  add r2,r12                    @ exponent plus offset for required binary point position
  subs r2,#52                   @ required shift
  bmi 1f                        @ shift down?
 @ here a shift up by r2 places
  cmp r2,#12                    @ will clamp?
  bge 2f
  movs r4,r0
  lsls r1,r2
  lsls r0,r2
  rsbs r2,#0
  adds r2,#32                   @ complementary shift
  lsrs r4,r2                    @ bits of the low word that cross into the high word
  orrs r1,r4
  pop {r4,r15}
 2:
  mvns r0,r3
  mvns r1,r3                    @ overflow: clamp to extreme fixed-point values
  pop {r4,r15}
 1:
 @ here a shift down by -r2 places
  adds r2,#32
  bmi 1f                        @ long shift?
  mov r4,r1
  lsls r4,r2                    @ bits of the high word that cross into the low word
  rsbs r2,#0
  adds r2,#32                   @ complementary shift
  asrs r1,r2
  lsrs r0,r2
  orrs r0,r4
  pop {r4,r15}
 1:
 @ here a long shift down
  movs r0,r1
  asrs r1,#31                   @ shift down 32 places
  adds r2,#32
  bmi 1f                        @ very long shift?
  rsbs r2,#0
  adds r2,#32
  asrs r0,r2
  pop {r4,r15}
 1:
  movs r0,r3                    @ result very near zero: use sign extension bits
  movs r1,r3
  pop {r4,r15}
 d2f64_a:
  asrs r2,r1,#31                @ sign of the 64-bit result
  cmp r2,r3
  bne 1f                        @ sign extension bits fail to match sign of result?
  pop {r15}
 1:
  mvns r0,r3                    @ low word: all ones when r3 is 0, zero when r3 is -1
  movs r1,#1
  lsls r1,#31
  eors r1,r1,r0                 @ generate extreme fixed-point values
  pop {r15}

 // float2double_shim: widen a single-precision float to double precision.
 //   in:  r0    = float bit pattern
 //   out: r0:r1 = double bit pattern (r0 low word, r1 high word)
 //   uses r2, r3 as scratch; leaf routine, returns with bx r14.
 // An exponent field of 0 returns a signed zero and an exponent field of 0xff
 // returns a signed infinity; in both cases the mantissa bits are discarded.
 // The literal loads use the standard "ldr rX, =expr" form (no # after the =),
 // which is the documented spelling of the literal-pool pseudo-instruction.
 float_section float2double_shim
 regular_func float2double_shim
  lsrs r3,r0,#31                @ sign bit
  lsls r3,#31                   @ r3 = sign in bit 31, all other bits clear
  lsls r1,r0,#1                 @ r1 = exponent and mantissa with the sign shifted out
  lsrs r2,r1,#24                @ exponent
  beq 1f                        @ zero?
  cmp r2,#0xff                  @ Inf?
  beq 2f
  lsrs r1,#4                    @ exponent and top 20 bits of mantissa
  ldr r2,=(0x3ff-0x7f)<<20      @ difference in exponent offsets
  adds r1,r2
  orrs r1,r3
  lsls r0,#29                   @ bottom 3 bits of mantissa
  bx r14
 1:
  movs r1,r3                    @ return signed zero
 3:
  movs r0,#0
  bx r14
 2:
  ldr r1,=0x7ff00000            @ return signed infinity
  adds r1,r3
  b 3b

 #endif
	/*
	* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
	*
	* SPDX-License-Identifier: BSD-3-Clause
	*/

	#include "pico/asm_helper.S"

	#if PICO_FLOAT_SUPPORT_ROM_V1
	.syntax unified
	.cpu cortex-m0plus
	.thumb

	#ifndef PICO_FLOAT_IN_RAM
	#define PICO_FLOAT_IN_RAM 0
	#endif

	// float_section <name>
	// Switches to the executable ("ax") section for <name>. When
	// PICO_FLOAT_IN_RAM is non-zero the RAM_SECTION_NAME() form is used,
	// otherwise the SECTION_NAME() form.
	// todo separate flag for shims?
	.macro float_section name
	#if !PICO_FLOAT_IN_RAM
	.section SECTION_NAME(\name), "ax"
	#else
	.section RAM_SECTION_NAME(\name), "ax"
	#endif
	.endm

	// float_table_shim_on_use_helper
	//
	// Entered with ip (r12) holding an address to start scanning from. Scans
	// forward from ip for a halfword whose top byte is 0xdf; the low byte of that
	// halfword is used as a byte offset into sf_table, and the 32-bit word that
	// follows the halfword is the address to install. That address is written to
	// sf_table + offset and also over the saved lr slot on the stack, so the final
	// pop restores r0-r2 and jumps straight to the installed address.
	// NOTE(review): 0xdf is the top byte of a Thumb svc encoding, so the marker is
	// presumably emitted by the (shimmable_) table_tail_call macro - confirm
	// against pico/asm_helper.S.
	float_section float_table_shim_on_use_helper
	regular_func float_table_shim_on_use_helper
	push {r0-r2, lr} // four words: r0,r1,r2,lr -> saved lr is at [sp, #12]
	mov r0, ip // r0 = scan pointer taken from ip (r12)
	#ifndef NDEBUG
	// sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
	cmp r0, #0
	bne 1f
	bkpt #0
	#endif
	1:
	// step one halfword at a time until the top byte of the halfword is 0xdf
	ldrh r1, [r0]
	lsrs r2, r1, #8
	adds r0, #2
	cmp r2, #0xdf
	bne 1b
	uxtb r1, r1 // r1 holds table offset
	lsrs r2, r0, #2 // carry out = bit 1 of r0: set when r0 is not word aligned
	bcc 1f
	// unaligned
	ldrh r2, [r0, #0] // low half of the word
	ldrh r0, [r0, #2] // high half of the word
	lsls r0, #16
	orrs r0, r2
	b 2f
	1:
	ldr r0, [r0] // aligned: single word load
	2:
	ldr r2, =sf_table
	str r0, [r2, r1] // store the address into the table entry
	str r0, [sp, #12] // replace saved lr so the pop below branches to it
	pop {r0-r2, pc}

	float_section 642float_shims

	// 64-bit integer / fixed-point to float conversions.
	// The four entry points below share one body:
	//   r0:r1 = 64-bit input (r0 low word, r1 high word)
	//   r2    = number of fraction bits (forced to 0 by the two integer entries)
	// The signed path shifts the value left 7 bits at a time while the top byte of
	// r1 is still pure sign extension, adding 7 to r2 for each step. The unsigned
	// path with the top bit set instead shifts right by one and keeps the lost bit
	// in r5. The ROM routine at 0x29ef (packx) is then called with
	//   r0 = high word, r5 = low word / sticky bits, r2 = 61 - r2.
	// NOTE(review): the register interface of packx is inferred from how it is
	// called here - confirm against the ROM float library source.

	@ convert uint64 to float, rounding
	regular_func uint642float_shim
	movs r2,#0 @ fall through

	@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
	regular_func ufix642float_shim
	push {r4,r5,r14}
	cmp r1,#0
	bpl 3f @ positive? we can use signed code
	lsls r5,r1,#31 @ contribution to sticky bits
	orrs r5,r0
	lsrs r0,r1,#1 @ r0 = high word shifted down one place
	subs r2,#1 @ account for the one place shift
	b 4f

	@ convert int64 to float, rounding
	regular_func int642float_shim
	movs r2,#0 @ fall through

	@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
	regular_func fix642float_shim
	push {r4,r5,r14}
	3:
	movs r5,r0
	orrs r5,r1
	beq ret_pop45 @ zero? return +0
	asrs r5,r1,#31 @ sign bits
	2:
	asrs r4,r1,#24 @ try shifting 7 bits at a time
	cmp r4,r5
	bne 1f @ next shift will overflow?
	lsls r1,#7
	lsrs r4,r0,#25 @ top 7 bits of the low word move into the high word
	orrs r1,r4
	lsls r0,#7
	adds r2,#7
	b 2b
	1:
	movs r5,r0 @ r5 = low word
	movs r0,r1 @ r0 = high word
	4:
	rsbs r2,#0
	adds r2,#32+29 @ r2 = 61 - r2

	// bl packx
	ldr r1, =0x29ef // packx
	blx r1
	ret_pop45:
	pop {r4,r5,r15}

	// fatan2_shim: float atan2 built on ROM helpers.
	// Going by the x/y naming in the comments below, the first argument (r0) is y
	// and the second (r1) is x. Both are unpacked with the ROM routine at 0x29c1
	// (unpackx), brought to a common exponent as Q28 values and passed to the ROM
	// CORDIC routine at 0x2b97 (cordic_vec). The angle is then folded back into
	// range and the function ends by jumping to ROM address 0x2b19.
	// NOTE(review): r4, r5 and lr are pushed on entry and never popped in this
	// file, so the ROM code at 0x2b19 presumably packs the result and pops
	// {r4,r5,pc} - confirm against the bootrom source.
	// NOTE(review): unpackx appears to take a float in r0 and return the mantissa
	// in r0 and the exponent in r2 (that is how its results are used here) - confirm.
	float_section fatan2_shim
	regular_func fatan2_shim
	push {r4,r5,r14}

	ldr r4, =0x29c1 // unpackx
	mov ip, r4
	@ unpack arguments and shift one down to have common exponent
	blx ip
	mov r4,r0 @ swap r0 with r1 ...
	mov r0,r1
	mov r1,r4
	mov r4,r2 @ ... and r2 with r3, then unpack the other argument
	mov r2,r3
	mov r3,r4
	blx ip
	lsls r0,r0,#5 @ Q28
	lsls r1,r1,#5 @ Q28
	adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
	asrs r4,#9
	adds r4,#1
	bmi 2f @ force y to 0 proper, so result will be zero
	subs r4,r2,r3 @ calculate shift
	bge 1f @ ex>=ey?
	rsbs r4,#0 @ make shift positive
	asrs r0,r4
	cmp r4,#28
	blo 3f
	asrs r0,#31 @ shift of 28 or more: reduce x to its sign bits
	b 3f
	1:
	asrs r1,r4
	cmp r4,#28
	blo 3f
	2:
	@ here |x|>>|y| or both x and y are ±0
	cmp r0,#0
	bge 4f @ x positive, return signed 0
	ldr r3, =0x2cfc @ &pi_q29, circular coefficients
	ldr r0,[r3] @ x negative, return +/- pi
	asrs r1,#31
	eors r0,r1
	b 7f
	4:
	asrs r0,r1,#31
	b 7f
	3:
	movs r2,#0 @ initial angle
	ldr r3, =0x2cfc @ &pi_q29, circular coefficients
	cmp r0,#0 @ x negative
	bge 5f
	rsbs r0,#0 @ rotate to 1st/4th quadrants
	rsbs r1,#0
	ldr r2,[r3] @ pi Q29
	5:
	movs r4,#1 @ m=1
	ldr r5, =0x2b97 @ cordic_vec
	blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded
	mov r0,r2 @ result here is -pi/2..3pi/2 Q29
	@ asrs r2,#29
	@ subs r0,r2
	ldr r3, =0x2cfc @ &pi_q29, circular coefficients
	ldr r2,[r3] @ pi Q29
	adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case
	bcs 6f @ -pi/2..0? leave result as is
	subs r4,r0,r2 @ <pi? leave as is
	bmi 6f
	subs r0,r4,r2 @ >pi: take off 2pi
	6:
	subs r0,#1 @ fiddle factor so atan2(0,1)==0
	7:
	movs r2,#0 @ exponent for pack
	ldr r3, =0x2b19 @ continue in ROM; the frame pushed on entry is still on the stack
	bx r3

	float_section float232_shims

	// float -> 32-bit integer / fixed-point.
	//   r0 = float input
	//   r1 = second argument, passed through untouched to the ROM routine
	//        (float2int_shim sets it to 0)
	// An input with the sign bit set and an all-zero exponent field (-0 or a
	// negative denormal) returns 0 from here; every other input is handed to the
	// ROM routine at 0x2acd by a tail jump.
	regular_func float2int_shim
	movs r1,#0 @ fall through
	regular_func float2fix_shim
	// check for -0 or -denormal upfront
	asrs r2, r0, #23 // r2 = sign:exponent, sign extended (-256..255)
	adds r2, #128 // add 256 in two steps (immediate is limited to 8 bits)
	adds r2, #128
	beq 1f // zero only when r2 was -256: sign set, exponent 0
	// call original
	ldr r2, =0x2acd
	bx r2
	1:
	movs r0, #0
	bx lr

	float_section float264_shims

	// float -> 64-bit integer / fixed-point.
	//   r0 = float input, r1 = number of places after the point (0 for the
	//   integer entries); result is returned in r0:r1.
	// float2int64_shim / float2fix64_shim: signed. Calls f2fix, then branches to
	//   d2f64_a which returns through the lr pushed here.
	// float2uint64_shim / float2ufix64_shim: unsigned. Inputs with the sign bit set
	//   return 0; anything else falls into f2fix, which returns to the caller.
	regular_func float2int64_shim
	movs r1,#0 @ and fall through
	regular_func float2fix64_shim
	push {r14} @ popped by d2f64_a
	bl f2fix
	b d2f64_a

	regular_func float2uint64_shim
	movs r1,#0 @ and fall through
	regular_func float2ufix64_shim
	asrs r3,r0,#23 @ negative? return 0
	bmi ret_dzero
	@ and fall through

	@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
	@ result clamped so that r3 can only be 0 or -1
	@ trashes r12
	.thumb_func
	f2fix:
	push {r4,r14} @ popped by d2fix_a or by the early exits below
	mov r12,r1 @ r12 = places after point, consumed by d2fix_a
	asrs r3,r0,#31 @ r3 = sign extension bits (0 or -1)
	lsls r0,#1 @ drop the sign bit
	lsrs r2,r0,#24 @ r2 = biased exponent
	beq 1f @ zero?
	cmp r2,#0xff @ Inf?
	beq 2f
	subs r1,r2,#1
	subs r2,#0x7f @ remove exponent bias
	lsls r1,#24
	subs r0,r1 @ insert implied 1
	eors r0,r3
	subs r0,r3 @ top two's complement
	asrs r1,r0,#4 @ convert to double format
	lsls r0,#28
	ldr r4, =d2fix_a @ r4 is free to use: it was saved above
	bx r4
	1:
	movs r0,#0
	movs r1,r0
	movs r3,r0
	pop {r4,r15}
	2:
	mvns r0,r3 @ return max/min value
	mvns r1,r3
	pop {r4,r15}

	ret_dzero:
	movs r0,#0
	movs r1,#0
	bx r14

	float_section d2fix_a_float

	// d2fix_a: shift a 64-bit two's complement mantissa into fixed-point position.
	// Reached by a jump from f2fix, which has already pushed {r4,lr}; every exit
	// here pops {r4,pc}. r12 holds the requested number of places after the point.
	// d2f64_a (further down) is the tail of float2fix64_shim: it checks the
	// 64-bit result against the sign extension bits in r3 and saturates on
	// mismatch, returning through the lr that float2fix64_shim pushed.
	.weak d2fix_a // weak because it exists in float shims too
	.thumb_func
	d2fix_a:
	@ here
	@ r0:r1 two's complement mantissa
	@ r2 unbiased exponent
	@ r3 mantissa sign extension bits
	add r2,r12 @ exponent plus offset for required binary point position
	subs r2,#52 @ required shift
	bmi 1f @ shift down?
	@ here a shift up by r2 places
	cmp r2,#12 @ will clamp?
	bge 2f
	movs r4,r0
	lsls r1,r2
	lsls r0,r2
	rsbs r2,#0
	adds r2,#32 @ complementary shift
	lsrs r4,r2 @ bits of the low word that cross into the high word
	orrs r1,r4
	pop {r4,r15}
	2:
	mvns r0,r3
	mvns r1,r3 @ overflow: clamp to extreme fixed-point values
	pop {r4,r15}
	1:
	@ here a shift down by -r2 places
	adds r2,#32
	bmi 1f @ long shift?
	mov r4,r1
	lsls r4,r2 @ bits of the high word that cross into the low word
	rsbs r2,#0
	adds r2,#32 @ complementary shift
	asrs r1,r2
	lsrs r0,r2
	orrs r0,r4
	pop {r4,r15}
	1:
	@ here a long shift down
	movs r0,r1
	asrs r1,#31 @ shift down 32 places
	adds r2,#32
	bmi 1f @ very long shift?
	rsbs r2,#0
	adds r2,#32
	asrs r0,r2
	pop {r4,r15}
	1:
	movs r0,r3 @ result very near zero: use sign extension bits
	movs r1,r3
	pop {r4,r15}
	d2f64_a:
	asrs r2,r1,#31 @ sign of the 64-bit result
	cmp r2,r3
	bne 1f @ sign extension bits fail to match sign of result?
	pop {r15}
	1:
	mvns r0,r3 @ low word: all ones when r3 is 0, zero when r3 is -1
	movs r1,#1
	lsls r1,#31
	eors r1,r1,r0 @ generate extreme fixed-point values
	pop {r15}

	// float2double_shim: widen a single-precision float to double precision.
	//   in:  r0    = float bit pattern
	//   out: r0:r1 = double bit pattern (r0 low word, r1 high word)
	//   uses r2, r3 as scratch; leaf routine, returns with bx r14.
	// An exponent field of 0 returns a signed zero and an exponent field of 0xff
	// returns a signed infinity; in both cases the mantissa bits are discarded.
	// The literal loads use the standard "ldr rX, =expr" form (no # after the =),
	// which is the documented spelling of the literal-pool pseudo-instruction.
	float_section float2double_shim
	regular_func float2double_shim
	lsrs r3,r0,#31 @ sign bit
	lsls r3,#31 @ r3 = sign in bit 31, all other bits clear
	lsls r1,r0,#1 @ r1 = exponent and mantissa with the sign shifted out
	lsrs r2,r1,#24 @ exponent
	beq 1f @ zero?
	cmp r2,#0xff @ Inf?
	beq 2f
	lsrs r1,#4 @ exponent and top 20 bits of mantissa
	ldr r2,=(0x3ff-0x7f)<<20 @ difference in exponent offsets
	adds r1,r2
	orrs r1,r3
	lsls r0,#29 @ bottom 3 bits of mantissa
	bx r14
	1:
	movs r1,r3 @ return signed zero
	3:
	movs r0,#0
	bx r14
	2:
	ldr r1,=0x7ff00000 @ return signed infinity
	adds r1,r3
	b 3b

	#endif