RP2040 Updates: (#1193)

* Standardize on configNUMBER_OF_CORES != 1 to select SMP functionality
* Fix SDK pico_sync interoperability (configSUPPORT_PICO_SYNC_INTEROP == 1)

Co-authored-by: graham sanderson <graham.sanderson@raspberrypi.com>
diff --git a/portable/ThirdParty/GCC/RP2040/include/portmacro.h b/portable/ThirdParty/GCC/RP2040/include/portmacro.h
index 0232508..14f5894 100644
--- a/portable/ThirdParty/GCC/RP2040/include/portmacro.h
+++ b/portable/ThirdParty/GCC/RP2040/include/portmacro.h
@@ -151,11 +151,12 @@
 
 void vYieldCore( int xCoreID );
 #define portYIELD_CORE( a )                  vYieldCore( a )
-#define portRESTORE_INTERRUPTS( ulState )    __asm volatile ( "msr PRIMASK,%0" ::"r" ( ulState ) : )
 
 /*-----------------------------------------------------------*/
 
 /* Critical nesting count management. */
+#define portCRITICAL_NESTING_IN_TCB    0
+
 extern UBaseType_t uxCriticalNestings[ configNUMBER_OF_CORES ];
 #define portGET_CRITICAL_NESTING_COUNT()          ( uxCriticalNestings[ portGET_CORE_ID() ] )
 #define portSET_CRITICAL_NESTING_COUNT( x )       ( uxCriticalNestings[ portGET_CORE_ID() ] = ( x ) )
@@ -181,9 +182,7 @@
 #define portCLEAR_INTERRUPT_MASK_FROM_ISR( x )    vClearInterruptMaskFromISR( x )
 
 #define portDISABLE_INTERRUPTS()                  __asm volatile ( " cpsid i " ::: "memory" )
-
-extern void vPortEnableInterrupts();
-#define portENABLE_INTERRUPTS()                   vPortEnableInterrupts()
+#define portENABLE_INTERRUPTS()                   __asm volatile ( " cpsie i " ::: "memory" )
 
 #if ( configNUMBER_OF_CORES == 1 )
     extern void vPortEnterCritical( void );
@@ -203,6 +202,12 @@
 
 #define portRTOS_SPINLOCK_COUNT    2
 
+#if PICO_SDK_VERSION_MAJOR < 2
+__force_inline static bool spin_try_lock_unsafe(spin_lock_t *lock) {
+   return *lock;
+}
+#endif
+
 /* Note this is a single method with uxAcquire parameter since we have
  * static vars, the method is always called with a compile time constant for
  * uxAcquire, and the compiler should dothe right thing! */
@@ -210,45 +215,36 @@
                                        spin_lock_t * pxSpinLock,
                                        BaseType_t uxAcquire )
 {
-    static uint8_t ucOwnedByCore[ portMAX_CORE_COUNT ];
-    static uint8_t ucRecursionCountByLock[ portRTOS_SPINLOCK_COUNT ];
+    static volatile uint8_t ucOwnedByCore[ portMAX_CORE_COUNT ][portRTOS_SPINLOCK_COUNT];
+    static volatile uint8_t ucRecursionCountByLock[ portRTOS_SPINLOCK_COUNT ];
 
     configASSERT( ulLockNum < portRTOS_SPINLOCK_COUNT );
     uint32_t ulCoreNum = get_core_num();
-    uint32_t ulLockBit = 1u << ulLockNum;
-    configASSERT( ulLockBit < 256u );
 
     if( uxAcquire )
     {
-        if( __builtin_expect( !*pxSpinLock, 0 ) )
-        {
-            if( ucOwnedByCore[ ulCoreNum ] & ulLockBit )
+        if (!spin_try_lock_unsafe(pxSpinLock)) {
+            if( ucOwnedByCore[ ulCoreNum ][ ulLockNum ] )
             {
                 configASSERT( ucRecursionCountByLock[ ulLockNum ] != 255u );
                 ucRecursionCountByLock[ ulLockNum ]++;
                 return;
             }
-
-            while( __builtin_expect( !*pxSpinLock, 0 ) )
-            {
-            }
+            spin_lock_unsafe_blocking(pxSpinLock);
         }
-
-        __mem_fence_acquire();
         configASSERT( ucRecursionCountByLock[ ulLockNum ] == 0 );
         ucRecursionCountByLock[ ulLockNum ] = 1;
-        ucOwnedByCore[ ulCoreNum ] |= ulLockBit;
+        ucOwnedByCore[ ulCoreNum ][ ulLockNum ] = 1;
     }
     else
     {
-        configASSERT( ( ucOwnedByCore[ ulCoreNum ] & ulLockBit ) != 0 );
+        configASSERT( ( ucOwnedByCore[ ulCoreNum ] [ulLockNum ] ) != 0 );
         configASSERT( ucRecursionCountByLock[ ulLockNum ] != 0 );
 
         if( !--ucRecursionCountByLock[ ulLockNum ] )
         {
-            ucOwnedByCore[ ulCoreNum ] &= ~ulLockBit;
-            __mem_fence_release();
-            *pxSpinLock = 1;
+            ucOwnedByCore[ ulCoreNum ] [ ulLockNum ] = 0;
+            spin_unlock_unsafe(pxSpinLock);
         }
     }
 }
diff --git a/portable/ThirdParty/GCC/RP2040/port.c b/portable/ThirdParty/GCC/RP2040/port.c
index 78c11bd..37b037e 100644
--- a/portable/ThirdParty/GCC/RP2040/port.c
+++ b/portable/ThirdParty/GCC/RP2040/port.c
@@ -46,9 +46,6 @@
     #include "pico/multicore.h"
 #endif /* LIB_PICO_MULTICORE */
 
-/* TODO : consider to remove this macro. */
-#define portRUNNING_ON_BOTH_CORES             ( configNUMBER_OF_CORES == portMAX_CORE_COUNT )
-
 /* Constants required to manipulate the NVIC. */
 #define portNVIC_SYSTICK_CTRL_REG             ( *( ( volatile uint32_t * ) 0xe000e010 ) )
 #define portNVIC_SYSTICK_LOAD_REG             ( *( ( volatile uint32_t * ) 0xe000e014 ) )
@@ -123,22 +120,21 @@
 
 /*-----------------------------------------------------------*/
 
+#if ( configSUPPORT_PICO_SYNC_INTEROP == 1 || configNUMBER_OF_CORES > 1 )
+    #include "hardware/irq.h"
+#endif /* ( configSUPPORT_PICO_SYNC_INTEROP == 1 || configNUMBER_OF_CORES > 1 ) */
 #if ( configSUPPORT_PICO_SYNC_INTEROP == 1 )
     #include "pico/lock_core.h"
-    #include "hardware/irq.h"
     #include "event_groups.h"
     #if configSUPPORT_STATIC_ALLOCATION
         static StaticEventGroup_t xStaticEventGroup;
         #define pEventGroup    ( &xStaticEventGroup )
     #endif /* configSUPPORT_STATIC_ALLOCATION */
     static EventGroupHandle_t xEventGroup;
-    #if ( portRUNNING_ON_BOTH_CORES == 0 )
+    #if ( configNUMBER_OF_CORES == 1 )
         static EventBits_t uxCrossCoreEventBits;
-        static spin_lock_t * pxCrossCoreSpinLock;
+        static spin_lock_t * pxCrossCoreSpinLock; /* protects uxCrossCoreEventBits */
     #endif
-
-    static spin_lock_t * pxYieldSpinLock[ configNUMBER_OF_CORES ];
-    static uint32_t ulYieldSpinLockSaveValue[ configNUMBER_OF_CORES ];
 #endif /* configSUPPORT_PICO_SYNC_INTEROP */
 
 /*
@@ -171,7 +167,7 @@
 static uint8_t ucPrimaryCoreNum = INVALID_PRIMARY_CORE_NUM;
 
 /* Note: portIS_FREE_RTOS_CORE() also returns false until the scheduler is started */
-#if ( portRUNNING_ON_BOTH_CORES == 1 )
+#if ( configNUMBER_OF_CORES != 1 )
     #define portIS_FREE_RTOS_CORE()    ( ucPrimaryCoreNum != INVALID_PRIMARY_CORE_NUM )
 #else
     #define portIS_FREE_RTOS_CORE()    ( ucPrimaryCoreNum == get_core_num() )
@@ -247,16 +243,16 @@
                 "   ldr r0, [r0]                    \n"
                 "   msr msp, r0                     \n" /* Set the msp back to the start of the stack. */
             #endif /* configRESET_STACK_POINTER */
-            #if portRUNNING_ON_BOTH_CORES
+            #if ( configNUMBER_OF_CORES != 1 )
                 "   adr r1, ulAsmLocals             \n" /* Get the location of the current TCB for the current core. */
                 "   ldmia r1!, {r2, r3}             \n"
                 "   ldr r2, [r2]                    \n" /* r2 = Core number */
                 "   lsls r2, #2                     \n"
                 "   ldr r3, [r3, r2]                \n" /* r3 = pxCurrentTCBs[get_core_num()] */
-            #else
+            #else /* configNUMBER_OF_CORES != 1 */
                 "   ldr r3, =pxCurrentTCBs          \n"
                 "   ldr r3, [r3]                    \n"  /* r3 = pxCurrentTCBs[0] */
-            #endif /* portRUNNING_ON_BOTH_CORES */
+            #endif /* configNUMBER_OF_CORES != 1 */
             "    ldr  r0, [r3]                       \n" /* The first item in pxCurrentTCB is the task top of stack. */
             "    adds r0, #32                        \n" /* Discard everything up to r0. */
             "    msr  psp, r0                        \n" /* This is now the new top of stack to use in the task. */
@@ -269,7 +265,7 @@
             "    pop  {r2}                           \n" /* Pop and discard XPSR. */
             "    cpsie i                             \n" /* The first task has its context and interrupts can be enabled. */
             "    bx   r3                             \n" /* Finally, jump to the user defined task code. */
-            #if portRUNNING_ON_BOTH_CORES
+            #if configNUMBER_OF_CORES != 1
                 "                                   \n"
                 "     .align 4                      \n"
                 "ulAsmLocals:                       \n"
@@ -291,7 +287,7 @@
         /* And explicitly clear any other IRQ flags. */
         multicore_fifo_clear_irq();
 
-        #if ( portRUNNING_ON_BOTH_CORES == 1 )
+        #if ( configNUMBER_OF_CORES != 1 )
             portYIELD_FROM_ISR( pdTRUE );
         #elif ( configSUPPORT_PICO_SYNC_INTEROP == 1 )
             BaseType_t xHigherPriorityTaskWoken = pdFALSE;
@@ -301,7 +297,7 @@
             spin_unlock( pxCrossCoreSpinLock, ulSave );
             xEventGroupSetBitsFromISR( xEventGroup, ulBits, &xHigherPriorityTaskWoken );
             portYIELD_FROM_ISR( xHigherPriorityTaskWoken );
-        #endif /* portRUNNING_ON_BOTH_CORES */
+        #endif /* configNUMBER_OF_CORES != 1 */
     }
 #endif /* if ( LIB_PICO_MULTICORE == 1 ) && ( configSUPPORT_PICO_SYNC_INTEROP == 1 ) */
 
@@ -346,23 +342,21 @@
         /* Should never get here as the tasks will now be executing!  Call the task
          * exit error function to prevent compiler warnings about a static function
          * not being called in the case that the application writer overrides this
-         * functionality by defining configTASK_RETURN_ADDRESS.  Call
-         * vTaskSwitchContext() so link time optimisation does not remove the
+         * functionality by defining configTASK_RETURN_ADDRESS. Call
+         * vTaskSwitchContext() so link time optimization does not remove the
          * symbol. */
         vTaskSwitchContext( portGET_CORE_ID() );
         prvTaskExitError();
 
-        /* Should not get here! */
+        /* Should not get here. */
         return 0;
     }
 
-    #if portRUNNING_ON_BOTH_CORES
-        static void prvDisableInterruptsAndPortStartSchedulerOnCore( void )
-        {
-            portDISABLE_INTERRUPTS();
-            xPortStartSchedulerOnCore();
-        }
-    #endif
+    static void prvDisableInterruptsAndPortStartSchedulerOnCore( void )
+    {
+        portDISABLE_INTERRUPTS();
+        xPortStartSchedulerOnCore();
+    }
 
 /*
  * See header file for description.
@@ -375,7 +369,7 @@
         spin_lock_claim( configSMP_SPINLOCK_0 );
         spin_lock_claim( configSMP_SPINLOCK_1 );
 
-        #if portRUNNING_ON_BOTH_CORES
+        #if configNUMBER_OF_CORES != 1
             ucPrimaryCoreNum = configTICK_CORE;
             configASSERT( get_core_num() == 0 ); /* we must be started on core 0 */
             multicore_reset_core1();
@@ -418,7 +412,7 @@
             #if ( configSUPPORT_PICO_SYNC_INTEROP == 1 )
                 multicore_fifo_clear_irq();
                 multicore_fifo_drain();
-                uint32_t irq_num = 15 + get_core_num();
+                uint32_t irq_num = SIO_IRQ_PROC0 + get_core_num();
                 irq_set_priority( irq_num, portMIN_INTERRUPT_PRIORITY );
                 irq_set_exclusive_handler( irq_num, prvFIFOInterruptHandler );
                 irq_set_enabled( irq_num, 1 );
@@ -431,8 +425,8 @@
         /* Should never get here as the tasks will now be executing!  Call the task
          * exit error function to prevent compiler warnings about a static function
          * not being called in the case that the application writer overrides this
-         * functionality by defining configTASK_RETURN_ADDRESS.  Call
-         * vTaskSwitchContext() so link time optimisation does not remove the
+         * functionality by defining configTASK_RETURN_ADDRESS. Call
+         * vTaskSwitchContext() so link time optimization does not remove the
          * symbol. */
         vTaskSwitchContext();
         prvTaskExitError();
@@ -446,20 +440,14 @@
 
 void vPortEndScheduler( void )
 {
-    /* Not implemented in ports where there is nothing to return to. */
-    panic_unsupported();
+    /* Not implemented in ports where there is nothing to return to.
+     * Artificially force an assert. */
+    configASSERT( portGET_CORE_ID() == 1000UL );
 }
 /*-----------------------------------------------------------*/
 
 void vPortYield( void )
 {
-    #if ( configSUPPORT_PICO_SYNC_INTEROP == 1 )
-
-        /* We are not in an ISR, and pxYieldSpinLock is always dealt with and
-         * cleared when interrupts are re-enabled, so should be NULL */
-        configASSERT( pxYieldSpinLock[ portGET_CORE_ID() ] == NULL );
-    #endif /* configSUPPORT_PICO_SYNC_INTEROP */
-
     /* Set a PendSV to request a context switch. */
     portNVIC_INT_CTRL_REG = portNVIC_PENDSVSET_BIT;
 
@@ -495,21 +483,6 @@
     }
 #endif /* #if ( configNUMBER_OF_CORES == 1 ) */
 
-void vPortEnableInterrupts( void )
-{
-    #if ( configSUPPORT_PICO_SYNC_INTEROP == 1 )
-        int xCoreID = ( int ) portGET_CORE_ID();
-
-        if( pxYieldSpinLock[ xCoreID ] )
-        {
-            spin_lock_t * const pxTmpLock = pxYieldSpinLock[ xCoreID ];
-            pxYieldSpinLock[ xCoreID ] = NULL;
-            spin_unlock( pxTmpLock, ulYieldSpinLockSaveValue[ xCoreID ] );
-        }
-    #endif
-    __asm volatile ( " cpsie i " ::: "memory" );
-}
-
 /*-----------------------------------------------------------*/
 
 uint32_t ulSetInterruptMaskFromISR( void )
@@ -542,7 +515,7 @@
 
     configASSERT( xCoreID != ( int ) portGET_CORE_ID() );
 
-    #if portRUNNING_ON_BOTH_CORES
+    #if configNUMBER_OF_CORES != 1
 
         /* Non blocking, will cause interrupt on other core if the queue isn't already full,
          * in which case an IRQ must be pending */
@@ -645,7 +618,7 @@
             "                                       \n"
             "   adr    r0, ulAsmLocals2             \n" /* Get the location of the current TCB for the current core. */
             "   ldmia r0!, {r2, r3}                 \n"
-            #if portRUNNING_ON_BOTH_CORES
+            #if configNUMBER_OF_CORES != 1
                 "   ldr r0, [r2]                    \n" /* r0 = Core number */
                 "   lsls r0, r0, #2                 \n"
                 "   adds r3, r0                     \n" /* r3 = &pxCurrentTCBs[get_core_num()] */
@@ -685,11 +658,11 @@
                 "   subs r1, r1, #48                \n"
                 "   stmia r1!, {r4-r7}              \n"
             #endif /* portUSE_DIVIDER_SAVE_RESTORE */
-            #if portRUNNING_ON_BOTH_CORES
+            #if configNUMBER_OF_CORES != 1
                 "   ldr r0, [r2]                    \n" /* r0 = Core number */
             #else
                 "   movs r0, #0                     \n"
-            #endif /* portRUNNING_ON_BOTH_CORES */
+            #endif /* configNUMBER_OF_CORES != 1 */
             "   push {r3, r14}                      \n"
             "   cpsid i                             \n"
             "   bl vTaskSwitchContext               \n"
@@ -1001,10 +974,10 @@
         #if ( configTICK_TYPE_WIDTH_IN_BITS == TICK_TYPE_WIDTH_16_BITS )
             ulBit = 1u << ( spin_lock_get_num( spinLock ) & 0x7u );
         #elif ( configTICK_TYPE_WIDTH_IN_BITS == TICK_TYPE_WIDTH_32_BITS )
-            ulBit = 1u << spin_lock_get_num( spinLock );
-            /* reduce to range 0-24 */
-            ulBit |= ulBit << 8u;
-            ulBit >>= 8u;
+            /* Avoid potential use of SIO divider for % here out of abundance of caution */
+            ulBit = spin_lock_get_num( spinLock );
+            if (ulBit >= 24) ulBit -= 24;
+            ulBit = 1u << ulBit;
         #endif /* configTICK_TYPE_WIDTH_IN_BITS */
         return ( EventBits_t ) ulBit;
     }
@@ -1022,8 +995,8 @@
                                               uint32_t ulSave )
     {
         configASSERT( !portCHECK_IF_IN_ISR() );
+        configASSERT( pxLock->spin_lock );
 
-        /* note no need to check LIB_PICO_MULTICORE, as this is always returns true if that is not defined */
         if( !portIS_FREE_RTOS_CORE() )
         {
             spin_unlock( pxLock->spin_lock, ulSave );
@@ -1031,15 +1004,43 @@
         }
         else
         {
-            configASSERT( pxYieldSpinLock[ portGET_CORE_ID() ] == NULL );
-
-            /* we want to hold the lock until the event bits have been set; since interrupts are currently disabled */
-            /* by the spinlock, we can defer until portENABLE_INTERRUPTS is called which is always called when */
-            /* the scheduler is unlocked during this call */
-            configASSERT( pxLock->spin_lock );
-            int xCoreID = ( int ) portGET_CORE_ID();
-            pxYieldSpinLock[ xCoreID ] = pxLock->spin_lock;
-            ulYieldSpinLockSaveValue[ xCoreID ] = ulSave;
+            /* The requirement (from the SDK) on this implementation is that this method
+             * should always wake up from a corresponding call to vPortLockInternalSpinUnlockWithNotify
+             * that happens after this method is called.
+             *
+             * The moment that we unlock the spin lock, we need to be sure that
+             * there is no way that we end up blocking in xEventGroupWaitBits,
+             * despite the fact that other tasks can now run, if the corresponding
+             * unlock has occurred.
+             *
+             * Previously the RP2xxx ports used to attempt to disable IRQs until the
+             * task actually (potentially) became blocked by hooking the IRQ re-enable
+             * when xEventGroupWaitBits completes (or switches tasks), but this
+             * was a broken hack, in that IRQs are re-enabled at other points during
+             * that call.
+             *
+             * This deferred IRQ enable is not actually needed, because all we
+             * care about is that:
+             *
+             * Even in the presence of other tasks acquiring then releasing
+             * the lock, between the interrupt_enable and the xEventGroupWaitBits,
+             * the corresponding bit will still be set.
+             *
+             * This is the case, as even any intervening blocked lock (which
+             * clears the event bit) will need to unlock it before we proceed,
+             * which will set the event bit again.
+             *
+             * The multiplexing down of multiple spin lock numbers to fewer
+             * event bits does not cause a possible race condition,
+             * but it does mean that a task waiting for lock A can be
+             * blocked by a task B which owns another lock.
+             *
+             * This could be fixed by using an array of event groups, however
+             * since the SDK spin locks are generally intended for very short
+             * term usage anyway, and rarely nested except in exotic cases
+             * like video output, we'll leave it as one event group for now
+             */
+            spin_unlock( pxLock->spin_lock, ulSave);
             xEventGroupWaitBits( xEventGroup, prvGetEventGroupBit( pxLock->spin_lock ),
                                  pdTRUE, pdFALSE, portMAX_DELAY );
         }
@@ -1072,11 +1073,7 @@
         else
         {
             __sev();
-            #if ( portRUNNING_ON_BOTH_CORES == 0 )
-
-                /* We could sent the bits across the FIFO which would have required us to block here if the FIFO was full,
-                 * or we could have just set all bits on the other side, however it seems reasonable instead to take
-                 * the hit of another spin lock to protect an accurate bit set. */
+            #if ( configNUMBER_OF_CORES == 1 )
                 if( pxCrossCoreSpinLock != pxLock->spin_lock )
                 {
                     spin_lock_unsafe_blocking( pxCrossCoreSpinLock );
@@ -1090,7 +1087,7 @@
 
                 /* This causes fifo irq on the other (FreeRTOS) core which will do the set the event bits */
                 sio_hw->fifo_wr = 0;
-            #endif /* portRUNNING_ON_BOTH_CORES == 0 */
+            #endif /* configNUMBER_OF_CORES == 1 */
             spin_unlock( pxLock->spin_lock, ulSave );
         }
     }
@@ -1100,6 +1097,7 @@
                                                                  absolute_time_t uxUntil )
     {
         configASSERT( !portCHECK_IF_IN_ISR() );
+        configASSERT( pxLock->spin_lock );
 
         /* note no need to check LIB_PICO_MULTICORE, as this is always returns true if that is not defined */
         if( !portIS_FREE_RTOS_CORE() )
@@ -1110,19 +1108,14 @@
         else
         {
             configASSERT( portIS_FREE_RTOS_CORE() );
-            configASSERT( pxYieldSpinLock[ portGET_CORE_ID() ] == NULL );
 
             TickType_t uxTicksToWait = prvGetTicksToWaitBefore( uxUntil );
 
             if( uxTicksToWait )
             {
-                /* We want to hold the lock until the event bits have been set; since interrupts are currently disabled
-                 * by the spinlock, we can defer until portENABLE_INTERRUPTS is called which is always called when
-                 * the scheduler is unlocked during this call */
-                configASSERT( pxLock->spin_lock );
-                int xCoreID = ( int ) portGET_CORE_ID();
-                pxYieldSpinLock[ xCoreID ] = pxLock->spin_lock;
-                ulYieldSpinLockSaveValue[ xCoreID ] = ulSave;
+                /* See comment in vPortLockInternalSpinUnlockWithWait for detail
+                 * about possible race conditions */
+                spin_unlock( pxLock->spin_lock, ulSave );
                 xEventGroupWaitBits( xEventGroup,
                                      prvGetEventGroupBit( pxLock->spin_lock ), pdTRUE,
                                      pdFALSE, uxTicksToWait );
@@ -1152,9 +1145,9 @@
         {
             /* This must be done even before the scheduler is started, as the spin lock
              * is used by the overrides of the SDK wait/notify primitives */
-            #if ( portRUNNING_ON_BOTH_CORES == 0 )
+            #if ( configNUMBER_OF_CORES == 1 )
                 pxCrossCoreSpinLock = spin_lock_instance( next_striped_spin_lock_num() );
-            #endif /* portRUNNING_ON_BOTH_CORES */
+            #endif /* configNUMBER_OF_CORES == 1 */
 
             /* The event group is not used prior to scheduler init, but is initialized
              * here to since it logically belongs with the spin lock */