Split volatile inline asm back out of pico_default_asm into pico_default_asm_volatile (#1420)

diff --git a/src/rp2_common/hardware_divider/include/hardware/divider.h b/src/rp2_common/hardware_divider/include/hardware/divider.h
index cab3e04..081034a 100644
--- a/src/rp2_common/hardware_divider/include/hardware/divider.h
+++ b/src/rp2_common/hardware_divider/include/hardware/divider.h
@@ -85,7 +85,7 @@
     // we use one less register and instruction than gcc which uses a TST instruction
 
     uint32_t tmp; // allow compiler to pick scratch register
-    pico_default_asm (
+    pico_default_asm_volatile (
     "hw_divider_result_loop_%=:"
     "ldr %0, [%1, %2]\n\t"
     "lsrs %0, %0, #1\n\t"
@@ -296,7 +296,7 @@
  *  \ingroup hardware_divider
  */
 static inline void hw_divider_pause(void) {
-    pico_default_asm (
+    pico_default_asm_volatile(
     "b _1_%=\n"
     "_1_%=:\n"
     "b _2_%=\n"
@@ -305,7 +305,7 @@
     "_3_%=:\n"
     "b _4_%=\n"
     "_4_%=:\n"
-    :: : );
+    :::);
 }
 
 /*! \brief Do a hardware unsigned HW divide, wait for result, return quotient
diff --git a/src/rp2_common/hardware_sync/include/hardware/sync.h b/src/rp2_common/hardware_sync/include/hardware/sync.h
index 2acf5a9..b15f36b 100644
--- a/src/rp2_common/hardware_sync/include/hardware/sync.h
+++ b/src/rp2_common/hardware_sync/include/hardware/sync.h
@@ -116,7 +116,7 @@
  */
 #if !__has_builtin(__sev)
 __force_inline static void __sev(void) {
-    pico_default_asm ("sev");
+    pico_default_asm_volatile ("sev");
 }
 #endif
 
@@ -128,7 +128,7 @@
  */
 #if !__has_builtin(__wfe)
 __force_inline static void __wfe(void) {
-    pico_default_asm ("wfe");
+    pico_default_asm_volatile ("wfe");
 }
 #endif
 
@@ -139,7 +139,7 @@
  */
 #if !__has_builtin(__wfi)
 __force_inline static void __wfi(void) {
-    pico_default_asm ("wfi");
+    pico_default_asm_volatile("wfi");
 }
 #endif
 
@@ -150,7 +150,7 @@
  * instruction will be observed before any explicit access after the instruction.
  */
 __force_inline static void __dmb(void) {
-    pico_default_asm ("dmb" : : : "memory");
+    pico_default_asm_volatile("dmb" : : : "memory");
 }
 
 /*! \brief Insert a DSB instruction in to the code path.
@@ -161,7 +161,7 @@
  * accesses before this instruction complete.
  */
 __force_inline static void __dsb(void) {
-    pico_default_asm ("dsb" : : : "memory");
+    pico_default_asm_volatile("dsb" : : : "memory");
 }
 
 /*! \brief Insert a ISB instruction in to the code path.
@@ -172,7 +172,7 @@
  * the ISB instruction has been completed.
  */
 __force_inline static void __isb(void) {
-    pico_default_asm ("isb");
+    pico_default_asm_volatile("isb" ::: "memory");
 }
 
 /*! \brief Acquire a memory fence
@@ -213,8 +213,10 @@
  */
 __force_inline static uint32_t save_and_disable_interrupts(void) {
     uint32_t status;
-    pico_default_asm ("mrs %0, PRIMASK" : "=r" (status)::);
-    pico_default_asm ("cpsid i");
+    pico_default_asm_volatile(
+            "mrs %0, PRIMASK\n"
+            "cpsid i"
+            : "=r" (status) :: "memory"); // "memory" clobber: compiler must not move loads/stores across the interrupt disable
     return status;
 }
 
@@ -224,7 +226,7 @@
  * \param status Previous interrupt status from save_and_disable_interrupts()
   */
 __force_inline static void restore_interrupts(uint32_t status) {
-    pico_default_asm ("msr PRIMASK,%0"::"r" (status) : );
+    pico_default_asm_volatile("msr PRIMASK,%0"::"r" (status) : "memory"); // "memory" clobber: keep preceding loads/stores before PRIMASK is restored
 }
 
 /*! \brief Get HW Spinlock instance from number
diff --git a/src/rp2_common/pico_platform/include/pico/platform.h b/src/rp2_common/pico_platform/include/pico/platform.h
index 61eb53a..64b5e96 100644
--- a/src/rp2_common/pico_platform/include/pico/platform.h
+++ b/src/rp2_common/pico_platform/include/pico/platform.h
@@ -335,7 +335,8 @@
 #define MIN(a, b) ((b)>(a)?(a):(b))
 #endif
 
-#define pico_default_asm(...) __asm volatile (".syntax unified\n" __VA_ARGS__)
+#define pico_default_asm(...) __asm (".syntax unified\n" __VA_ARGS__)
+#define pico_default_asm_volatile(...) __asm volatile (".syntax unified\n" __VA_ARGS__)
 
 /*! \brief Execute a breakpoint instruction
  *  \ingroup pico_platform
@@ -357,7 +358,7 @@
  * might - even above the memory store!)
  */
 __force_inline static void __compiler_memory_barrier(void) {
-    pico_default_asm ("" : : : "memory");
+    pico_default_asm_volatile ("" : : : "memory");
 }
 
 /*! \brief Macro for converting memory addresses to 32 bit addresses suitable for DMA
@@ -479,7 +480,7 @@
  */
 static __force_inline uint __get_current_exception(void) {
     uint exception;
-    pico_default_asm ("mrs %0, ipsr" : "=l" (exception));
+    pico_default_asm( "mrs %0, ipsr" : "=l" (exception));
     return exception;
 }
 
@@ -501,7 +502,7 @@
  * \param minimum_cycles the minimum number of system clock cycles to delay for
  */
 static inline void busy_wait_at_least_cycles(uint32_t minimum_cycles) {
-    pico_default_asm (
+    pico_default_asm_volatile(
         "1: subs %0, #3\n"
         "bcs 1b\n"
         : "+l" (minimum_cycles) : : "memory"