Multiple modules: Update ARM linker script alignment

Reduces the default section alignment in Cortex-M linker scripts to 4 bytes
and annotates the exceptions to that rule (the vector table and the stack)
to explain why they keep a larger alignment.

Change-Id: Ia90ad12c3886e3e415be0554bda2d49f4e47a3ca
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/107614
Reviewed-by: Ewout van Bekkum <ewout@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
Pigweed-Auto-Submit: Armando Montanez <amontanez@google.com>
diff --git a/pw_bloat/docs.rst b/pw_bloat/docs.rst
index bf6c885..2337d01 100644
--- a/pw_bloat/docs.rst
+++ b/pw_bloat/docs.rst
@@ -235,7 +235,7 @@
   SECTIONS
   {
     /* Main executable code. */
-    .code : ALIGN(8)
+    .code : ALIGN(4)
     {
       /* Application code. */
       *(.text)
@@ -243,27 +243,27 @@
       KEEP(*(.init))
       KEEP(*(.fini))
 
-      . = ALIGN(8);
+      . = ALIGN(4);
       /* Constants.*/
       *(.rodata)
       *(.rodata*)
     } >FLASH
 
     /* Explicitly initialized global and static data. (.data)*/
-    .static_init_ram : ALIGN(8)
+    .static_init_ram : ALIGN(4)
     {
       *(.data)
       *(.data*)
-      . = ALIGN(8);
+      . = ALIGN(4);
     } >RAM AT> FLASH
 
     /* Zero initialized global/static data. (.bss) */
-    .zero_init_ram (NOLOAD) : ALIGN(8)
+    .zero_init_ram (NOLOAD) : ALIGN(4)
     {
       *(.bss)
       *(.bss*)
       *(COMMON)
-      . = ALIGN(8);
+      . = ALIGN(4);
     } >RAM
   }
 
@@ -300,7 +300,7 @@
   SECTIONS
   {
     /* Main executable code. */
-    .code : ALIGN(8)
+    .code : ALIGN(4)
     {
       /* Application code. */
       *(.text)
@@ -308,41 +308,41 @@
       KEEP(*(.init))
       KEEP(*(.fini))
 
-      . = ALIGN(8);
+      . = ALIGN(4);
       /* Constants.*/
       *(.rodata)
       *(.rodata*)
     } >FLASH
 
     /* Explicitly initialized global and static data. (.data)*/
-    .static_init_ram : ALIGN(8)
+    .static_init_ram : ALIGN(4)
     {
       *(.data)
       *(.data*)
-      . = ALIGN(8);
+      . = ALIGN(4);
     } >RAM AT> FLASH
 
     /* Represents unused space in the FLASH segment. This MUST be the last
      * section assigned to the FLASH region.
      */
-    .FLASH.unused_space (NOLOAD) : ALIGN(8)
+    .FLASH.unused_space (NOLOAD) : ALIGN(4)
     {
       . = ABSOLUTE(ORIGIN(FLASH) + LENGTH(FLASH));
     } >FLASH
 
     /* Zero initialized global/static data. (.bss). */
-    .zero_init_ram (NOLOAD) : ALIGN(8)
+    .zero_init_ram (NOLOAD) : ALIGN(4)
     {
       *(.bss)
       *(.bss*)
       *(COMMON)
-      . = ALIGN(8);
+      . = ALIGN(4);
     } >RAM
 
     /* Represents unused space in the RAM segment. This MUST be the last section
      * assigned to the RAM region.
      */
-    .RAM.unused_space (NOLOAD) : ALIGN(8)
+    .RAM.unused_space (NOLOAD) : ALIGN(4)
     {
       . = ABSOLUTE(ORIGIN(RAM) + LENGTH(RAM));
     } >RAM
diff --git a/pw_boot_cortex_m/basic_cortex_m.ld b/pw_boot_cortex_m/basic_cortex_m.ld
index 1165c35..ae84e3e 100644
--- a/pw_boot_cortex_m/basic_cortex_m.ld
+++ b/pw_boot_cortex_m/basic_cortex_m.ld
@@ -111,6 +111,11 @@
    * so it's used on reset, or by explicitly setting the VTOR in a bootloader
    * manually to point to &pw_boot_vector_table_addr before interrupts are
    * enabled.
+   *
+   * The ARMv7-M architecture requires that this be aligned to at least 128
+   * bytes, and to a power of two that is greater than or equal to 4 times the
+   * number of supported exceptions. 512 has been selected as it accommodates
+   * most devices' vector tables.
    */
   .vector_table : ALIGN(512)
   {
@@ -121,22 +126,22 @@
   /* Represents unused space in the VECTOR_TABLE segment. This MUST be the last
    * section assigned to the VECTOR_TABLE region.
    */
-  .VECTOR_TABLE.unused_space (NOLOAD) : ALIGN(8)
+  .VECTOR_TABLE.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(VECTOR_TABLE) + LENGTH(VECTOR_TABLE));
   } >VECTOR_TABLE
 
   /* Main executable code. */
-  .code : ALIGN(8)
+  .code : ALIGN(4)
   {
-    . = ALIGN(8);
+    . = ALIGN(4);
     /* Application code. */
     *(.text)
     *(.text*)
     KEEP(*(.init))
     KEEP(*(.fini))
 
-    . = ALIGN(8);
+    . = ALIGN(4);
     /* Constants.*/
     *(.rodata)
     *(.rodata*)
@@ -147,7 +152,7 @@
      * Since the region isn't explicitly referenced, specify KEEP to prevent
      * link-time garbage collection. SORT is used for sections that have strict
      * init/de-init ordering requirements. */
-    . = ALIGN(8);
+    . = ALIGN(4);
     PROVIDE_HIDDEN(__preinit_array_start = .);
     KEEP(*(.preinit_array*))
     PROVIDE_HIDDEN(__preinit_array_end = .);
@@ -164,24 +169,24 @@
   } >FLASH
 
   /* Used by unwind-arm/ */
-  .ARM : ALIGN(8) {
+  .ARM : ALIGN(4) {
     __exidx_start = .;
     *(.ARM.exidx*)
     __exidx_end = .;
   } >FLASH
 
   /* Explicitly initialized global and static data. (.data)*/
-  .static_init_ram : ALIGN(8)
+  .static_init_ram : ALIGN(4)
   {
     *(.data)
     *(.data*)
-    . = ALIGN(8);
+    . = ALIGN(4);
   } >RAM AT> FLASH
 
   /* Represents unused space in the FLASH segment. This MUST be the last section
    * assigned to the FLASH region.
    */
-  .FLASH.unused_space (NOLOAD) : ALIGN(8)
+  .FLASH.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(FLASH) + LENGTH(FLASH));
   } >FLASH
@@ -198,23 +203,29 @@
    * as they only represent allocated memory regions, so they also do not need
    * to be loaded.
    */
-  .zero_init_ram (NOLOAD) : ALIGN(8)
+  .zero_init_ram (NOLOAD) : ALIGN(4)
   {
     *(.bss)
     *(.bss*)
     *(COMMON)
-    . = ALIGN(8);
+    . = ALIGN(4);
   } >RAM
 
-  .heap (NOLOAD) : ALIGN(8)
+  .heap (NOLOAD) : ALIGN(4)
   {
     pw_boot_heap_low_addr = .;
     . = . + PW_BOOT_HEAP_SIZE;
-    . = ALIGN(8);
+    . = ALIGN(4);
     pw_boot_heap_high_addr = .;
   } >RAM
 
-  /* Link-time check for stack overlaps. */
+  /* Link-time check for stack overlaps.
+   *
+   * The ARMv7-M architecture may require 8-byte alignment of the stack pointer
+   * rather than 4 in some contexts and implementations, so this region is
+   * 8-byte aligned (see ARMv7-M Architecture Reference Manual DDI0403E
+   * section B1.5.7).
+   */
   .stack (NOLOAD) : ALIGN(8)
   {
     /* Set the address that the main stack pointer should be initialized to. */
@@ -231,7 +242,7 @@
   /* Represents unused space in the RAM segment. This MUST be the last section
    * assigned to the RAM region.
    */
-  .RAM.unused_space (NOLOAD) : ALIGN(8)
+  .RAM.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(RAM) + LENGTH(RAM));
   } >RAM
diff --git a/pw_build_info/docs.rst b/pw_build_info/docs.rst
index 4e1500f..376bd4c 100644
--- a/pw_build_info/docs.rst
+++ b/pw_build_info/docs.rst
@@ -43,9 +43,9 @@
 .. code-block:: none
 
   /* Main executable code. */
-  .code : ALIGN(8)
+  .code : ALIGN(4)
   {
-    . = ALIGN(8);
+    . = ALIGN(4);
     /* Application code. */
     *(.text)
     *(.text*)
@@ -63,7 +63,7 @@
   } >FLASH
 
   /* Explicitly initialized global and static data. (.data) */
-  .static_init_ram : ALIGN(8)
+  .static_init_ram : ALIGN(4)
   {
     *(.data)
     *(.data*)
@@ -83,9 +83,9 @@
 .. code-block:: none
 
   /* Main executable code. */
-  .code : ALIGN(8)
+  .code : ALIGN(4)
   {
-    . = ALIGN(8);
+    . = ALIGN(4);
     /* Application code. */
     *(.text)
     *(.text*)
diff --git a/pw_stm32cube_build/py/pw_stm32cube_build/icf_to_ld.py b/pw_stm32cube_build/py/pw_stm32cube_build/icf_to_ld.py
index fafede0..662ea2f 100644
--- a/pw_stm32cube_build/py/pw_stm32cube_build/icf_to_ld.py
+++ b/pw_stm32cube_build/py/pw_stm32cube_build/icf_to_ld.py
@@ -143,16 +143,22 @@
 
 SECTIONS
 {{
+
+  /* The ARMv7-M and ARMv8-M architectures require that this be aligned to at
+   * least 128 bytes, and to a power of two that is greater than or equal to 4
+   * times the number of supported exceptions. 512 has been selected as it
+   * accommodates most vector tables.
+   */
   .isr_vector :
   {{
-    . = ALIGN(8);
+    . = ALIGN(512);
     KEEP(*(.isr_vector))
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   .text :
   {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     *(.text)
     *(.text*)
     *(.glue_7)
@@ -162,73 +168,73 @@
     KEEP (*(.init))
     KEEP (*(.fini))
 
-    . = ALIGN(8);
+    . = ALIGN(4);
     _etext = .;
   }} >FLASH
 
   .rodata :
   {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     *(.rodata)
     *(.rodata*)
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   .ARM.extab   : {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     *(.ARM.extab* .gnu.linkonce.armextab.*)
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   .ARM : {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     __exidx_start = .;
     *(.ARM.exidx*)
     __exidx_end = .;
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   .preinit_array     :
   {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     PROVIDE_HIDDEN (__preinit_array_start = .);
     KEEP (*(.preinit_array*))
     PROVIDE_HIDDEN (__preinit_array_end = .);
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   .init_array :
   {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     PROVIDE_HIDDEN (__init_array_start = .);
     KEEP (*(SORT(.init_array.*)))
     KEEP (*(.init_array*))
     PROVIDE_HIDDEN (__init_array_end = .);
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   .fini_array :
   {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     PROVIDE_HIDDEN (__fini_array_start = .);
     KEEP (*(SORT(.fini_array.*)))
     KEEP (*(.fini_array*))
     PROVIDE_HIDDEN (__fini_array_end = .);
-    . = ALIGN(8);
+    . = ALIGN(4);
   }} >FLASH
 
   _sidata = LOADADDR(.data);
   .data :
   {{
-    . = ALIGN(8);
+    . = ALIGN(4);
     _sdata = .;
     *(.data)
     *(.data*)
-    . = ALIGN(8);
+    . = ALIGN(4);
     _edata = .;
   }} >RAM AT> FLASH
 
-  . = ALIGN(8);
+  . = ALIGN(4);
   .bss :
   {{
     _sbss = .;
@@ -237,11 +243,16 @@
     *(.bss*)
     *(COMMON)
 
-    . = ALIGN(8);
+    . = ALIGN(4);
     _ebss = .;
     __bss_end__ = _ebss;
   }} >RAM
 
+  /* The ARMv7-M architecture may require 8-byte alignment of the stack pointer
+   * rather than 4 in some contexts and implementations, so this region is
+   * 8-byte aligned (see ARMv7-M Architecture Reference Manual DDI0403E
+   * section B1.5.7).
+   */
   ._user_heap_stack :
   {{
     . = ALIGN(8);
diff --git a/targets/emcraft_sf2_som/emcraft_sf2_som_mddr_debug.ld b/targets/emcraft_sf2_som/emcraft_sf2_som_mddr_debug.ld
index fb34753..dd3f2c6 100644
--- a/targets/emcraft_sf2_som/emcraft_sf2_som_mddr_debug.ld
+++ b/targets/emcraft_sf2_som/emcraft_sf2_som_mddr_debug.ld
@@ -111,6 +111,11 @@
    * so it's used on reset, or by explicitly setting the VTOR in a bootloader
    * manually to point to &pw_boot_vector_table_addr before interrupts are
    * enabled.
+   *
+   * The ARMv7-M architecture requires that this be aligned to at least 128
+   * bytes, and to a power of two that is greater than or equal to 4 times the
+   * number of supported exceptions. 512 has been selected as it accommodates
+   * this device's vector table.
    */
   .vector_table : ALIGN(512)
   {
@@ -121,13 +126,13 @@
   /* Represents unused space in the VECTOR_TABLE segment. This MUST be the last
    * section assigned to the VECTOR_TABLE region.
    */
-  .VECTOR_TABLE.unused_space (NOLOAD) : ALIGN(8)
+  .VECTOR_TABLE.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(VECTOR_TABLE) + LENGTH(VECTOR_TABLE));
   } >VECTOR_TABLE
 
   /* Main executable code. */
-  .code : ALIGN(0x10)
+  .code : ALIGN(4)
   {
    CREATE_OBJECT_SYMBOLS
     __text_load = LOADADDR(.code);
@@ -137,7 +142,7 @@
     KEEP(*(.init))
     KEEP(*(.fini))
 
-    . = ALIGN(0x10);
+    . = ALIGN(4);
     /* Constants.*/
     *(.rodata)
     *(.rodata*)
@@ -148,7 +153,7 @@
      * Since the region isn't explicitly referenced, specify KEEP to prevent
      * link-time garbage collection. SORT is used for sections that have strict
      * init/de-init ordering requirements. */
-    . = ALIGN(0x10);
+    . = ALIGN(4);
     PROVIDE_HIDDEN(__preinit_array_start = .);
     KEEP(*(.preinit_array*))
     PROVIDE_HIDDEN(__preinit_array_end = .);
@@ -165,24 +170,24 @@
   } >TEXT_EXTERNAL_RAM
 
   /* Used by unwind-arm/ */
-  .ARM : ALIGN(0x10) {
+  .ARM : ALIGN(4) {
     __exidx_start = .;
     *(.ARM.exidx*)
     __exidx_end = .;
   } >TEXT_EXTERNAL_RAM
 
   /* Explicitly initialized global and static data. (.data)*/
-  .static_init_ram : ALIGN(0x10)
+  .static_init_ram : ALIGN(4)
   {
     *(.data)
     *(.data*)
-    . = ALIGN(0x10);
+    . = ALIGN(4);
   } >EXTERNAL_RAM AT> TEXT_EXTERNAL_RAM
 
   /* Represents unused space in the TEXT_EXTERNAL_RAM segment. This MUST be the
    * last section assigned to the TEXT_EXTERNAL_RAM region.
    */
-  .TEXT_EXTERNAL_RAM.unused_space (NOLOAD) : ALIGN(8)
+  .TEXT_EXTERNAL_RAM.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(TEXT_EXTERNAL_RAM) + LENGTH(TEXT_EXTERNAL_RAM));
   } >TEXT_EXTERNAL_RAM
@@ -199,23 +204,29 @@
    * as they only represent allocated memory regions, so they also do not need
    * to be loaded.
    */
-  .zero_init_ram (NOLOAD) : ALIGN(0x10)
+  .zero_init_ram (NOLOAD) : ALIGN(4)
   {
     *(.bss)
     *(.bss*)
     *(COMMON)
-    . = ALIGN(0x10);
+    . = ALIGN(4);
   } >EXTERNAL_RAM
 
-  .heap (NOLOAD) : ALIGN(8)
+  .heap (NOLOAD) : ALIGN(4)
   {
     pw_boot_heap_low_addr = .;
     . = . + PW_BOOT_HEAP_SIZE;
-    . = ALIGN(8);
+    . = ALIGN(4);
     pw_boot_heap_high_addr = .;
   } >EXTERNAL_RAM
 
-  /* Link-time check for stack overlaps. */
+  /* Link-time check for stack overlaps.
+   *
+   * The ARMv7-M architecture may require 8-byte alignment of the stack pointer
+   * rather than 4 in some contexts and implementations, so this region is
+   * 8-byte aligned (see ARMv7-M Architecture Reference Manual DDI0403E
+   * section B1.5.7).
+   */
   .stack (NOLOAD) : ALIGN(8)
   {
     /* Set the address that the main stack pointer should be initialized to. */
@@ -232,7 +243,7 @@
   /* Represents unused space in the EXTERNAL_RAM segment. This MUST be the last
    * section assigned to the EXTERNAL_RAM region.
    */
-  .EXTERNAL_RAM.unused_space (NOLOAD) : ALIGN(8)
+  .EXTERNAL_RAM.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(EXTERNAL_RAM) + LENGTH(EXTERNAL_RAM));
   } >EXTERNAL_RAM
diff --git a/targets/mimxrt595_evk/mimxrt595_flash.ld b/targets/mimxrt595_evk/mimxrt595_flash.ld
index 9dcaf22..8c54099 100644
--- a/targets/mimxrt595_evk/mimxrt595_flash.ld
+++ b/targets/mimxrt595_evk/mimxrt595_flash.ld
@@ -117,6 +117,11 @@
    * so it's used on reset, or by explicitly setting the VTOR in a bootloader
    * manually to point to &pw_boot_vector_table_addr before interrupts are
    * enabled.
+   *
+   * The ARMv8-M architecture requires that this be aligned to at least 128
+   * bytes, and to a power of two that is greater than or equal to 4 times the
+   * number of supported exceptions. 512 has been selected as it accommodates
+   * this device's vector table.
    */
   .vector_table : ALIGN(512)
   {
@@ -127,22 +132,22 @@
   /* Represents unused space in the VECTOR_TABLE segment. This MUST be the last
    * section assigned to the VECTOR_TABLE region.
    */
-  .VECTOR_TABLE.unused_space (NOLOAD) : ALIGN(8)
+  .VECTOR_TABLE.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(VECTOR_TABLE) + LENGTH(VECTOR_TABLE));
   } >VECTOR_TABLE
 
   /* Main executable code. */
-  .code : ALIGN(8)
+  .code : ALIGN(4)
   {
-    . = ALIGN(8);
+    . = ALIGN(4);
     /* Application code. */
     *(.text)
     *(.text*)
     KEEP(*(.init))
     KEEP(*(.fini))
 
-    . = ALIGN(8);
+    . = ALIGN(4);
     /* Constants.*/
     *(.rodata)
     *(.rodata*)
@@ -160,7 +165,7 @@
      * Since the region isn't explicitly referenced, specify KEEP to prevent
      * link-time garbage collection. SORT is used for sections that have strict
      * init/de-init ordering requirements. */
-    . = ALIGN(8);
+    . = ALIGN(4);
     PROVIDE_HIDDEN(__preinit_array_start = .);
     KEEP(*(.preinit_array*))
     PROVIDE_HIDDEN(__preinit_array_end = .);
@@ -177,26 +182,26 @@
   } >FLASH
 
   /* Used by unwind-arm/ */
-  .ARM : ALIGN(8) {
+  .ARM : ALIGN(4) {
     __exidx_start = .;
     *(.ARM.exidx*)
     __exidx_end = .;
   } >FLASH
 
   /* Explicitly initialized global and static data. (.data)*/
-  .static_init_ram : ALIGN(8)
+  .static_init_ram : ALIGN(4)
   {
     *(CodeQuickAccess)
     *(DataQuickAccess)
     *(.data)
     *(.data*)
-    . = ALIGN(8);
+    . = ALIGN(4);
   } >RAM AT> FLASH
 
   /* Represents unused space in the FLASH segment. This MUST be the last section
    * assigned to the FLASH region.
    */
-  .FLASH.unused_space (NOLOAD) : ALIGN(8)
+  .FLASH.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(FLASH) + LENGTH(FLASH));
   } >FLASH
@@ -213,23 +218,28 @@
    * as they only represent allocated memory regions, so they also do not need
    * to be loaded.
    */
-  .zero_init_ram (NOLOAD) : ALIGN(8)
+  .zero_init_ram (NOLOAD) : ALIGN(4)
   {
     *(.bss)
     *(.bss*)
     *(COMMON)
-    . = ALIGN(8);
+    . = ALIGN(4);
   } >RAM
 
-  .heap (NOLOAD) : ALIGN(8)
+  .heap (NOLOAD) : ALIGN(4)
   {
     pw_boot_heap_low_addr = .;
     . = . + PW_BOOT_HEAP_SIZE;
-    . = ALIGN(8);
+    . = ALIGN(4);
     pw_boot_heap_high_addr = .;
   } >RAM
 
-  /* Link-time check for stack overlaps. */
+  /* Link-time check for stack overlaps.
+   *
+   * The ARMv8-M architecture requires 8-byte alignment of the stack pointer
+   * rather than 4 in some contexts, so this region is 8-byte aligned (see
+   * ARMv8-M Architecture Reference Manual DDI0553 section B3.8).
+   */
   .stack (NOLOAD) : ALIGN(8)
   {
     /* Set the address that the main stack pointer should be initialized to. */
@@ -246,7 +256,7 @@
   /* Represents unused space in the RAM segment. This MUST be the last section
    * assigned to the RAM region.
    */
-  .RAM.unused_space (NOLOAD) : ALIGN(8)
+  .RAM.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(RAM) + LENGTH(RAM));
   } >RAM
@@ -265,7 +275,7 @@
   /* Represents unused space in the USB_SRAM segment. This MUST be the last
    * section assigned to the USB_SRAM region.
    */
-  .USB_SRAM.unused_space (NOLOAD) : ALIGN(8)
+  .USB_SRAM.unused_space (NOLOAD) : ALIGN(4)
   {
     . = ABSOLUTE(ORIGIN(USB_SRAM) + LENGTH(USB_SRAM));
   } >USB_SRAM