pw_cpu_exception: On-device exception analyzer

Extends pw_cpu_exception_armv7m's LogCpuState() to analyze the captured
CPU context and log insight into the cause of the exception.

Change-Id: I8f706d0e88fefb5a9205435d819abfeee530407c
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/29380
Commit-Queue: Armando Montanez <amontanez@google.com>
Reviewed-by: Ewout van Bekkum <ewout@google.com>
diff --git a/pw_cpu_exception_armv7m/BUILD b/pw_cpu_exception_armv7m/BUILD
index 453b448..9d90070 100644
--- a/pw_cpu_exception_armv7m/BUILD
+++ b/pw_cpu_exception_armv7m/BUILD
@@ -24,6 +24,7 @@
         "proto_dump.cc",
         "public/pw_cpu_exception_armv7m/cpu_state.h",
         "public/pw_cpu_exception_armv7m/proto_dump.h",
+        "pw_cpu_exception_armv7m_private/cortex_m_constants.h",
     ],
 )
 
diff --git a/pw_cpu_exception_armv7m/BUILD.gn b/pw_cpu_exception_armv7m/BUILD.gn
index 61395e2..a3d6016 100644
--- a/pw_cpu_exception_armv7m/BUILD.gn
+++ b/pw_cpu_exception_armv7m/BUILD.gn
@@ -22,6 +22,7 @@
 
 config("default_config") {
   include_dirs = [ "public" ]
+  visibility = [ ":*" ]
 }
 
 pw_source_set("support") {
@@ -33,7 +34,10 @@
   ]
   deps = [ dir_pw_log ]
   public = [ "public/pw_cpu_exception_armv7m/cpu_state.h" ]
-  sources = [ "cpu_state.cc" ]
+  sources = [
+    "cpu_state.cc",
+    "pw_cpu_exception_armv7m_private/cortex_m_constants.h",
+  ]
 }
 
 pw_source_set("proto_dump") {
@@ -61,7 +65,10 @@
     "$dir_pw_cpu_exception:handler",
     "$dir_pw_preprocessor",
   ]
-  sources = [ "entry.cc" ]
+  sources = [
+    "entry.cc",
+    "pw_cpu_exception_armv7m_private/cortex_m_constants.h",
+  ]
 }
 
 pw_test_group("tests") {
diff --git a/pw_cpu_exception_armv7m/cpu_state.cc b/pw_cpu_exception_armv7m/cpu_state.cc
index bec977e..0c65f7f 100644
--- a/pw_cpu_exception_armv7m/cpu_state.cc
+++ b/pw_cpu_exception_armv7m/cpu_state.cc
@@ -19,10 +19,139 @@
 #include <span>
 
 #include "pw_cpu_exception/support.h"
+#include "pw_cpu_exception_armv7m_private/cortex_m_constants.h"
 #include "pw_log/log.h"
 #include "pw_string/string_builder.h"
 
+// TODO(amontanez): Set up config when this module is moved to *_cortex_m.
+#ifndef PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP
+#define PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP 0
+#endif  // PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP
+
 namespace pw::cpu_exception {
+namespace {
+
+// Logs a help string for each fault flag that is set in the given CFSR value.
+// Only called when PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP is enabled.
+[[maybe_unused]] void AnalyzeCfsr(const uint32_t cfsr) {
+  if (cfsr == 0) {
+    return;
+  }
+  PW_LOG_INFO("Active CFSR fields:");
+  // Memory management fault fields.
+  if (cfsr & kCfsrIaccviolMask) {
+    PW_LOG_ERROR("  IACCVIOL: MPU violation on instruction fetch");
+  }
+  if (cfsr & kCfsrDaccviolMask) {
+    PW_LOG_ERROR("  DACCVIOL: MPU violation on memory read/write");
+  }
+  if (cfsr & kCfsrMunstkerrMask) {
+    PW_LOG_ERROR("  MUNSTKERR: MPU violation on exception return");
+  }
+  if (cfsr & kCfsrMstkerrMask) {
+    PW_LOG_ERROR("  MSTKERR: MPU violation on exception entry");
+  }
+  if (cfsr & kCfsrMlsperrMask) {
+    PW_LOG_ERROR("  MLSPERR: MPU violation on lazy FPU state preservation");
+  }
+  if (cfsr & kCfsrMmarvalidMask) {
+    PW_LOG_ERROR("  MMARVALID: MMFAR register is valid");
+  }
+
+  // Bus fault fields.
+  if (cfsr & kCfsrIbuserrMask) {
+    PW_LOG_ERROR("  IBUSERR: Bus fault on instruction fetch");
+  }
+  if (cfsr & kCfsrPreciserrMask) {
+    PW_LOG_ERROR("  PRECISERR: Precise bus fault");
+  }
+  if (cfsr & kCfsrImpreciserrMask) {
+    PW_LOG_ERROR("  IMPRECISERR: Imprecise bus fault");
+  }
+  if (cfsr & kCfsrUnstkerrMask) {
+    PW_LOG_ERROR("  UNSTKERR: Derived bus fault on exception context restore");
+  }
+  if (cfsr & kCfsrStkerrMask) {
+    PW_LOG_ERROR("  STKERR: Derived bus fault on exception context save");
+  }
+  if (cfsr & kCfsrLsperrMask) {
+    PW_LOG_ERROR("  LSPERR: Derived bus fault on lazy FPU state preservation");
+  }
+  if (cfsr & kCfsrBfarvalidMask) {
+    PW_LOG_ERROR("  BFARVALID: BFAR register is valid");
+  }
+
+  // Usage fault fields.
+  if (cfsr & kCfsrUndefinstrMask) {
+    PW_LOG_ERROR("  UNDEFINSTR: Encountered invalid instruction");
+  }
+  if (cfsr & kCfsrInvstateMask) {
+    PW_LOG_ERROR(
+        "  INVSTATE: Attempted to execute an instruction with an invalid "
+        "Execution Program Status Register (EPSR) value");
+  }
+  if (cfsr & kCfsrInvpcMask) {
+    PW_LOG_ERROR("  INVPC: Program Counter (PC) is not legal");
+  }
+  if (cfsr & kCfsrNocpMask) {
+    PW_LOG_ERROR("  NOCP: Coprocessor disabled or not present");
+  }
+  if (cfsr & kCfsrUnalignedMask) {
+    PW_LOG_ERROR("  UNALIGNED: Unaligned memory access");
+  }
+  if (cfsr & kCfsrDivbyzeroMask) {
+    PW_LOG_ERROR("  DIVBYZERO: Division by zero");
+  }
+  // This flag is only present on ARMv8-M cores.
+  if (cfsr & kCfsrStkofMask) {
+    PW_LOG_ERROR("  STKOF: Stack overflowed");
+  }
+}
+
+void AnalyzeException(const pw_CpuExceptionState& cpu_state) {
+  // Logs a high-level assessment of the cause of the exception. The checks
+  // below are ordered by priority so the most critical issues are highlighted
+  // first. They are not mutually exclusive; e.g. a bus fault could occur while
+  // an MPU violation is being handled, causing a nested fault.
+  if (cpu_state.extended.hfsr & kHfsrForcedMask) {
+    PW_LOG_CRITICAL("Encountered a nested CPU fault (See active CFSR fields)");
+  }
+  // TODO(pwbug/296): #if this out on ARMv7-M builds (STKOF is ARMv8-M only).
+  if (cpu_state.extended.cfsr & kCfsrStkofMask) {
+    if (cpu_state.extended.exc_return & kExcReturnStackMask) {
+      PW_LOG_CRITICAL("Encountered stack overflow in thread mode");
+    } else {
+      PW_LOG_CRITICAL("Encountered main (interrupt handler) stack overflow");
+    }
+  }
+  if (cpu_state.extended.cfsr & kCfsrMemFaultMask) {
+    if (cpu_state.extended.cfsr & kCfsrMmarvalidMask) {
+      PW_LOG_CRITICAL(
+          "Encountered Memory Protection Unit (MPU) violation at 0x%08" PRIx32,
+          cpu_state.extended.mmfar);
+    } else {
+      PW_LOG_CRITICAL("Encountered Memory Protection Unit (MPU) violation");
+    }
+  }
+  if (cpu_state.extended.cfsr & kCfsrBusFaultMask) {
+    if (cpu_state.extended.cfsr & kCfsrBfarvalidMask) {
+      PW_LOG_CRITICAL("Encountered bus fault at 0x%08" PRIx32,
+                      cpu_state.extended.bfar);
+    } else {
+      PW_LOG_CRITICAL("Encountered bus fault");
+    }
+  }
+  if (cpu_state.extended.cfsr & kCfsrUsageFaultMask) {
+    PW_LOG_CRITICAL("Encountered usage fault (See active CFSR fields)");
+  }
+  if ((cpu_state.extended.icsr & kIcsrVectactiveMask) == kNmiIsrNum) {
+    PW_LOG_INFO("Encountered non-maskable interrupt (NMI)");
+  }
+#if PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP
+  AnalyzeCfsr(cpu_state.extended.cfsr);
+#endif  // PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP
+}
+}  // namespace
 
 std::span<const uint8_t> RawFaultingCpuState(
     const pw_CpuExceptionState& cpu_state) {
@@ -78,7 +207,9 @@
   const ArmV7mFaultRegisters& base = cpu_state.base;
   const ArmV7mExtraRegisters& extended = cpu_state.extended;
 
-  PW_LOG_INFO("Captured CPU state:");
+  AnalyzeException(cpu_state);
+
+  PW_LOG_INFO("All captured CPU registers:");
 
 #define _PW_LOG_REGISTER(state_section, name) \
   PW_LOG_INFO("  %-10s 0x%08" PRIx32, #name, state_section.name)
diff --git a/pw_cpu_exception_armv7m/docs.rst b/pw_cpu_exception_armv7m/docs.rst
index 9c79720..5f1d58c 100644
--- a/pw_cpu_exception_armv7m/docs.rst
+++ b/pw_cpu_exception_armv7m/docs.rst
@@ -103,3 +103,13 @@
 
 While this allows some faults to nest, it doesn't guarantee all will properly
 nest.
+
+Configuration Options
+=====================
+
+ - ``PW_CPU_EXCEPTION_EXTENDED_CFSR_DUMP``: Enable extended logging in
+   ``pw::cpu_exception::LogCpuState()`` that dumps the active CFSR fields with
+   help strings. This is disabled by default since it increases the binary size
+   by >1.5KB when using plain-text logs, or ~460 Bytes when using tokenized
+   logging. It's useful to enable this for device bringup until your application
+   has an end-to-end crash reporting solution.
diff --git a/pw_cpu_exception_armv7m/entry.cc b/pw_cpu_exception_armv7m/entry.cc
index 77d5c43..b90dcb7 100644
--- a/pw_cpu_exception_armv7m/entry.cc
+++ b/pw_cpu_exception_armv7m/entry.cc
@@ -19,39 +19,12 @@
 
 #include "pw_cpu_exception/handler.h"
 #include "pw_cpu_exception_armv7m/cpu_state.h"
+#include "pw_cpu_exception_armv7m_private/cortex_m_constants.h"
 #include "pw_preprocessor/compiler.h"
 
 namespace pw::cpu_exception {
 namespace {
 
-// CMSIS/Cortex-M/ARMv7 related constants.
-// These values are from the ARMv7-M Architecture Reference Manual DDI 0403E.b.
-// https://static.docs.arm.com/ddi0403/e/DDI0403E_B_armv7m_arm.pdf
-
-// Masks for individual bits of CFSR. (ARMv7-M Section B3.2.15)
-constexpr uint32_t kMemFaultStart = 0x1u;
-constexpr uint32_t kMStkErrMask = kMemFaultStart << 4;
-constexpr uint32_t kBusFaultStart = 0x1u << 8;
-constexpr uint32_t kStkErrMask = kBusFaultStart << 4;
-
-// Bit masks for an exception return value. (ARMv7-M Section B1.5.8)
-constexpr uint32_t kExcReturnStackMask = (0x1u << 2);
-constexpr uint32_t kExcReturnBasicFrameMask = (0x1u << 4);
-
-// Memory mapped registers. (ARMv7-M Section B3.2.2, Table B3-4)
-volatile uint32_t& arm_v7m_cfsr =
-    *reinterpret_cast<volatile uint32_t*>(0xE000ED28u);
-volatile uint32_t& arm_v7m_mmfar =
-    *reinterpret_cast<volatile uint32_t*>(0xE000ED34u);
-volatile uint32_t& arm_v7m_bfar =
-    *reinterpret_cast<volatile uint32_t*>(0xE000ED38u);
-volatile uint32_t& arm_v7m_icsr =
-    *reinterpret_cast<volatile uint32_t*>(0xE000ED04u);
-volatile uint32_t& arm_v7m_hfsr =
-    *reinterpret_cast<volatile uint32_t*>(0xE000ED2Cu);
-volatile uint32_t& arm_v7m_shcsr =
-    *reinterpret_cast<volatile uint32_t*>(0xE000ED24u);
-
 // If the CPU fails to capture some registers, the captured struct members will
 // be populated with this value. The only registers that this value should be
 // loaded into are pc, lr, and psr when the CPU fails to push an exception
@@ -80,8 +53,8 @@
 // on exception entry).
 void CloneBaseRegistersFromPsp(pw_CpuExceptionState* cpu_state) {
   // If CPU succeeded in pushing context to PSP, copy it to the MSP.
-  if (!(cpu_state->extended.cfsr & kStkErrMask) &&
-      !(cpu_state->extended.cfsr & kMStkErrMask)) {
+  if (!(cpu_state->extended.cfsr & kCfsrStkerrMask) &&
+      !(cpu_state->extended.cfsr & kCfsrMstkerrMask)) {
     // TODO(amontanez): {r0-r3,r12} are captured in pw_CpuExceptionEntry(),
     //                  so this only really needs to copy pc, lr, and psr. Could
     //                  (possibly) improve speed, but would add marginally more
@@ -109,8 +82,8 @@
   // contents of cpu_state to the CPU-pushed register frame so the CPU can
   // continue. Otherwise, don't attempt as we'll likely end up in an escalated
   // hard fault.
-  if (!(cpu_state->extended.cfsr & kStkErrMask) &&
-      !(cpu_state->extended.cfsr & kMStkErrMask)) {
+  if (!(cpu_state->extended.cfsr & kCfsrStkerrMask) &&
+      !(cpu_state->extended.cfsr & kCfsrMstkerrMask)) {
     std::memcpy(reinterpret_cast<void*>(cpu_state->extended.psp),
                 &cpu_state->base,
                 sizeof(ArmV7mFaultRegisters));
@@ -139,8 +112,8 @@
   // If CPU context was not pushed to program stack (because program stack
   // wasn't in use, or an error occurred when pushing context), the PSP doesn't
   // need to be shifted.
-  if (!PspWasActive(cpu_state) || (cpu_state.extended.cfsr & kStkErrMask) ||
-      (cpu_state.extended.cfsr & kMStkErrMask)) {
+  if (!PspWasActive(cpu_state) || (cpu_state.extended.cfsr & kCfsrStkerrMask) ||
+      (cpu_state.extended.cfsr & kCfsrMstkerrMask)) {
     return 0;
   }
 
diff --git a/pw_cpu_exception_armv7m/pw_cpu_exception_armv7m_private/cortex_m_constants.h b/pw_cpu_exception_armv7m/pw_cpu_exception_armv7m_private/cortex_m_constants.h
new file mode 100644
index 0000000..c1ee60c
--- /dev/null
+++ b/pw_cpu_exception_armv7m/pw_cpu_exception_armv7m_private/cortex_m_constants.h
@@ -0,0 +1,89 @@
+// Copyright 2021 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#pragma once
+
+#include <cstdint>
+
+namespace pw::cpu_exception {
+
+// CMSIS/Cortex-M/ARMv7 related constants.
+// These values are from the ARMv7-M Architecture Reference Manual DDI 0403E.b.
+// https://static.docs.arm.com/ddi0403/e/DDI0403E_B_armv7m_arm.pdf
+
+constexpr uint32_t kThreadModeIsrNum = 0x0;
+constexpr uint32_t kNmiIsrNum = 0x2;
+constexpr uint32_t kHardFaultIsrNum = 0x3;
+constexpr uint32_t kMemFaultIsrNum = 0x4;
+constexpr uint32_t kBusFaultIsrNum = 0x5;
+constexpr uint32_t kUsageFaultIsrNum = 0x6;
+
+// Masks for Interrupt Control and State Register ICSR (ARMv7-M Section B3.2.4)
+constexpr uint32_t kIcsrVectactiveMask = (1 << 9) - 1;  // Bits [8:0].
+
+// Masks for individual bits of HFSR. (ARMv7-M Section B3.2.16)
+constexpr uint32_t kHfsrForcedMask = (0x1 << 30);
+
+// Masks for different sections of CFSR. (ARMv7-M Section B3.2.15)
+constexpr uint32_t kCfsrMemFaultMask = 0x000000ff;
+constexpr uint32_t kCfsrBusFaultMask = 0x0000ff00;
+constexpr uint32_t kCfsrUsageFaultMask = 0xffff0000;
+
+// Masks for individual bits of CFSR. (ARMv7-M Section B3.2.15)
+// Memory faults (MemManage Status Register)
+constexpr uint32_t kCfsrMemFaultStart = (0x1);
+constexpr uint32_t kCfsrIaccviolMask = (kCfsrMemFaultStart << 0);
+constexpr uint32_t kCfsrDaccviolMask = (kCfsrMemFaultStart << 1);
+constexpr uint32_t kCfsrMunstkerrMask = (kCfsrMemFaultStart << 3);
+constexpr uint32_t kCfsrMstkerrMask = (kCfsrMemFaultStart << 4);
+constexpr uint32_t kCfsrMlsperrMask = (kCfsrMemFaultStart << 5);
+constexpr uint32_t kCfsrMmarvalidMask = (kCfsrMemFaultStart << 7);
+// Bus faults (BusFault Status Register)
+constexpr uint32_t kCfsrBusFaultStart = (0x1 << 8);
+constexpr uint32_t kCfsrIbuserrMask = (kCfsrBusFaultStart << 0);
+constexpr uint32_t kCfsrPreciserrMask = (kCfsrBusFaultStart << 1);
+constexpr uint32_t kCfsrImpreciserrMask = (kCfsrBusFaultStart << 2);
+constexpr uint32_t kCfsrUnstkerrMask = (kCfsrBusFaultStart << 3);
+constexpr uint32_t kCfsrStkerrMask = (kCfsrBusFaultStart << 4);
+constexpr uint32_t kCfsrLsperrMask = (kCfsrBusFaultStart << 5);
+constexpr uint32_t kCfsrBfarvalidMask = (kCfsrBusFaultStart << 7);
+// Usage faults (UsageFault Status Register). STKOF exists only on ARMv8-M.
+constexpr uint32_t kCfsrUsageFaultStart = (0x1 << 16);
+constexpr uint32_t kCfsrUndefinstrMask = (kCfsrUsageFaultStart << 0);
+constexpr uint32_t kCfsrInvstateMask = (kCfsrUsageFaultStart << 1);
+constexpr uint32_t kCfsrInvpcMask = (kCfsrUsageFaultStart << 2);
+constexpr uint32_t kCfsrNocpMask = (kCfsrUsageFaultStart << 3);
+constexpr uint32_t kCfsrStkofMask = (kCfsrUsageFaultStart << 4);
+constexpr uint32_t kCfsrUnalignedMask = (kCfsrUsageFaultStart << 8);
+constexpr uint32_t kCfsrDivbyzeroMask = (kCfsrUsageFaultStart << 9);
+
+// Bit masks for an exception return value. (ARMv7-M Section B1.5.8)
+constexpr uint32_t kExcReturnStackMask = 0x1u << 2;
+constexpr uint32_t kExcReturnBasicFrameMask = 0x1u << 4;
+
+// Memory mapped registers. (ARMv7-M Section B3.2.2, Table B3-4)
+inline volatile uint32_t& arm_v7m_cfsr =
+    *reinterpret_cast<volatile uint32_t*>(0xE000ED28u);
+inline volatile uint32_t& arm_v7m_mmfar =
+    *reinterpret_cast<volatile uint32_t*>(0xE000ED34u);
+inline volatile uint32_t& arm_v7m_bfar =
+    *reinterpret_cast<volatile uint32_t*>(0xE000ED38u);
+inline volatile uint32_t& arm_v7m_icsr =
+    *reinterpret_cast<volatile uint32_t*>(0xE000ED04u);
+inline volatile uint32_t& arm_v7m_hfsr =
+    *reinterpret_cast<volatile uint32_t*>(0xE000ED2Cu);
+inline volatile uint32_t& arm_v7m_shcsr =
+    *reinterpret_cast<volatile uint32_t*>(0xE000ED24u);
+
+}  // namespace pw::cpu_exception