pw_snapshot: Add ElfMatcher

Adds an argument to pw_snapshot's process_snapshot() that allows users
to pass a function that provides a matching ELF file for a given
Snapshot.

Change-Id: Ic0a67ed26cde9682db09775c94c86b8f7486a165
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/55054
Pigweed-Auto-Submit: Armando Montanez <amontanez@google.com>
Reviewed-by: Ewout van Bekkum <ewout@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_snapshot/module_usage.rst b/pw_snapshot/module_usage.rst
index 52fb42d..b94a1f1 100644
--- a/pw_snapshot/module_usage.rst
+++ b/pw_snapshot/module_usage.rst
@@ -141,3 +141,38 @@
   Stack info
     Stack used:   0x2001ac00 - 0x2001ab0c (244 bytes, 47.66%)
     Stack limits: 0x2001ac00 - 0x2001aa00 (512 bytes)
+
+---------------------
+Symbolizing Addresses
+---------------------
+The snapshot processor tool has built-in support for symbolization of some data
+embedded into snapshots. Taking advantage of this requires the use of a
+project-provided ``ElfMatcher`` callback. This is used by the snapshot processor
+to understand which ELF file should be used to symbolize which snapshot in cases
+where a snapshot has related snapshots embedded inside of it.
+
+Here's an example implementation that uses the device name:
+
+.. code-block:: py
+
+  # Given a firmware bundle directory, determine the ELF file associated with
+  # the provided snapshot.
+  def _snapshot_elf_matcher(fw_bundle_dir: Path,
+                            snapshot: snapshot_pb2.Snapshot) -> Optional[Path]:
+      metadata = MetadataProcessor(snapshot.metadata, DETOKENIZER)
+      if metadata.device_name().startswith('GSHOE_MAIN_CORE'):
+          return fw_bundle_dir / 'main.elf'
+      if metadata.device_name().startswith('GSHOE_SENSOR_CORE'):
+          return fw_bundle_dir / 'sensors.elf'
+      return None
+
+
+  # A project-specific wrapper to decode snapshots that provides a detokenizer
+  # and ElfMatcher.
+  def decode_snapshots(snapshot: bytes, fw_bundle_dir: Path) -> str:
+
+      # This is the actual ElfMatcher, which wraps the helper in a lambda that
+      # captures the passed firmware artifacts directory.
+      matcher: processor.ElfMatcher = lambda snapshot: _snapshot_elf_matcher(
+          fw_bundle_dir, snapshot)
+      return processor.process_snapshots(snapshot, DETOKENIZER, matcher)
diff --git a/pw_snapshot/py/BUILD.gn b/pw_snapshot/py/BUILD.gn
index 46d8592..85d5a28 100644
--- a/pw_snapshot/py/BUILD.gn
+++ b/pw_snapshot/py/BUILD.gn
@@ -47,6 +47,7 @@
   tests = [ "metadata_test.py" ]
   python_deps = [
     ":pw_snapshot_metadata",
+    "$dir_pw_symbolizer/py",
     "$dir_pw_thread:protos.python",
     "$dir_pw_thread/py",
     "$dir_pw_tokenizer/py",
diff --git a/pw_snapshot/py/pw_snapshot/processor.py b/pw_snapshot/py/pw_snapshot/processor.py
index 3f9cadf..1fcf352 100644
--- a/pw_snapshot/py/pw_snapshot/processor.py
+++ b/pw_snapshot/py/pw_snapshot/processor.py
@@ -15,10 +15,12 @@
 
 import argparse
 import sys
+from pathlib import Path
 from typing import Optional, BinaryIO, TextIO, Callable
 import pw_tokenizer
 from pw_snapshot_metadata import metadata
 from pw_snapshot_protos import snapshot_pb2
+from pw_symbolizer import LlvmSymbolizer
 from pw_thread import thread_analyzer
 
 _BRANDING = """
@@ -31,10 +33,19 @@
 
 """
 
+# ELF files are useful for symbolizing addresses in snapshots. As a single
+# snapshot may contain embedded snapshots from multiple devices, there's a need
+# to match ELF files to the correct snapshot to correctly symbolize addresses.
+#
+# An ElfMatcher is a function that takes a snapshot and investigates its
+# metadata (often build ID, device name, or the version string) to determine
+# which ELF file (if any) should be used to symbolize that snapshot.
+ElfMatcher = Callable[[snapshot_pb2.Snapshot], Optional[Path]]
 
-def process_snapshot(
-        serialized_snapshot: bytes,
-        detokenizer: Optional[pw_tokenizer.Detokenizer] = None) -> str:
+
+def process_snapshot(serialized_snapshot: bytes,
+                     detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
+                     elf_matcher: Optional[ElfMatcher] = None) -> str:
     """Processes a single snapshot."""
 
     output = [_BRANDING]
@@ -44,14 +55,20 @@
     if captured_metadata:
         output.append(captured_metadata)
 
+    # Open a symbolizer.
+    snapshot = snapshot_pb2.Snapshot()
+    snapshot.ParseFromString(serialized_snapshot)
+    if elf_matcher is not None:
+        symbolizer = LlvmSymbolizer(elf_matcher(snapshot))
+    else:
+        symbolizer = LlvmSymbolizer()
+
     thread_info = thread_analyzer.process_snapshot(serialized_snapshot,
-                                                   detokenizer)
+                                                   detokenizer, symbolizer)
     if thread_info:
         output.append(thread_info)
 
     # Check and emit the number of related snapshots embedded in this snapshot.
-    snapshot = snapshot_pb2.Snapshot()
-    snapshot.ParseFromString(serialized_snapshot)
     if snapshot.related_snapshots:
         snapshot_count = len(snapshot.related_snapshots)
         plural = 's' if snapshot_count > 1 else ''
@@ -66,12 +83,14 @@
 def process_snapshots(
         serialized_snapshot: bytes,
         detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
+        elf_matcher: Optional[ElfMatcher] = None,
         user_processing_callback: Optional[Callable[[bytes],
                                                     str]] = None) -> str:
     """Processes a snapshot that may have multiple embedded snapshots."""
     output = []
     # Process the top-level snapshot.
-    output.append(process_snapshot(serialized_snapshot, detokenizer))
+    output.append(
+        process_snapshot(serialized_snapshot, detokenizer, elf_matcher))
 
     # If the user provided a custom processing callback, call it on each
     # snapshot.
@@ -86,7 +105,7 @@
         output.append(
             str(
                 process_snapshots(nested_snapshot.SerializeToString(),
-                                  detokenizer)))
+                                  detokenizer, elf_matcher)))
 
     return '\n'.join(output)
 
diff --git a/pw_thread/py/pw_thread/thread_analyzer.py b/pw_thread/py/pw_thread/thread_analyzer.py
index 38db197..3ca209a 100644
--- a/pw_thread/py/pw_thread/thread_analyzer.py
+++ b/pw_thread/py/pw_thread/thread_analyzer.py
@@ -30,13 +30,17 @@
 }
 
 
-def process_snapshot(serialized_snapshot: bytes,
-                     tokenizer_db: Optional[pw_tokenizer.Detokenizer]) -> str:
+def process_snapshot(
+    serialized_snapshot: bytes,
+    tokenizer_db: Optional[pw_tokenizer.Detokenizer],
+    symbolizer: LlvmSymbolizer = LlvmSymbolizer()
+) -> str:
     """Processes snapshot threads, producing a multi-line string."""
     captured_threads = thread_pb2.SnapshotThreadInfo()
     captured_threads.ParseFromString(serialized_snapshot)
 
-    return str(ThreadSnapshotAnalyzer(captured_threads, tokenizer_db))
+    return str(
+        ThreadSnapshotAnalyzer(captured_threads, tokenizer_db, symbolizer))
 
 
 class StackInfo: