pw_snapshot: Match snapshots to Symbolizers Allows downstream projects to provide a Symbolizer matcher rather than an ELF matcher. This provides a greater degree of flexibility in selection of symbolization tool or architecture/OS support. Change-Id: I29c9cc7bfcd7d5789ea0da20f4ea5524fcbbde79 Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/66871 Pigweed-Auto-Submit: Armando Montanez <amontanez@google.com> Reviewed-by: Joe Ethier <jethier@google.com> Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>

commit: 4b12552ee0e007ec0abd5151211de6ebc778c87e [log] [tgz]
author: Armando Montanez <amontanez@google.com> Fri Oct 29 12:43:45 2021 -0700
committer: CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com> Tue Nov 16 00:49:43 2021 +0000
tree: 8fb0b184f8cc508d7f9b245d5c64ceee4f23f96e
parent: 6a6bf1bfd30de4710289a9e74bc2ca2d14eaefe3 [diff]
diff --git a/pw_snapshot/module_usage.rst b/pw_snapshot/module_usage.rst
index 0ef9596..11f4957 100644
--- a/pw_snapshot/module_usage.rst
+++ b/pw_snapshot/module_usage.rst

@@ -149,9 +149,9 @@
 ---------------------
 The snapshot processor tool has built-in support for symbolization of some data
 embedded into snapshots. Taking advantage of this requires the use of a
-project-provided ``ElfMatcher`` callback. This is used by the snapshot processor
-to understand which ELF file should be used to symbolize which snapshot in cases
-where a snapshot has related snapshots embedded inside of it.
+project-provided ``SymbolizerMatcher`` callback. This is used by the snapshot
+processor to understand which ELF file should be used to symbolize which
+snapshot in cases where a snapshot has related snapshots embedded inside of it.
 
 Here's an example implementation that uses the device name:
 
@@ -159,14 +159,15 @@
 
   # Given a firmware bundle directory, determine the ELF file associated with
   # the provided snapshot.
-  def _snapshot_elf_matcher(fw_bundle_dir: Path,
-                            snapshot: snapshot_pb2.Snapshot) -> Optional[Path]:
+  def _snapshot_symbolizer_matcher(fw_bundle_dir: Path,
+                                   snapshot: snapshot_pb2.Snapshot
+      ) -> Symbolizer:
       metadata = MetadataProcessor(snapshot.metadata, DETOKENIZER)
       if metadata.device_name().startswith('GSHOE_MAIN_CORE'):
-          return fw_bundle_dir / 'main.elf'
+          return LlvmSymbolizer(fw_bundle_dir / 'main.elf')
       if metadata.device_name().startswith('GSHOE_SENSOR_CORE'):
-          return fw_bundle_dir / 'sensors.elf'
-      return None
+          return LlvmSymbolizer(fw_bundle_dir / 'sensors.elf')
+      return LlvmSymbolizer()
 
 
   # A project specific wrapper to decode snapshots that provides a detokenizer
@@ -175,8 +176,9 @@
 
       # This is the actual ElfMatcher, which wraps the helper in a lambda that
       # captures the passed firmware artifacts directory.
-      matcher: processor.ElfMatcher = lambda snapshot: _snapshot_elf_matcher(
-          fw_bundle_dir, snapshot)
+      matcher: processor.SymbolizerMatcher = (
+          lambda snapshot: _snapshot_symbolizer_matcher(
+              fw_bundle_dir, snapshot))
       return processor.process_snapshots(snapshot, DETOKENIZER, matcher)
 
 -------------

diff --git a/pw_snapshot/py/pw_snapshot/processor.py b/pw_snapshot/py/pw_snapshot/processor.py
index 2cd67ed..3375bed 100644
--- a/pw_snapshot/py/pw_snapshot/processor.py
+++ b/pw_snapshot/py/pw_snapshot/processor.py

@@ -21,7 +21,7 @@
 import pw_cpu_exception_cortex_m
 from pw_snapshot_metadata import metadata
 from pw_snapshot_protos import snapshot_pb2
-from pw_symbolizer import LlvmSymbolizer
+from pw_symbolizer import LlvmSymbolizer, Symbolizer
 from pw_thread import thread_analyzer
 
 _BRANDING = """
@@ -34,19 +34,25 @@
 
 """
 
-# ELF files are useful for symbolizing addresses in snapshots. As a single
-# snapshot may contain embedded snapshots from multiple devices, there's a need
-# to match ELF files to the correct snapshot to correctly symbolize addresses.
-#
-# An ElfMatcher is a function that takes a snapshot and investigates its
-# metadata (often build ID, device name, or the version string) to determine
-# whether a suitable ELF file can be provided for symbolization.
+# Deprecated, use SymbolizerMatcher. Will be removed shortly.
 ElfMatcher = Callable[[snapshot_pb2.Snapshot], Optional[Path]]
 
+# Symbolizers are useful for turning addresses into source code locations and
+# function names. As a single snapshot may contain embedded snapshots from
+# multiple devices, there's a need to match ELF files to the correct snapshot to
+# correctly symbolize addresses.
+#
+# A SymbolizerMatcher is a function that takes a snapshot and investigates its
+# metadata (often build ID, device name, or the version string) to determine
+# whether a Symbolizer may be loaded with a suitable ELF file for symbolization.
+SymbolizerMatcher = Callable[[snapshot_pb2.Snapshot], Symbolizer]
 
-def process_snapshot(serialized_snapshot: bytes,
-                     detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
-                     elf_matcher: Optional[ElfMatcher] = None) -> str:
+
+def process_snapshot(
+        serialized_snapshot: bytes,
+        detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
+        elf_matcher: Optional[ElfMatcher] = None,
+        symbolizer_matcher: Optional[SymbolizerMatcher] = None) -> str:
     """Processes a single snapshot."""
 
     output = [_BRANDING]
@@ -59,7 +65,10 @@
     # Open a symbolizer.
     snapshot = snapshot_pb2.Snapshot()
     snapshot.ParseFromString(serialized_snapshot)
-    if elf_matcher is not None:
+
+    if symbolizer_matcher is not None:
+        symbolizer = symbolizer_matcher(snapshot)
+    elif elf_matcher is not None:
         symbolizer = LlvmSymbolizer(elf_matcher(snapshot))
     else:
         symbolizer = LlvmSymbolizer()
@@ -90,13 +99,14 @@
         serialized_snapshot: bytes,
         detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
         elf_matcher: Optional[ElfMatcher] = None,
-        user_processing_callback: Optional[Callable[[bytes],
-                                                    str]] = None) -> str:
+        user_processing_callback: Optional[Callable[[bytes], str]] = None,
+        symbolizer_matcher: Optional[SymbolizerMatcher] = None) -> str:
     """Processes a snapshot that may have multiple embedded snapshots."""
     output = []
     # Process the top-level snapshot.
     output.append(
-        process_snapshot(serialized_snapshot, detokenizer, elf_matcher))
+        process_snapshot(serialized_snapshot, detokenizer, elf_matcher,
+                         symbolizer_matcher))
 
     # If the user provided a custom processing callback, call it on each
     # snapshot.
@@ -111,7 +121,9 @@
         output.append(
             str(
                 process_snapshots(nested_snapshot.SerializeToString(),
-                                  detokenizer, elf_matcher)))
+                                  detokenizer, elf_matcher,
+                                  user_processing_callback,
+                                  symbolizer_matcher)))
 
     return '\n'.join(output)
 

diff --git a/pw_thread/py/pw_thread/thread_analyzer.py b/pw_thread/py/pw_thread/thread_analyzer.py
index e67c068..1cd6fb5 100644
--- a/pw_thread/py/pw_thread/thread_analyzer.py
+++ b/pw_thread/py/pw_thread/thread_analyzer.py

@@ -15,7 +15,7 @@
 
 from typing import Optional, List, Mapping
 import pw_tokenizer
-from pw_symbolizer import LlvmSymbolizer
+from pw_symbolizer import LlvmSymbolizer, Symbolizer
 from pw_tokenizer import proto as proto_detokenizer
 from pw_thread_protos import thread_pb2
 
@@ -30,14 +30,14 @@
 }
 
 
-def process_snapshot(
-    serialized_snapshot: bytes,
-    tokenizer_db: Optional[pw_tokenizer.Detokenizer],
-    symbolizer: LlvmSymbolizer = LlvmSymbolizer()
-) -> str:
+def process_snapshot(serialized_snapshot: bytes,
+                     tokenizer_db: Optional[pw_tokenizer.Detokenizer] = None,
+                     symbolizer: Optional[Symbolizer] = None) -> str:
     """Processes snapshot threads, producing a multi-line string."""
     captured_threads = thread_pb2.SnapshotThreadInfo()
     captured_threads.ParseFromString(serialized_snapshot)
+    if symbolizer is None:
+        symbolizer = LlvmSymbolizer()
 
     return str(
         ThreadSnapshotAnalyzer(captured_threads, tokenizer_db, symbolizer))
@@ -175,11 +175,14 @@
     def __init__(self,
                  threads: thread_pb2.SnapshotThreadInfo,
                  tokenizer_db: Optional[pw_tokenizer.Detokenizer] = None,
-                 symbolizer: LlvmSymbolizer = LlvmSymbolizer()):
+                 symbolizer: Optional[Symbolizer] = None):
         self._threads = threads.threads
         self._tokenizer_db = (tokenizer_db if tokenizer_db is not None else
                               pw_tokenizer.Detokenizer(None))
-        self._symbolizer = symbolizer
+        if symbolizer is not None:
+            self._symbolizer = symbolizer
+        else:
+            self._symbolizer = LlvmSymbolizer()
 
         for thread in self._threads:
             proto_detokenizer.detokenize_fields(self._tokenizer_db, thread)
commit	4b12552ee0e007ec0abd5151211de6ebc778c87e	[log] [tgz]
author	Armando Montanez <amontanez@google.com>	Fri Oct 29 12:43:45 2021 -0700
committer	CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com>	Tue Nov 16 00:49:43 2021 +0000
tree	8fb0b184f8cc508d7f9b245d5c64ceee4f23f96e
parent	6a6bf1bfd30de4710289a9e74bc2ca2d14eaefe3 [diff]