pw_snapshot: Add ElfMatcher
Adds an argument to pw_snapshot's process_snapshot() and process_snapshots()
that allows users to pass a function that provides a matching ELF file for
a given Snapshot.
Change-Id: Ic0a67ed26cde9682db09775c94c86b8f7486a165
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/55054
Pigweed-Auto-Submit: Armando Montanez <amontanez@google.com>
Reviewed-by: Ewout van Bekkum <ewout@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_snapshot/module_usage.rst b/pw_snapshot/module_usage.rst
index 52fb42d..b94a1f1 100644
--- a/pw_snapshot/module_usage.rst
+++ b/pw_snapshot/module_usage.rst
@@ -141,3 +141,38 @@
Stack info
Stack used: 0x2001ac00 - 0x2001ab0c (244 bytes, 47.66%)
Stack limits: 0x2001ac00 - 0x2001aa00 (512 bytes)
+
+---------------------
+Symbolizing Addresses
+---------------------
+The snapshot processor tool has built-in support for symbolization of some data
+embedded into snapshots. Taking advantage of this requires the use of a
+project-provided ``ElfMatcher`` callback. This is used by the snapshot processor
+to understand which ELF file should be used to symbolize which snapshot in cases
+where a snapshot has related snapshots embedded inside of it.
+
+Here's an example implementation that uses the device name:
+
+.. code-block:: py
+
+ # Given a firmware bundle directory, determine the ELF file associated with
+ # the provided snapshot.
+ def _snapshot_elf_matcher(fw_bundle_dir: Path,
+ snapshot: snapshot_pb2.Snapshot) -> Optional[Path]:
+ metadata = MetadataProcessor(snapshot.metadata, DETOKENIZER)
+ if metadata.device_name().startswith('GSHOE_MAIN_CORE'):
+ return fw_bundle_dir / 'main.elf'
+ if metadata.device_name().startswith('GSHOE_SENSOR_CORE'):
+ return fw_bundle_dir / 'sensors.elf'
+ return None
+
+
+ # A project-specific wrapper to decode snapshots that provides a detokenizer
+ # and ElfMatcher.
+ def decode_snapshots(snapshot: bytes, fw_bundle_dir: Path) -> str:
+
+ # This is the actual ElfMatcher, which wraps the helper in a lambda that
+ # captures the passed firmware artifacts directory.
+ matcher: processor.ElfMatcher = lambda snapshot: _snapshot_elf_matcher(
+ fw_bundle_dir, snapshot)
+ return processor.process_snapshots(snapshot, DETOKENIZER, matcher)
diff --git a/pw_snapshot/py/BUILD.gn b/pw_snapshot/py/BUILD.gn
index 46d8592..85d5a28 100644
--- a/pw_snapshot/py/BUILD.gn
+++ b/pw_snapshot/py/BUILD.gn
@@ -47,6 +47,7 @@
tests = [ "metadata_test.py" ]
python_deps = [
":pw_snapshot_metadata",
+ "$dir_pw_symbolizer/py",
"$dir_pw_thread:protos.python",
"$dir_pw_thread/py",
"$dir_pw_tokenizer/py",
diff --git a/pw_snapshot/py/pw_snapshot/processor.py b/pw_snapshot/py/pw_snapshot/processor.py
index 3f9cadf..1fcf352 100644
--- a/pw_snapshot/py/pw_snapshot/processor.py
+++ b/pw_snapshot/py/pw_snapshot/processor.py
@@ -15,10 +15,12 @@
import argparse
import sys
+from pathlib import Path
from typing import Optional, BinaryIO, TextIO, Callable
import pw_tokenizer
from pw_snapshot_metadata import metadata
from pw_snapshot_protos import snapshot_pb2
+from pw_symbolizer import LlvmSymbolizer
from pw_thread import thread_analyzer
_BRANDING = """
@@ -31,10 +33,19 @@
"""
+# ELF files are useful for symbolizing addresses in snapshots. As a single
+# snapshot may contain embedded snapshots from multiple devices, there's a need
+# to match ELF files to the correct snapshot to correctly symbolize addresses.
+#
+# An ElfMatcher is a function that takes a snapshot and investigates its
+# metadata (often build ID, device name, or the version string) to return the
+# path of a suitable ELF file for symbolization, or None if none matches.
+ElfMatcher = Callable[[snapshot_pb2.Snapshot], Optional[Path]]
-def process_snapshot(
- serialized_snapshot: bytes,
- detokenizer: Optional[pw_tokenizer.Detokenizer] = None) -> str:
+
+def process_snapshot(serialized_snapshot: bytes,
+ detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
+ elf_matcher: Optional[ElfMatcher] = None) -> str:
"""Processes a single snapshot."""
output = [_BRANDING]
@@ -44,14 +55,20 @@
if captured_metadata:
output.append(captured_metadata)
+ # Open a symbolizer.
+ snapshot = snapshot_pb2.Snapshot()
+ snapshot.ParseFromString(serialized_snapshot)
+ if elf_matcher is not None:
+ symbolizer = LlvmSymbolizer(elf_matcher(snapshot))
+ else:
+ symbolizer = LlvmSymbolizer()
+
thread_info = thread_analyzer.process_snapshot(serialized_snapshot,
- detokenizer)
+ detokenizer, symbolizer)
if thread_info:
output.append(thread_info)
# Check and emit the number of related snapshots embedded in this snapshot.
- snapshot = snapshot_pb2.Snapshot()
- snapshot.ParseFromString(serialized_snapshot)
if snapshot.related_snapshots:
snapshot_count = len(snapshot.related_snapshots)
plural = 's' if snapshot_count > 1 else ''
@@ -66,12 +83,14 @@
def process_snapshots(
serialized_snapshot: bytes,
detokenizer: Optional[pw_tokenizer.Detokenizer] = None,
+ elf_matcher: Optional[ElfMatcher] = None,
user_processing_callback: Optional[Callable[[bytes],
str]] = None) -> str:
"""Processes a snapshot that may have multiple embedded snapshots."""
output = []
# Process the top-level snapshot.
- output.append(process_snapshot(serialized_snapshot, detokenizer))
+ output.append(
+ process_snapshot(serialized_snapshot, detokenizer, elf_matcher))
# If the user provided a custom processing callback, call it on each
# snapshot.
@@ -86,7 +105,7 @@
output.append(
str(
process_snapshots(nested_snapshot.SerializeToString(),
- detokenizer)))
+ detokenizer, elf_matcher)))
return '\n'.join(output)
diff --git a/pw_thread/py/pw_thread/thread_analyzer.py b/pw_thread/py/pw_thread/thread_analyzer.py
index 38db197..3ca209a 100644
--- a/pw_thread/py/pw_thread/thread_analyzer.py
+++ b/pw_thread/py/pw_thread/thread_analyzer.py
@@ -30,13 +30,17 @@
}
-def process_snapshot(serialized_snapshot: bytes,
- tokenizer_db: Optional[pw_tokenizer.Detokenizer]) -> str:
+def process_snapshot(
+ serialized_snapshot: bytes,
+ tokenizer_db: Optional[pw_tokenizer.Detokenizer],
+ symbolizer: LlvmSymbolizer = LlvmSymbolizer()
+) -> str:
"""Processes snapshot threads, producing a multi-line string."""
captured_threads = thread_pb2.SnapshotThreadInfo()
captured_threads.ParseFromString(serialized_snapshot)
- return str(ThreadSnapshotAnalyzer(captured_threads, tokenizer_db))
+ return str(
+ ThreadSnapshotAnalyzer(captured_threads, tokenizer_db, symbolizer))
class StackInfo: