pw_tokenizer: Display as prefixed Base64 if decoding fails

If a tokenized message cannot be decoded, use the prefixed Base64
version for __str__ rather than an empty string. This makes it simple
to decode the string later.

Change-Id: Ibb473d7a3b7857fb0a7da22a675924bc02693075
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/38260
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
Reviewed-by: Ewout van Bekkum <ewout@google.com>
diff --git a/pw_tokenizer/py/detokenize_test.py b/pw_tokenizer/py/detokenize_test.py
index 6613e53..1de6160 100755
--- a/pw_tokenizer/py/detokenize_test.py
+++ b/pw_tokenizer/py/detokenize_test.py
@@ -179,7 +179,9 @@
         self.assertIn('unknown token',
                       detok.detokenize(b'1234').error_message())
         self.assertIn('unknown token', repr(detok.detokenize(b'1234')))
-        self.assertEqual('', str(detok.detokenize(b'1234')))
+
+        self.assertEqual('$' + base64.b64encode(b'1234').decode(),
+                         str(detok.detokenize(b'1234')))
 
         self.assertIsNone(detok.detokenize(b'').token)
 
@@ -211,16 +213,18 @@
     def test_missing_token(self):
         detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
         self.assertIn('missing token', detok.detokenize(b'').error_message())
-        self.assertEqual('', str(detok.detokenize(b'')))
+        self.assertEqual('$', str(detok.detokenize(b'')))
         self.assertIn('missing token', repr(detok.detokenize(b'123')))
 
         self.assertIn('missing token', detok.detokenize(b'1').error_message())
-        self.assertEqual('', str(detok.detokenize(b'1')))
+        self.assertEqual('$' + base64.b64encode(b'1').decode(),
+                         str(detok.detokenize(b'1')))
         self.assertIn('missing token', repr(detok.detokenize(b'1')))
 
         self.assertIn('missing token',
                       detok.detokenize(b'123').error_message())
-        self.assertEqual('', str(detok.detokenize(b'123')))
+        self.assertEqual('$' + base64.b64encode(b'123').decode(),
+                         str(detok.detokenize(b'123')))
         self.assertIn('missing token', repr(detok.detokenize(b'123')))
 
     def test_decode_from_elf_data(self):
diff --git a/pw_tokenizer/py/pw_tokenizer/detokenize.py b/pw_tokenizer/py/pw_tokenizer/detokenize.py
index 26f5d52..770a4f0 100755
--- a/pw_tokenizer/py/pw_tokenizer/detokenize.py
+++ b/pw_tokenizer/py/pw_tokenizer/detokenize.py
@@ -48,13 +48,13 @@
                     NamedTuple, Optional, Pattern, Tuple, Union)
 
 try:
-    from pw_tokenizer import database, decode, tokens
+    from pw_tokenizer import database, decode, encode, tokens
 except ImportError:
     # Append this path to the module search path to allow running this module
     # without installing the pw_tokenizer package.
     sys.path.append(os.path.dirname(os.path.dirname(
         os.path.abspath(__file__))))
-    from pw_tokenizer import database, decode, tokens
+    from pw_tokenizer import database, decode, encode, tokens
 
 ENCODED_TOKEN = struct.Struct('<I')
 _LOG = logging.getLogger('pw_tokenizer')
@@ -149,7 +149,9 @@
         if self._show_errors:
             return '<[ERROR: {}|{!r}]>'.format(self.error_message(),
                                                self.encoded_message)
-        return ''
+
+        # Display the string as prefixed Base64 if it cannot be decoded.
+        return encode.prefixed_base64(self.encoded_message)
 
     def __repr__(self) -> str:
         if self.ok():
@@ -353,7 +355,7 @@
     return decode_and_detokenize
 
 
-BASE64_PREFIX = b'$'
+BASE64_PREFIX = encode.BASE64_PREFIX.encode()
 DEFAULT_RECURSION = 9
 
 
diff --git a/pw_tokenizer/py/pw_tokenizer/encode.py b/pw_tokenizer/py/pw_tokenizer/encode.py
index e197b79..97c62bf 100644
--- a/pw_tokenizer/py/pw_tokenizer/encode.py
+++ b/pw_tokenizer/py/pw_tokenizer/encode.py
@@ -13,11 +13,13 @@
 # the License.
 """Provides functionality for encoding tokenized messages."""
 
+import base64
 import struct
 from typing import Union
 
 _INT32_MAX = 2**31 - 1
 _UINT32_MAX = 2**32 - 1
+BASE64_PREFIX = '$'
 
 
 def _zig_zag_encode(value: int) -> int:
@@ -86,3 +88,8 @@
                 f'{arg} has type {type(arg)}, which is not supported')
 
     return bytes(data)
+
+
+def prefixed_base64(data: bytes, prefix: str = BASE64_PREFIX) -> str:
+    """Encodes a tokenized message as prefixed Base64."""
+    return prefix + base64.b64encode(data).decode()