pw_tokenizer: Tool for detokenizing from serial

pw_tokenizer.serial_detokenizer detokenizes prefixed Base64-encoded
strings in the output from a serial port.

Change-Id: I0675192d40fff2cf2db5db8a219b3326b2780b36
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/15365
Commit-Queue: Wyatt Hepler <hepler@google.com>
Reviewed-by: Armando Montanez <amontanez@google.com>
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 9a5b369..48d66a7 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -692,6 +692,29 @@
     TransmitLogMessage(base64_buffer, base64_size);
   }
 
+Command line utilities
+^^^^^^^^^^^^^^^^^^^^^^
+``pw_tokenizer`` provides two standalone command line utilities for detokenizing
+Base64-encoded tokenized strings.
+
+* ``detokenize.py`` -- Detokenizes Base64-encoded strings in files or from
+  stdin.
+* ``serial_detokenizer.py`` -- Detokenizes Base64-encoded strings from a
+  connected serial device.
+
+If the ``pw_tokenizer`` Python package is installed, these tools may be executed
+as runnable modules. For example:
+
+.. code-block::
+
+  # Detokenize Base64-encoded strings in a file
+  python -m pw_tokenizer.detokenize -i input_file.txt
+
+  # Detokenize Base64-encoded strings in output from a serial device
+  python -m pw_tokenizer.serial_detokenizer --device /dev/ttyACM0
+
+See the ``--help`` options for these tools for full usage information.
+
 Deployment war story
 ====================
 The tokenizer module was developed to bring tokenized logging to an
diff --git a/pw_tokenizer/py/pw_tokenizer/__main__.py b/pw_tokenizer/py/pw_tokenizer/__main__.py
index 3ed2835..4a7bdf2 100644
--- a/pw_tokenizer/py/pw_tokenizer/__main__.py
+++ b/pw_tokenizer/py/pw_tokenizer/__main__.py
@@ -15,4 +15,4 @@
 
 from pw_tokenizer import detokenize
 
-detokenize._main(detokenize._parse_args())  # pylint: disable=protected-access
+detokenize.main()
diff --git a/pw_tokenizer/py/pw_tokenizer/detokenize.py b/pw_tokenizer/py/pw_tokenizer/detokenize.py
index 2ec0369..040978e 100755
--- a/pw_tokenizer/py/pw_tokenizer/detokenize.py
+++ b/pw_tokenizer/py/pw_tokenizer/detokenize.py
@@ -343,13 +343,14 @@
     return decode_and_detokenize
 
 
+BASE64_PREFIX = b'$'
 DEFAULT_RECURSION = 9
 
 
 def detokenize_base64_live(detokenizer,
                            input_file,
                            output,
-                           prefix=b'$',
+                           prefix=BASE64_PREFIX,
                            recursion=DEFAULT_RECURSION):
     """Reads chars one-at-a-time and decodes messages; SLOW for big files."""
     transform = _detokenize_prefixed_base64(detokenizer, prefix, recursion)
@@ -367,7 +368,7 @@
 def detokenize_base64_to_file(detokenizer,
                               data,
                               output,
-                              prefix=b'$',
+                              prefix=BASE64_PREFIX,
                               recursion=DEFAULT_RECURSION):
     """Decodes prefixed Base64 messages in data; decodes to an output file."""
     transform = _detokenize_prefixed_base64(detokenizer, prefix, recursion)
@@ -389,7 +390,7 @@
 
 def detokenize_base64(detokenizer,
                       data,
-                      prefix=b'$',
+                      prefix=BASE64_PREFIX,
                       recursion=DEFAULT_RECURSION):
     """Decodes and replaces prefixed Base64 messages in the provided data.
 
@@ -428,7 +429,7 @@
 
 
 def _parse_args():
-    """Parse and return command line arguments."""
+    """Parses and returns command line arguments."""
 
     parser = argparse.ArgumentParser(
         description=__doc__,
@@ -460,7 +461,7 @@
     subparser.add_argument(
         '-p',
         '--prefix',
-        default='$',
+        default=BASE64_PREFIX,
         help=('The one-character prefix that signals the start of a '
               'Base64-encoded message. (default: $)'))
     subparser.add_argument(
@@ -473,7 +474,9 @@
     return parser.parse_args()
 
 
-def _main(args):
+def main():
+    args = _parse_args()
+
     handler = args.handler
     del args.handler
 
@@ -483,4 +486,4 @@
 if __name__ == '__main__':
     if sys.version_info[0] < 3:
         sys.exit('ERROR: The detokenizer command line tools require Python 3.')
-    _main(_parse_args())
+    sys.exit(main())
diff --git a/pw_tokenizer/py/pw_tokenizer/serial_detokenizer.py b/pw_tokenizer/py/pw_tokenizer/serial_detokenizer.py
new file mode 100644
index 0000000..d017a8c
--- /dev/null
+++ b/pw_tokenizer/py/pw_tokenizer/serial_detokenizer.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# Copyright 2020 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Decodes and detokenizes Base64-encoded strings in serial output.
+
+The output is printed or saved to a file. Input is not supported.
+"""
+
+import argparse
+import sys
+from typing import BinaryIO, Iterable
+
+import serial
+from pw_tokenizer import database, detokenize, tokens
+
+
+def _parse_args():
+    """Parses and returns command line arguments."""
+
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        parents=[database.token_databases_parser()])
+    parser.add_argument('-d',
+                        '--device',
+                        required=True,
+                        help='The serial device from which to read')
+    parser.add_argument('-b',
+                        '--baudrate',
+                        type=int,
+                        default=115200,
+                        help='The baud rate for the serial device')
+    parser.add_argument('-o',
+                        '--output',
+                        type=argparse.FileType('wb'),
+                        default=sys.stdout.buffer,
+                        help=('The file to which to write the output; '
+                              'provide - or omit for stdout.'))
+    parser.add_argument(
+        '-p',
+        '--prefix',
+        default=detokenize.BASE64_PREFIX,
+        help=('The one-character prefix that signals the start of a '
+              'Base64-encoded message. (default: $)'))
+    parser.add_argument(
+        '-s',
+        '--show_errors',
+        action='store_true',
+        help=('Show error messages instead of conversion specifiers when '
+              'arguments cannot be decoded.'))
+
+    return parser.parse_args()
+
+
+def _detokenize_serial(databases: Iterable, device: serial.Serial,
+                       baudrate: int, show_errors: bool, output: BinaryIO,
+                       prefix: str) -> None:
+    if output is sys.stdout:
+        output = sys.stdout.buffer
+
+    detokenizer = detokenize.Detokenizer(tokens.Database.merged(*databases),
+                                         show_errors=show_errors)
+    serial_device = serial.Serial(port=device, baudrate=baudrate)
+
+    try:
+        detokenize.detokenize_base64_live(detokenizer, serial_device, output,
+                                          prefix)
+    except KeyboardInterrupt:
+        output.flush()
+
+
+def main():
+    _detokenize_serial(**vars(_parse_args()))
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/pw_tokenizer/py/setup.py b/pw_tokenizer/py/setup.py
index 3b3dec2..9d9a96e 100644
--- a/pw_tokenizer/py/setup.py
+++ b/pw_tokenizer/py/setup.py
@@ -23,4 +23,5 @@
     description='Tools for decoding tokenized strings',
     packages=setuptools.find_packages(),
     test_suite='setup.test_suite',
+    install_requires=['pyserial'],
 )