pw_tokenizer: tail -f functionality for detokenize
- Add a -f/--follow option to pw_tokenizer.detokenize, which behaves
  similarly to tail -f.
- Default to requiring at least one database file on the command line.
Change-Id: I56813ba0e7706a121acff1f1d9c2ea2d9c64c7cd
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/24056
Reviewed-by: Anthony DiGirolamo <tonymd@google.com>
Commit-Queue: Wyatt Hepler <hepler@google.com>
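Example usage of the new flag (a sketch; the ELF and log paths are
hypothetical, and -i/--input is assumed to be the input-file flag whose help
text appears in the diff below):

    python -m pw_tokenizer.detokenize base64 out/app.elf -i device.log -f

This keeps polling device.log and detokenizes Base64-encoded messages as they
are appended, until interrupted with Ctrl-C.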
diff --git a/pw_tokenizer/py/pw_tokenizer/database.py b/pw_tokenizer/py/pw_tokenizer/database.py
index 2c879ae..43ba5cb 100755
--- a/pw_tokenizer/py/pw_tokenizer/database.py
+++ b/pw_tokenizer/py/pw_tokenizer/database.py
@@ -389,7 +389,7 @@
         setattr(namespace, self.dest, databases)
 
 
-def token_databases_parser() -> argparse.ArgumentParser:
+def token_databases_parser(nargs: str = '+') -> argparse.ArgumentParser:
     """Returns an argument parser for reading token databases.
 
     These arguments can be added to another parser using the parents arg.
@@ -398,7 +398,7 @@
     parser.add_argument(
         'databases',
         metavar='elf_or_token_database',
-        nargs='*',
+        nargs=nargs,
         action=LoadTokenDatabases,
         help=('ELF or token database files from which to read strings and '
               'tokens. For ELF files, the tokenization domain to read from '
@@ -428,7 +428,7 @@
                            required=True,
                            help='The database file to update.')
 
-    option_tokens = token_databases_parser()
+    option_tokens = token_databases_parser('*')
 
     # Top-level argument parser.
     parser = argparse.ArgumentParser(
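A minimal sketch of how the parameterized parser is consumed through
argparse's parents mechanism, per the docstring above (the consumer parsers
are hypothetical; token_databases_parser is from this diff):

    import argparse

    from pw_tokenizer import database

    # With the new default of nargs='+', at least one ELF or token database
    # positional argument is required; parsing an empty argv would error.
    strict_parser = argparse.ArgumentParser(
        description='Example consumer of the shared database arguments.',
        parents=[database.token_databases_parser()])

    # Passing '*' (as database.py's update commands now do) keeps the
    # database arguments optional.
    lenient_parser = argparse.ArgumentParser(
        parents=[database.token_databases_parser('*')])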
diff --git a/pw_tokenizer/py/pw_tokenizer/detokenize.py b/pw_tokenizer/py/pw_tokenizer/detokenize.py
index 0e349a9..26f5d52 100755
--- a/pw_tokenizer/py/pw_tokenizer/detokenize.py
+++ b/pw_tokenizer/py/pw_tokenizer/detokenize.py
@@ -425,8 +425,27 @@
     return output.getvalue()
 
 
+def _follow_and_detokenize_file(detokenizer: Detokenizer,
+                                file: BinaryIO,
+                                output: BinaryIO,
+                                prefix: Union[str, bytes],
+                                poll_period_s: float = 0.01) -> None:
+    """Polls a file to detokenize it and any appended data."""
+
+    try:
+        while True:
+            data = file.read()
+            if data:
+                detokenize_base64_to_file(detokenizer, data, output, prefix)
+                output.flush()
+            else:
+                time.sleep(poll_period_s)
+    except KeyboardInterrupt:
+        pass
+
+
 def _handle_base64(databases, input_file: BinaryIO, output: BinaryIO,
-                   prefix: str, show_errors: bool) -> None:
+                   prefix: str, show_errors: bool, follow: bool) -> None:
     """Handles the base64 command line option."""
     # argparse.FileType doesn't correctly handle - for binary files.
     if input_file is sys.stdin:
@@ -438,11 +457,14 @@
     detokenizer = Detokenizer(tokens.Database.merged(*databases),
                               show_errors=show_errors)
 
-    # If the input is seekable, process it all at once, which is MUCH faster.
-    if input_file.seekable():
+    if follow:
+        _follow_and_detokenize_file(detokenizer, input_file, output, prefix)
+    elif input_file.seekable():
+        # Process seekable files all at once, which is MUCH faster.
         detokenize_base64_to_file(detokenizer, input_file.read(), output,
                                   prefix)
     else:
+        # For non-seekable inputs (e.g. pipes), read one character at a time.
         detokenize_base64_live(detokenizer, input_file, output, prefix)
 
 
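(Aside: the seekable() check above distinguishes regular files and in-memory
buffers from pipes; a standard-library-only illustration:)

    import io
    import os

    # BytesIO and regular files report seekable() -> True, so they take the
    # fast read-everything-at-once path.
    assert io.BytesIO(b'$example==').seekable()

    # A pipe's read end is not seekable, so it falls through to the live,
    # character-at-a-time path.
    read_fd, write_fd = os.pipe()
    with os.fdopen(read_fd, 'rb') as pipe_reader:
        assert not pipe_reader.seekable()
    os.close(write_fd)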
@@ -470,6 +492,12 @@
         type=argparse.FileType('rb'),
         default=sys.stdin.buffer,
         help='The file from which to read; provide - or omit for stdin.')
+    subparser.add_argument(
+        '-f',
+        '--follow',
+        action='store_true',
+        help=('Detokenize data appended to input_file as it grows; similar to '
+              'tail -f.'))
     subparser.add_argument('-o',
                            '--output',
                            type=argparse.FileType('wb'),
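Finally, a sketch of driving the new follow mode programmatically rather than
through the CLI (the database and log paths are hypothetical, and
tokens.DatabaseFile is assumed; Detokenizer, tokens.Database.merged, and
_follow_and_detokenize_file appear in this diff):

    import sys

    from pw_tokenizer import detokenize, tokens

    # Build a detokenizer the same way _handle_base64 does above.
    detokenizer = detokenize.Detokenizer(
        tokens.Database.merged(tokens.DatabaseFile('tokens.csv')),
        show_errors=False)

    with open('device.log', 'rb') as log_file:
        # Polls device.log, detokenizing '$'-prefixed Base64 messages as
        # they are appended; returns cleanly on KeyboardInterrupt (Ctrl-C).
        detokenize._follow_and_detokenize_file(detokenizer, log_file,
                                               sys.stdout.buffer, '$')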