pw_tokenizer: tail -f functionality for detokenize
- Add a -f/--follow option to pw_tokenizer.detokenize, which behaves
  similarly to tail -f.
- Default to requiring at least one database file on the command line.
Change-Id: I56813ba0e7706a121acff1f1d9c2ea2d9c64c7cd
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/24056
Reviewed-by: Anthony DiGirolamo <tonymd@google.com>
Commit-Queue: Wyatt Hepler <hepler@google.com>
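Example usage of the new flag (a sketch; the ELF and log paths are
hypothetical, and -i/--input is assumed to be the input-file flag whose help
text appears in the diff below):

    python -m pw_tokenizer.detokenize base64 out/app.elf -i device.log -f

This keeps polling device.log and detokenizes Base64-encoded messages as they
are appended, until interrupted with Ctrl-C.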
diff --git a/pw_tokenizer/py/pw_tokenizer/database.py b/pw_tokenizer/py/pw_tokenizer/database.py
index 2c879ae..43ba5cb 100755
--- a/pw_tokenizer/py/pw_tokenizer/database.py
+++ b/pw_tokenizer/py/pw_tokenizer/database.py
@@ -389,7 +389,7 @@
         setattr(namespace, self.dest, databases)
 
 
-def token_databases_parser() -> argparse.ArgumentParser:
+def token_databases_parser(nargs: str = '+') -> argparse.ArgumentParser:
     """Returns an argument parser for reading token databases.
 
     These arguments can be added to another parser using the parents arg.
@@ -398,7 +398,7 @@
     parser.add_argument(
         'databases',
         metavar='elf_or_token_database',
-        nargs='*',
+        nargs=nargs,
         action=LoadTokenDatabases,
         help=('ELF or token database files from which to read strings and '
               'tokens. For ELF files, the tokenization domain to read from '
@@ -428,7 +428,7 @@
                            required=True,
                            help='The database file to update.')
 
-    option_tokens = token_databases_parser()
+    option_tokens = token_databases_parser('*')
 
     # Top-level argument parser.
     parser = argparse.ArgumentParser(
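A minimal sketch of how the parameterized parser is consumed through
argparse's parents mechanism, per the docstring above (the consumer parsers
are hypothetical; token_databases_parser is from this diff):

    import argparse

    from pw_tokenizer import database

    # With the new default of nargs='+', at least one ELF or token database
    # positional argument is required; parsing an empty argv would error.
    strict_parser = argparse.ArgumentParser(
        description='Example consumer of the shared database arguments.',
        parents=[database.token_databases_parser()])

    # Passing '*' (as database.py's update commands now do) keeps the
    # database arguments optional.
    lenient_parser = argparse.ArgumentParser(
        parents=[database.token_databases_parser('*')])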
diff --git a/pw_tokenizer/py/pw_tokenizer/detokenize.py b/pw_tokenizer/py/pw_tokenizer/detokenize.py
index 0e349a9..26f5d52 100755
--- a/pw_tokenizer/py/pw_tokenizer/detokenize.py
+++ b/pw_tokenizer/py/pw_tokenizer/detokenize.py
@@ -425,8 +425,27 @@
     return output.getvalue()
 
 
+def _follow_and_detokenize_file(detokenizer: Detokenizer,
+                                file: BinaryIO,
+                                output: BinaryIO,
+                                prefix: Union[str, bytes],
+                                poll_period_s: float = 0.01) -> None:
+    """Polls a file to detokenize it and any appended data."""
+
+    try:
+        while True:
+            data = file.read()
+            if data:
+                detokenize_base64_to_file(detokenizer, data, output, prefix)
+                output.flush()
+            else:
+                time.sleep(poll_period_s)
+    except KeyboardInterrupt:
+        pass
+
+
 def _handle_base64(databases, input_file: BinaryIO, output: BinaryIO,
-                   prefix: str, show_errors: bool) -> None:
+                   prefix: str, show_errors: bool, follow: bool) -> None:
     """Handles the base64 command line option."""
     # argparse.FileType doesn't correctly handle - for binary files.
     if input_file is sys.stdin:
@@ -438,11 +457,14 @@
     detokenizer = Detokenizer(tokens.Database.merged(*databases),
                               show_errors=show_errors)
 
-    # If the input is seekable, process it all at once, which is MUCH faster.
-    if input_file.seekable():
+    if follow:
+        _follow_and_detokenize_file(detokenizer, input_file, output, prefix)
+    elif input_file.seekable():
+        # Process seekable files all at once, which is MUCH faster.
         detokenize_base64_to_file(detokenizer, input_file.read(), output,
                                   prefix)
     else:
+        # For non-seekable inputs (e.g. pipes), read one character at a time.
         detokenize_base64_live(detokenizer, input_file, output, prefix)
 
 
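(Aside: the seekable() check above distinguishes regular files and in-memory
buffers from pipes; a standard-library-only illustration:)

    import io
    import os

    # BytesIO and regular files report seekable() -> True, so they take the
    # fast read-everything-at-once path.
    assert io.BytesIO(b'$example==').seekable()

    # A pipe's read end is not seekable, so it falls through to the live,
    # character-at-a-time path.
    read_fd, write_fd = os.pipe()
    with os.fdopen(read_fd, 'rb') as pipe_reader:
        assert not pipe_reader.seekable()
    os.close(write_fd)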
@@ -470,6 +492,12 @@
         type=argparse.FileType('rb'),
         default=sys.stdin.buffer,
         help='The file from which to read; provide - or omit for stdin.')
+    subparser.add_argument(
+        '-f',
+        '--follow',
+        action='store_true',
+        help=('Detokenize data appended to input_file as it grows; similar to '
+              'tail -f.'))
     subparser.add_argument('-o',
                            '--output',
                            type=argparse.FileType('wb'),
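Finally, a sketch of driving the new follow mode programmatically rather than
through the CLI (the database and log paths are hypothetical, and
tokens.DatabaseFile is assumed; Detokenizer, tokens.Database.merged, and
_follow_and_detokenize_file appear in this diff):

    import sys

    from pw_tokenizer import detokenize, tokens

    # Build a detokenizer the same way _handle_base64 does above.
    detokenizer = detokenize.Detokenizer(
        tokens.Database.merged(tokens.DatabaseFile('tokens.csv')),
        show_errors=False)

    with open('device.log', 'rb') as log_file:
        # Polls device.log, detokenizing '$'-prefixed Base64 messages as
        # they are appended; returns cleanly on KeyboardInterrupt (Ctrl-C).
        detokenize._follow_and_detokenize_file(detokenizer, log_file,
                                               sys.stdout.buffer, '$')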