pw_tokenizer: Accept token domain pattern

Accept a token domain pattern in AutoUpdatingDetokenizer objects.
This allows using an .elf file while defining which tokens to load on file
changes. It is very useful if the user wants to load all the tokens,
using "<path>#.*".

Change-Id: Id2afc917030a763d8dc737789089bdb3a5e1a02c
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/109472
Reviewed-by: Wyatt Hepler <hepler@google.com>
Commit-Queue: Carlos Chinchilla <cachinchilla@google.com>
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index b1cfcc5..a4ed480 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -922,7 +922,9 @@
 The ``pw_tokenizer`` package also provides the ``AutoUpdatingDetokenizer``
 class, which can be used in place of the standard ``Detokenizer``. This class
 monitors database files for changes and automatically reloads them when they
-change. This is helpful for long-running tools that use detokenization.
+change. This is helpful for long-running tools that use detokenization. The
+class also supports token domains for the given database files in the
+``<path>#<domain>`` format.
 
 For messages that are optionally tokenized and may be encoded as binary,
 Base64, or plaintext UTF-8, use
diff --git a/pw_tokenizer/py/detokenize_test.py b/pw_tokenizer/py/detokenize_test.py
index 07ca677..81f4fd8 100755
--- a/pw_tokenizer/py/detokenize_test.py
+++ b/pw_tokenizer/py/detokenize_test.py
@@ -89,10 +89,12 @@
 #
 #   arm-none-eabi-objcopy -S --only-section ".pw_tokenizer*" <ELF> <OUTPUT>
 #
-ELF_WITH_TOKENIZER_SECTIONS = Path(__file__).parent.joinpath(
-    'example_binary_with_tokenized_strings.elf').read_bytes()
+ELF_WITH_TOKENIZER_SECTIONS_PATH = Path(__file__).parent.joinpath(
+    'example_binary_with_tokenized_strings.elf')
+ELF_WITH_TOKENIZER_SECTIONS = ELF_WITH_TOKENIZER_SECTIONS_PATH.read_bytes()
 
 TOKENS_IN_ELF = 22
+TOKENS_IN_ELF_WITH_TOKENIZER_SECTIONS = 26
 
 # 0x2e668cd6 is 'Jello, world!' (which is also used in database_test.py).
 JELLO_WORLD_TOKEN = b'\xd6\x8c\x66\x2e'
@@ -468,6 +470,17 @@
             finally:
                 os.unlink(file.name)
 
+    def test_token_domains(self, _):
+        """Tests that token domains can be parsed from input filename"""
+        filename_and_domain = f'{ELF_WITH_TOKENIZER_SECTIONS_PATH}#.*'
+        detok_with_domain = detokenize.AutoUpdatingDetokenizer(
+            filename_and_domain, min_poll_period_s=0)
+        self.assertEqual(len(detok_with_domain.database),
+                         TOKENS_IN_ELF_WITH_TOKENIZER_SECTIONS)
+        detok = detokenize.AutoUpdatingDetokenizer(
+            str(ELF_WITH_TOKENIZER_SECTIONS_PATH), min_poll_period_s=0)
+        self.assertEqual(len(detok.database), TOKENS_IN_ELF)
+
 
 def _next_char(message: bytes) -> bytes:
     return bytes(b + 1 for b in message)
diff --git a/pw_tokenizer/py/pw_tokenizer/detokenize.py b/pw_tokenizer/py/pw_tokenizer/detokenize.py
index e4c5596..68d14af 100755
--- a/pw_tokenizer/py/pw_tokenizer/detokenize.py
+++ b/pw_tokenizer/py/pw_tokenizer/detokenize.py
@@ -296,7 +296,17 @@
     class _DatabasePath:
         """Tracks the modified time of a path or file object."""
         def __init__(self, path: _PathOrFile) -> None:
-            self.path = path if isinstance(path, (str, Path)) else path.name
+            self.path: Path
+            self.domain = None
+            if isinstance(path, str):
+                if path.count('#') == 1:
+                    path, domain = path.split('#')
+                    self.domain = re.compile(domain)
+                self.path = Path(path)
+            elif isinstance(path, Path):
+                self.path = path
+            else:
+                self.path = Path(path.name)
             self._modified_time: Optional[float] = self._last_modified_time()
 
         def updated(self) -> bool:
@@ -316,6 +326,9 @@
 
         def load(self) -> tokens.Database:
             try:
+                if self.domain is not None:
+                    return database.load_token_database(self.path,
+                                                        domain=self.domain)
                 return database.load_token_database(self.path)
             except FileNotFoundError:
                 return database.load_token_database()