pw_tokenizer: Accept token domain pattern

Accept a token domain pattern in AutoUpdatingDetokenizer objects.
This allows using an .elf file while defining which tokens to load on file
changes. It is very useful if the user wants to load all the tokens,
using "<path>#.*".

Change-Id: Id2afc917030a763d8dc737789089bdb3a5e1a02c
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/109472
Reviewed-by: Wyatt Hepler <hepler@google.com>
Commit-Queue: Carlos Chinchilla <cachinchilla@google.com>
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index b1cfcc5..a4ed480 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -922,7 +922,9 @@
 The ``pw_tokenizer`` package also provides the ``AutoUpdatingDetokenizer``
 class, which can be used in place of the standard ``Detokenizer``. This class
 monitors database files for changes and automatically reloads them when they
-change. This is helpful for long-running tools that use detokenization.
+change. This is helpful for long-running tools that use detokenization. The
+class also supports token domains for the given database files in the
+``<path>#<domain>`` format.
 
 For messages that are optionally tokenized and may be encoded as binary,
 Base64, or plaintext UTF-8, use
diff --git a/pw_tokenizer/py/detokenize_test.py b/pw_tokenizer/py/detokenize_test.py
index 07ca677..81f4fd8 100755
--- a/pw_tokenizer/py/detokenize_test.py
+++ b/pw_tokenizer/py/detokenize_test.py
@@ -89,10 +89,12 @@
 #
 #   arm-none-eabi-objcopy -S --only-section ".pw_tokenizer*" <ELF> <OUTPUT>
 #
-ELF_WITH_TOKENIZER_SECTIONS = Path(__file__).parent.joinpath(
-    'example_binary_with_tokenized_strings.elf').read_bytes()
+ELF_WITH_TOKENIZER_SECTIONS_PATH = Path(__file__).parent.joinpath(
+    'example_binary_with_tokenized_strings.elf')
+ELF_WITH_TOKENIZER_SECTIONS = ELF_WITH_TOKENIZER_SECTIONS_PATH.read_bytes()
 
 TOKENS_IN_ELF = 22
+TOKENS_IN_ELF_WITH_TOKENIZER_SECTIONS = 26
 
 # 0x2e668cd6 is 'Jello, world!' (which is also used in database_test.py).
 JELLO_WORLD_TOKEN = b'\xd6\x8c\x66\x2e'
@@ -468,6 +470,17 @@
             finally:
                 os.unlink(file.name)
 
+    def test_token_domains(self, _):
+        """Tests that token domains can be parsed from input filename"""
+        filename_and_domain = f'{ELF_WITH_TOKENIZER_SECTIONS_PATH}#.*'
+        detok_with_domain = detokenize.AutoUpdatingDetokenizer(
+            filename_and_domain, min_poll_period_s=0)
+        self.assertEqual(len(detok_with_domain.database),
+                         TOKENS_IN_ELF_WITH_TOKENIZER_SECTIONS)
+        detok = detokenize.AutoUpdatingDetokenizer(
+            str(ELF_WITH_TOKENIZER_SECTIONS_PATH), min_poll_period_s=0)
+        self.assertEqual(len(detok.database), TOKENS_IN_ELF)
+
 
 def _next_char(message: bytes) -> bytes:
     return bytes(b + 1 for b in message)
diff --git a/pw_tokenizer/py/pw_tokenizer/detokenize.py b/pw_tokenizer/py/pw_tokenizer/detokenize.py
index e4c5596..68d14af 100755
--- a/pw_tokenizer/py/pw_tokenizer/detokenize.py
+++ b/pw_tokenizer/py/pw_tokenizer/detokenize.py
@@ -296,7 +296,17 @@
     class _DatabasePath:
         """Tracks the modified time of a path or file object."""
         def __init__(self, path: _PathOrFile) -> None:
-            self.path = path if isinstance(path, (str, Path)) else path.name
+            self.path: Path
+            self.domain = None
+            if isinstance(path, str):
+                if path.count('#') == 1:
+                    path, domain = path.split('#')
+                    self.domain = re.compile(domain)
+                self.path = Path(path)
+            elif isinstance(path, Path):
+                self.path = path
+            else:
+                self.path = Path(path.name)
             self._modified_time: Optional[float] = self._last_modified_time()
 
         def updated(self) -> bool:
@@ -316,6 +326,9 @@
 
         def load(self) -> tokens.Database:
             try:
+                if self.domain is not None:
+                    return database.load_token_database(self.path,
+                                                        domain=self.domain)
                 return database.load_token_database(self.path)
             except FileNotFoundError:
                 return database.load_token_database()