pw_tokenizer: Update %p machinery Changes how the `%p` machinery is handled to support more flags and modifiers. This version superficially looks the same as the old version, but allows `+`, `-`, ` `, and user-specified width. The implementation is largely the same as the machinery to work around Python having a non-standard octal `#` flag (implemented in the next CL). Bug: b/265307572 Change-Id: Ib217a11d4c62bc5d079f5eb8938aca1fb8ee29e2 Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/126836 Reviewed-by: Wyatt Hepler <hepler@google.com> Commit-Queue: Greg Pataky <gregpataky@google.com>

commit: e4a7f356a8e5c067cf5fb1134cfa2b5bc52c8a2c [log] [tgz]
author: Greg Pataky <gregpataky@google.com> Wed Feb 08 22:40:38 2023 +0000
committer: CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com> Wed Feb 08 22:40:38 2023 +0000
tree: 5cdd4244b4dba59a509a48309ba0e3b771d9a649
parent: 7f0b97c24fb01200b2bc48b9bfcda8557e4474da [diff]
diff --git a/pw_tokenizer/py/decode_test.py b/pw_tokenizer/py/decode_test.py
index 1f69a96..8d879a6 100755
--- a/pw_tokenizer/py/decode_test.py
+++ b/pw_tokenizer/py/decode_test.py

@@ -108,6 +108,26 @@
             )
 
 
+class TestPointerDecoding(unittest.TestCase):
+    """Tests decoding pointer values."""
+
+    def test_pointer(self) -> None:
+        result = decode.FormatString('%p').format(
+            encode.encode_args(0xDEADBEEF)
+        )
+        self.assertTrue(result.ok())
+        self.assertEqual(result.value, '0xDEADBEEF')
+        self.assertEqual(result.remaining, b'')
+
+    def test_pointer_0_padding(self) -> None:
+        result = decode.FormatString('%p').format(
+            encode.encode_args(0x00000000)
+        )
+        self.assertTrue(result.ok())
+        self.assertEqual(result.value, '0x00000000')
+        self.assertEqual(result.remaining, b'')
+
+
 class TestFormattedString(unittest.TestCase):
     """Tests scoring how successfully a formatted string decoded."""
 

diff --git a/pw_tokenizer/py/pw_tokenizer/decode.py b/pw_tokenizer/py/pw_tokenizer/decode.py
index fd68896..21c0632 100644
--- a/pw_tokenizer/py/pw_tokenizer/decode.py
+++ b/pw_tokenizer/py/pw_tokenizer/decode.py

@@ -55,11 +55,17 @@
     - Length (Optional)
       - TODO(gregpataky): Finish.
     - Specifiers (Required)
+      - `p`: Used for formatting a pointer address.
       - TODO(gregpataky): Finish.
 
     Underspecified details:
     - `p` is implementation defined. For this implementation, it will print
       with a `0x` prefix and then the pointer value was printed using `%08X`.
+      `p` supports the `+`, `-`, and ` ` flags, but not the `#` or `0` flags.
+      None of the length modifiers are usable with `p`. This implementation will
+      try to adhere to user-specified width (assuming the width provided is
+      larger than the guaranteed minimum of 10). Specifying precision for `p` is
+      considered an error.
 
     Non-conformant details:
     - `n` specifier: We do not support the `n` specifier since it is impossible
@@ -78,7 +84,7 @@
     )
 
     # Conversions to make format strings Python compatible.
-    _REMAP_TYPE = {'a': 'f', 'A': 'F'}
+    _REMAP_TYPE = {'a': 'f', 'A': 'F', 'p': 'X'}
 
     # Conversion specifiers by type; n is not supported.
     _SIGNED_INT = 'di'
@@ -113,26 +119,56 @@
         self.type: str = self.match.group('type')
 
         self.error = None
-
-        if self.type == 'p':
-            # %p prints as 0xFEEDBEEF.
-            self.compatible = '0x%08X'
-        elif self.type == 'n':
+        if self.type == 'n':
             self.error = 'Unsupported conversion specifier n'
-        else:
-            # N.B.: The Python %-format machinery never requires the length
-            # modifier to work correctly, and it doesn't support all of the
-            # C99 length format specifiers anyway. We remove it from the
-            # python-compaitble format string.
-            self.compatible = ''.join(
-                [
-                    '%',
-                    self.flags,
-                    self.width,
-                    self.precision,
-                    self._REMAP_TYPE.get(self.type, self.type),
-                ]
-            )
+
+        # If we are going to add additional characters to the output, we add to
+        # width_bias to ensure user-provided widths are reduced by that amount.
+        self._width_bias = 0
+        # Some of our machinery requires that we maintain a minimum precision
+        # width to ensure a certain amount of digits gets printed. This
+        # increases the user-provided precision in these cases if it was not
+        # enough.
+        self._minimum_precision = 0
+        if self.type == 'p':
+            self._width_bias = 2
+            self._minimum_precision = 8
+
+        # If we have a concrete width, we reduce it by any width bias.
+        # Otherwise, we either have no width or width is *, where the decoding
+        # logic will handle the width bias.
+        parsed_width = int(self.width.replace('*', '') or '0')
+        if parsed_width > self._width_bias:
+            self.width = f'{parsed_width - self._width_bias}'
+
+        # N.B.: Python %-operator does not support `.` without a
+        # trailing number. `.` is defined to be equivalent to `.0`.
+        if self.precision == '.':
+            self.precision = '.0'
+
+        # If we have a concrete precision that is not *, we check that it is at
+        # least minimum precision. If it is *, other parts of decoding will
+        # ensure the minimum is upheld.
+        if (
+            self.precision != '.*'
+            and int(self.precision.replace('.', '') or '0')
+            < self._minimum_precision
+        ):
+            self.precision = f'.{self._minimum_precision}'
+
+        # N.B.: The Python %-format machinery never requires the length
+        # modifier to work correctly, and it doesn't support all of the
+        # C99 length format specifiers anyway. We remove it from the
+        # Python-compatible format string.
+        self.compatible = ''.join(
+            [
+                '%',
+                self.flags,
+                self.width,
+                self.precision,
+                self._REMAP_TYPE.get(self.type, self.type),
+            ]
+        )
 
     def decode(self, encoded_arg: bytes) -> 'DecodedArg':
         """Decodes the provided data according to this format specifier."""
@@ -317,7 +353,20 @@
 
         if self.ok():
             try:
-                return self.specifier.compatible % self.value
+                result = self.specifier.compatible % self.value
+                if self.specifier.type == 'p':
+                    # Find index of the first non-space, non-plus, and non-zero
+                    # character (unless we hit the first of the 8 required hex
+                    # digits).
+                    counter = 0
+                    for i, value in enumerate(result[:-7]):
+                        if value not in [' ', '+', '0'] or i == len(result) - 8:
+                            counter = i
+                            break
+                    # Insert the pointer `0x` prefix after any leading `+`,
+                    # space, or `0` padding characters.
+                    return result[:counter] + '0x' + result[counter:]
+                return result
             except (OverflowError, TypeError, ValueError) as err:
                 self.status |= self.DECODE_ERROR
                 self.error = err
commit	e4a7f356a8e5c067cf5fb1134cfa2b5bc52c8a2c	[log] [tgz]
author	Greg Pataky <gregpataky@google.com>	Wed Feb 08 22:40:38 2023 +0000
committer	CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com>	Wed Feb 08 22:40:38 2023 +0000
tree	5cdd4244b4dba59a509a48309ba0e3b771d9a649
parent	7f0b97c24fb01200b2bc48b9bfcda8557e4474da [diff]