pw_compilation_testing: Relax PW_NC_EXPECT syntax; add tests

Allow PW_NC_EXPECT() statements to span multiple lines. The statements
must contain only string literals or //-style comments. This prevents
clang-format from breaking PW_NC_EXPECT() statements when it reflows
string literals.

With this change, statements like the following are permitted:

  PW_NC_EXPECT(  // This is a comment
               "This is a \"very\" long string that didn't "  // Comment
               "fit on one line,\nokay?");

Also added tests for negative compilation test parsing.

Change-Id: Ie7ad157c817eaf7f0f576bcd841f00426623b635
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/107678
Reviewed-by: Armando Montanez <amontanez@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
diff --git a/pw_compilation_testing/docs.rst b/pw_compilation_testing/docs.rst
index 00ce0c1..a03bae1 100644
--- a/pw_compilation_testing/docs.rst
+++ b/pw_compilation_testing/docs.rst
@@ -94,11 +94,11 @@
   line.
 - Execute the tests by running the build.
 
-To simplify parsing, all ``PW_NC_TEST()`` and ``PW_NC_EXPECT()`` statements
-must fit within a single line. If they are too long for one line, disable
-automatic formatting around them with ``// clang-format disable`` or split
-``PW_NC_EXPECT()`` statements into multiple statements on separate lines. This
-restriction may be relaxed in the future.
+To simplify parsing, all ``PW_NC_TEST()`` statements must fit on a single line.
+``PW_NC_EXPECT()`` statements may span multiple lines, but must contain a single
+regular expression as a string literal. The string may be comprised of multiple
+implicitly concatenated string literals. The ``PW_NC_EXPECT()`` statement cannot
+contain anything else except for ``//``-style comments.
 
 Test assertions
 ===============
diff --git a/pw_compilation_testing/py/BUILD.gn b/pw_compilation_testing/py/BUILD.gn
index 35d2644..a8ffb4d 100644
--- a/pw_compilation_testing/py/BUILD.gn
+++ b/pw_compilation_testing/py/BUILD.gn
@@ -27,6 +27,7 @@
     "pw_compilation_testing/generator.py",
     "pw_compilation_testing/runner.py",
   ]
+  tests = [ "generator_test.py" ]
   python_deps = [ "$dir_pw_cli/py" ]
   pylintrc = "$dir_pigweed/.pylintrc"
 }
diff --git a/pw_compilation_testing/py/generator_test.py b/pw_compilation_testing/py/generator_test.py
new file mode 100644
index 0000000..875cdd4
--- /dev/null
+++ b/pw_compilation_testing/py/generator_test.py
@@ -0,0 +1,107 @@
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Tests for the negative compilation test generator."""
+
+from pathlib import Path
+import re
+import tempfile
+import unittest
+
+from pw_compilation_testing.generator import (Compiler, Expectation,
+                                              ParseError, TestCase,
+                                              enumerate_tests)
+
+SOURCE = r'''
+#if PW_NC_TEST(FirstTest)
+PW_NC_EXPECT("abcdef");
+
+SomeSourceCode();
+
+#endif  // PW_NC_TEST
+
+#if PW_NC_TEST(SecondTest)
+PW_NC_EXPECT("\"\"abc123"    // Include " and other escapes in the string
+             "def'456\""
+             // Goodbye
+             "ghi\n\t789"  // ???
+); // abc
+
+#endif  // PW_NC_TEST
+'''
+
+ILLEGAL_COMMENT = '''
+#if PW_NC_TEST(FirstTest)
+PW_NC_EXPECT("abcdef" /* illegal comment */);
+
+#endif  // PW_NC_TEST
+'''
+
+UNTERMINATED_EXPECTATION = '#if PW_NC_TEST(FirstTest)\nPW_NC_EXPECT("abcdef"\n'
+
+
+def _write_to_temp_file(contents: str) -> Path:
+    file = tempfile.NamedTemporaryFile('w', delete=False)
+    file.write(contents)
+    file.close()
+    return Path(file.name)
+
+
+class ParserTest(unittest.TestCase):
+    """Tests parsing negative compilation tests from a file."""
+    def test_successful(self) -> None:
+        try:
+            path = _write_to_temp_file(SOURCE)
+
+            self.assertEqual([
+                TestCase(
+                    'TestSuite',
+                    'FirstTest',
+                    (Expectation(Compiler.ANY, re.compile('abcdef'), 3), ),
+                    path,
+                    2,
+                ),
+                TestCase(
+                    'TestSuite',
+                    'SecondTest',
+                    (Expectation(Compiler.ANY,
+                                 re.compile('""abc123def\'456"ghi\\n\\t789'),
+                                 10), ),
+                    path,
+                    9,
+                )
+            ], list(enumerate_tests('TestSuite', [path])))
+        finally:
+            path.unlink()
+
+    def test_illegal_comment(self) -> None:
+        try:
+            path = _write_to_temp_file(ILLEGAL_COMMENT)
+            with self.assertRaises(ParseError):
+                list(enumerate_tests('TestSuite', [path]))
+        finally:
+            path.unlink()
+
+    def test_unterminated_expectation(self) -> None:
+        try:
+            path = _write_to_temp_file(UNTERMINATED_EXPECTATION)
+            with self.assertRaises(ParseError) as err:
+                list(enumerate_tests('TestSuite', [path]))
+        finally:
+            path.unlink()
+
+        self.assertIn('Unterminated', str(err.exception))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pw_compilation_testing/py/pw_compilation_testing/generator.py b/pw_compilation_testing/py/pw_compilation_testing/generator.py
index f4f97f2..ae33a04 100644
--- a/pw_compilation_testing/py/pw_compilation_testing/generator.py
+++ b/pw_compilation_testing/py/pw_compilation_testing/generator.py
@@ -30,24 +30,24 @@
 import pickle
 import re
 import sys
-from typing import (Iterable, Iterator, List, NamedTuple, NoReturn, Pattern,
-                    Sequence, Set, Tuple)
+from typing import (Iterable, Iterator, List, NamedTuple, NoReturn, Optional,
+                    Pattern, Sequence, Set, Tuple)
 
 # Matches the #if or #elif statement that starts a compile fail test.
-_TEST_START = re.compile(br'^[ \t]*#[ \t]*(?:el)?if[ \t]+PW_NC_TEST\([ \t]*')
+_TEST_START = re.compile(r'^[ \t]*#[ \t]*(?:el)?if[ \t]+PW_NC_TEST\([ \t]*')
 
 # Matches the name of a test case.
 _TEST_NAME = re.compile(
-    br'(?P<name>[a-zA-Z0-9_]+)[ \t]*\)[ \t]*(?://.*|/\*.*)?$')
+    r'(?P<name>[a-zA-Z0-9_]+)[ \t]*\)[ \t]*(?://.*|/\*.*)?$')
 
 # Negative compilation test commands take the form PW_NC_EXPECT("regex"),
 # PW_NC_EXPECT_GCC("regex"), or PW_NC_EXPECT_CLANG("regex"). PW_NC_EXPECT() is
 # an error.
-_EXPECT_START = re.compile(br'^[ \t]*PW_NC_EXPECT(?P<compiler>_GCC|_CLANG)?\(')
+_EXPECT_START = re.compile(r'^[ \t]*PW_NC_EXPECT(?P<compiler>_GCC|_CLANG)?\(')
 
 # EXPECT statements are regular expressions that must match the compiler output.
 # They must fit on a single line.
-_EXPECT_REGEX = re.compile(br'(?P<regex>"[^\n]+")\);[ \t]*(?://.*|/\*.*)?$')
+_EXPECT_REGEX = re.compile(r'(?P<regex>"[^\n]+")\);[ \t]*(?://.*|/\*.*)?$')
 
 
 class Compiler(Enum):
@@ -99,18 +99,93 @@
 
 class ParseError(Exception):
     """Failed to parse a PW_NC_TEST."""
-    def __init__(self, message: str, file: Path, lines: Sequence[bytes],
+    def __init__(self, message: str, file: Path, lines: Sequence[str],
                  error_lines: Sequence[int]) -> None:
         for i in error_lines:
-            message += (
-                f'\n{file.name}:{i + 1}: {lines[i].decode(errors="replace")}')
+            message += f'\n{file.name}:{i + 1}: {lines[i]}'
         super().__init__(message)
 
 
+class _ExpectationParser:
+    """Parses expecatations from 'PW_NC_EXPECT(' to the final ');'."""
+    class _State:
+        SPACE = 0  # Space characters, which are ignored
+        COMMENT_START = 1  # First / in a //-style comment
+        COMMENT = 2  # Everything after // on a line
+        OPEN_QUOTE = 3  # Starting quote for a string literal
+        CHARACTERS = 4  # Characters within a string literal
+        ESCAPE = 5  # \ within a string literal, which may escape a "
+        CLOSE_PAREN = 6  # Closing parenthesis to the PW_NC_EXPECT statement.
+
+    def __init__(self, index: int, compiler: Compiler) -> None:
+        self.index = index
+        self._compiler = compiler
+        self._state = self._State.SPACE
+        self._contents: List[str] = []
+
+    def parse(self, chars: str) -> Optional[Expectation]:
+        """State machine that parses characters in PW_NC_EXPECT()."""
+        for char in chars:
+            if self._state is self._State.SPACE:
+                if char == '"':
+                    self._state = self._State.CHARACTERS
+                elif char == ')':
+                    self._state = self._State.CLOSE_PAREN
+                elif char == '/':
+                    self._state = self._State.COMMENT_START
+                elif not char.isspace():
+                    raise ValueError(f'Unexpected character "{char}"')
+            elif self._state is self._State.COMMENT_START:
+                if char == '*':
+                    raise ValueError(
+                        '"/* */" comments are not supported; use // instead')
+                if char != '/':
+                    raise ValueError(f'Unexpected character "{char}"')
+                self._state = self._State.COMMENT
+            elif self._state is self._State.COMMENT:
+                if char == '\n':
+                    self._state = self._State.SPACE
+            elif self._state is self._State.CHARACTERS:
+                if char == '"':
+                    self._state = self._State.SPACE
+                elif char == '\\':
+                    self._state = self._State.ESCAPE
+                else:
+                    self._contents.append(char)
+            elif self._state is self._State.ESCAPE:
+                # Include escaped " directly. Restore the \ for other chars.
+                if char != '"':
+                    self._contents.append('\\')
+                self._contents.append(char)
+                self._state = self._State.CHARACTERS
+            elif self._state is self._State.CLOSE_PAREN:
+                if char != ';':
+                    raise ValueError(f'Expected ";", found "{char}"')
+
+                return self._expectation(''.join(self._contents))
+
+        return None
+
+    def _expectation(self, regex: str) -> Expectation:
+        if '"""' in regex:
+            raise ValueError('The regular expression cannot contain """')
+
+        # Evaluate the string from the C++ source as a raw literal.
+        re_string = eval(f'r"""{regex}"""')  # pylint: disable=eval-used
+        if not isinstance(re_string, str):
+            raise ValueError('The regular expression must be a string!')
+
+        try:
+            return Expectation(self._compiler, re.compile(re_string),
+                               self.index + 1)
+        except re.error as error:
+            raise ValueError('Invalid regular expression: ' + error.msg)
+
+
 class _NegativeCompilationTestSource:
     def __init__(self, file: Path) -> None:
         self._file = file
-        self._lines = self._file.read_bytes().splitlines()
+        self._lines = self._file.read_text().splitlines(keepends=True)
 
         self._parsed_expectations: Set[int] = set()
 
@@ -118,41 +193,45 @@
         raise ParseError(message, self._file, self._lines, error_lines)
 
     def _parse_expectations(self, start: int) -> Iterator[Expectation]:
+        expectation: Optional[_ExpectationParser] = None
+
         for index in range(start, len(self._lines)):
             line = self._lines[index]
 
-            expect_match = _EXPECT_START.match(line)
+            # Skip empty or comment lines
+            if not line or line.isspace() or line.lstrip().startswith('//'):
+                continue
 
-            if not expect_match:
-                # Skip empty lines or lines with comments
-                if not line or line.isspace() or line.lstrip().startswith(
-                    (b'//', b'/*')):
-                    continue
-                break
+            # Look for a 'PW_NC_EXPECT(' in the code.
+            if not expectation:
+                expect_match = _EXPECT_START.match(line)
+                if not expect_match:
+                    break  # No expectation found, stop processing.
 
-            compiler_str = expect_match['compiler'] or b'ANY'
-            compiler = Compiler[compiler_str.lstrip(b'_').decode()]
+                compiler = expect_match['compiler'] or 'ANY'
+                expectation = _ExpectationParser(
+                    index, Compiler[compiler.lstrip('_')])
 
-            self._parsed_expectations.add(index)
+                self._parsed_expectations.add(index)
 
-            regex_match = _EXPECT_REGEX.match(line, expect_match.end())
-            if not regex_match:
-                self._error(
-                    'Failed to parse PW_EXPECT_NC() statement. PW_EXPECT_NC() '
-                    'statements must fit on one line and contain a single, '
-                    'non-empty string literal.', index)
+                # Remove the 'PW_NC_EXPECT(' so the line starts with the regex.
+                line = line[expect_match.end():]
 
-            # Evaluate the string from the C++ source as a raw literal.
-            re_string = eval(b'r' + regex_match['regex'])  # pylint: disable=eval-used
-            if not isinstance(re_string, str):
-                self._error('The regular expression must be a string!', index)
-
+            # Find the regex after previously finding 'PW_NC_EXPECT('.
             try:
-                yield Expectation(compiler, re.compile(re_string), index + 1)
-            except re.error as error:
-                self._error('Invalid regular expression: ' + error.msg, index)
+                if parsed_expectation := expectation.parse(line.lstrip()):
+                    yield parsed_expectation
 
-            start = regex_match.end()
+                    expectation = None
+            except ValueError as err:
+                self._error(
+                    f'Failed to parse PW_NC_EXPECT() statement: {err}. '
+                    'PW_NC_EXPECT() must contain only a string literal and '
+                    '//-style comments.', index)
+
+        if expectation:
+            self._error('Unterminated PW_NC_EXPECT() statement!',
+                        expectation.index)
 
     def _check_for_stray_expectations(self) -> None:
         all_expectations = frozenset(i for i in range(len(self._lines))
@@ -173,13 +252,12 @@
             if not name_match:
                 self._error(
                     'Negative compilation test syntax error. '
-                    'Expected test name, found '
-                    f"'{line[case_match.end():].decode(errors='replace')}'",
+                    f"Expected test name, found '{line[case_match.end():]}'",
                     index)
 
             expectations = tuple(self._parse_expectations(index + 1))
-            yield TestCase(suite, name_match['name'].decode(), expectations,
-                           self._file, index + 1)
+            yield TestCase(suite, name_match['name'], expectations, self._file,
+                           index + 1)
 
         self._check_for_stray_expectations()