| # Copyright 2019 Google LLC |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Tests for parser.""" |
| |
| import unittest |
| from compiler.front_end import lr1 |
| from compiler.front_end import parser |
| from compiler.front_end import tokenizer |
| from compiler.util import parser_types |
| |
| |
| # TODO(bolms): This is repeated in lr1_test.py; separate into test utils? |
def _parse_productions(*productions):
    """Builds a grammar from textual production rules.

    Args:
        productions: Production strings (for example "C -> s"), one per
            positional argument.

    Returns:
        A list holding the result of parser_types.Production.parse for each
        argument, in the order given.
    """
    grammar = []
    for production_text in productions:
        grammar.append(parser_types.Production.parse(production_text))
    return grammar
| |
| |
# Separators used by the tests to assemble error-example text.
# A line of 80 "=" introduces each message/example group.
_EXAMPLE_DIVIDER = "\n{}\n".format("=" * 80)
# A line of 80 "-" separates a message from the examples that follow it.
_MESSAGE_ERROR_DIVIDER = "\n{}\n".format("-" * 80)
# A line of three "-" separates consecutive examples for the same message.
_ERROR_DIVIDER = "\n" + "-" * 3 + "\n"
| |
| |
class ParserGeneratorTest(unittest.TestCase):
    """Tests parser.parse_error_examples and generate_parser."""

    @staticmethod
    def _s_token():
        """Returns a new parser_types.Token("s", "s", None)."""
        return parser_types.Token("s", "s", None)

    def _assert_examples_rejected(self, text):
        """Asserts parse_error_examples(text) raises ParserGenerationError."""
        with self.assertRaises(parser.ParserGenerationError):
            parser.parse_error_examples(text)

    def _assert_generation_fails(self, start_symbol, productions, examples):
        """Asserts generate_parser(...) raises ParserGenerationError."""
        with self.assertRaises(parser.ParserGenerationError):
            parser.generate_parser(start_symbol, productions, examples)

    def test_parse_good_error_examples(self):
        errors = parser.parse_error_examples(
            _EXAMPLE_DIVIDER +                   # ======...
            "structure names must be Camel" +    # Message.
            _MESSAGE_ERROR_DIVIDER +             # ------...
            "struct $ERR FOO" +                  # First example.
            _ERROR_DIVIDER +                     # ---
            "struct $ERR foo" +                  # Second example.
            _EXAMPLE_DIVIDER +                   # ======...
            ' \n struct must be followed by ":" \n\n' +  # Second message.
            _MESSAGE_ERROR_DIVIDER +             # ------...
            "struct Foo $ERR")                   # Example for second message.
        # Each parsed example carries its token list at index 0 and its
        # message at index 2.  The expected tokens are those of the example
        # text without the $ERR marker, and the expected message has its
        # surrounding whitespace removed.
        self.assertEqual(tokenizer.tokenize("struct FOO", "")[0],
                         errors[0][0])
        self.assertEqual("structure names must be Camel", errors[0][2])
        self.assertEqual(tokenizer.tokenize("struct foo", "")[0],
                         errors[1][0])
        self.assertEqual("structure names must be Camel", errors[1][2])
        self.assertEqual(tokenizer.tokenize("struct Foo ", "")[0],
                         errors[2][0])
        self.assertEqual('struct must be followed by ":"', errors[2][2])

    def test_parse_good_wildcard_example(self):
        errors = parser.parse_error_examples(
            _EXAMPLE_DIVIDER +                   # ======...
            ' \n struct must be followed by ":" \n\n' +  # Message.
            _MESSAGE_ERROR_DIVIDER +             # ------...
            "struct Foo $ERR $ANY")              # Example with a wildcard.
        tokens = tokenizer.tokenize("struct Foo ", "")[0]
        # The $ANY token should come just before the end-of-line token in the
        # parsed result.
        tokens.insert(-1, lr1.ANY_TOKEN)
        self.assertEqual(tokens, errors[0][0])
        self.assertEqual('struct must be followed by ":"', errors[0][2])

    def test_parse_with_no_error_marker(self):
        # The example text contains no $ERR marker.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "-- doc")

    def test_that_no_error_example_fails(self):
        # The first "msg" is followed directly by another example divider, so
        # it has no examples of its own.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg" + _EXAMPLE_DIVIDER + "msg" +
            _MESSAGE_ERROR_DIVIDER + "example")

    def test_that_message_example_divider_must_be_on_its_own_line(self):
        head = _EXAMPLE_DIVIDER + "msg"
        dashes = "-" * 80
        # Dashes with no newline on either side.
        self._assert_examples_rejected(head + dashes + "example")
        # Newline before the dashes only.
        self._assert_examples_rejected(head + "\n" + dashes + "example")
        # Newline after the dashes only.
        self._assert_examples_rejected(head + dashes + "\nexample")
        # Dashes on their own line, but followed by a trailing space.
        self._assert_examples_rejected(head + "\n" + dashes + " \nexample")

    def test_that_example_divider_must_be_on_its_own_line(self):
        first = _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example"
        second = "msg" + _MESSAGE_ERROR_DIVIDER + "example"
        equals = "=" * 80
        # Equals signs with no newline on either side.
        self._assert_examples_rejected(first + equals + second)
        # Newline before the equals signs only.
        self._assert_examples_rejected(first + "\n" + equals + second)
        # Newline after the equals signs only.
        self._assert_examples_rejected(first + equals + "\n" + second)
        # Equals signs on their own line, but followed by a trailing space.
        self._assert_examples_rejected(
            first + "\n" + equals + " \n" + second)

    def test_that_tokenization_failure_results_in_failure(self):
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "message" + _MESSAGE_ERROR_DIVIDER + "|")

    def test_generate_parser(self):
        self.assertTrue(
            parser.generate_parser("C", _parse_productions("C -> s"), []))
        self.assertTrue(
            parser.generate_parser(
                "C", _parse_productions("C -> s", "C -> d"), []))

    def test_generated_parser_error(self):
        token = self._s_token
        # One error example: the input "s s", with "s" as the error token and
        # "double s" as the message.
        error_examples = [([token(), token()], token(), "double s", "ss")]
        test_parser = parser.generate_parser(
            "C", _parse_productions("C -> s", "C -> d"), error_examples)
        parse_result = test_parser.parse([token(), token()])
        self.assertIsNone(parse_result.parse_tree)
        self.assertEqual("double s", parse_result.error.code)

    def test_conflict_error(self):
        # Both S and D derive "a", so the grammar is ambiguous.
        self._assert_generation_fails(
            "C",
            _parse_productions("C -> S", "C -> D", "S -> a", "D -> a"),
            [])

    def test_bad_mark_error(self):
        token = self._s_token
        # Two examples with identical tokens but different messages.
        self._assert_generation_fails(
            "C",
            _parse_productions("C -> s", "C -> d"),
            [([token(), token()], token(), "double s", "ss"),
             ([token(), token()], token(), "double 's'", "ss")])
        # An "error" example whose tokens match the production "C -> s".
        self._assert_generation_fails(
            "C",
            _parse_productions("C -> s", "C -> d"),
            [([token()], token(), "single s", "s")])
| |
| |
class ModuleParserTest(unittest.TestCase):
    """Tests for parser.parse_module().

    Correct parses should mostly be checked in conjunction with
    module_ir.build_ir, as the exact data structure returned by
    parser.parse_module() is determined by the grammar defined in module_ir.
    These tests only need to cover errors and sanity checking.
    """

    @staticmethod
    def _parse_text(text):
        """Tokenizes text (with an empty file name) and parses the tokens."""
        return parser.parse_module(tokenizer.tokenize(text, "")[0])

    def test_error_reporting_by_example(self):
        # The field has a location and a type but no name.
        parse_result = self._parse_text("struct LogFileStatus:\n"
                                        " 0 [+4] UInt\n")
        self.assertIsNone(parse_result.parse_tree)
        self.assertEqual("A name is required for a struct field.",
                         parse_result.error.code)
        self.assertEqual('"\\n"', parse_result.error.token.symbol)
        self.assertEqual({'"["', "SnakeWord", '"."', '":"', '"("'},
                         parse_result.error.expected_tokens)

    def test_error_reporting_without_example(self):
        # A stray "+" follows the field name; the error is expected to come
        # back without an error code.
        parse_result = self._parse_text("struct LogFileStatus:\n"
                                        " 0 [+4] UInt foo +\n")
        self.assertIsNone(parse_result.parse_tree)
        self.assertIsNone(parse_result.error.code)
        self.assertEqual('"+"', parse_result.error.token.symbol)
        self.assertEqual({'"("', '"\\n"', '"["', "Documentation", "Comment"},
                         parse_result.error.expected_tokens)

    def test_ok_parse(self):
        parse_result = self._parse_text("struct LogFileStatus:\n"
                                        " 0 [+4] UInt foo\n")
        self.assertTrue(parse_result.parse_tree)
        self.assertIsNone(parse_result.error)
| |
| |
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()