# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| 14 | |
| 15 | """Tests for tokenizer.""" |
| 16 | |
| 17 | import unittest |
reventlov | 6731fc4 | 2019-10-03 15:23:13 -0700 | [diff] [blame] | 18 | from compiler.front_end import tokenizer |
| 19 | from compiler.util import error |
| 20 | from compiler.util import parser_types |
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 21 | |
| 22 | |
| 23 | def _token_symbols(token_list): |
Dmitri Prime | 495d3f2 | 2024-09-06 16:56:59 -0700 | [diff] [blame] | 24 | """Given a list of tokens, returns a list of their symbol names.""" |
| 25 | return [token.symbol for token in token_list] |
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 26 | |
| 27 | |
class TokenizerTest(unittest.TestCase):
    """Tests for the tokenizer.tokenize function."""

    def _check_single_error(self, source, file_name, location, message):
        """Tokenizes `source` and asserts exactly one error and no tokens.

        Arguments:
            source: the text to tokenize.
            file_name: the file name to pass to tokenizer.tokenize.
            location: the expected parser_types location of the error.
            message: the expected error message text.
        """
        tokens, errors = tokenizer.tokenize(source, file_name)
        self.assertFalse(tokens)
        self.assertEqual(
            [[error.error(file_name, location, message)]],
            errors,
        )

    def test_bad_indent_tab_versus_space(self):
        # A bad indent is one that doesn't match a previous unmatched indent.
        self._check_single_error(
            " a\n\tb",
            "file",
            parser_types.make_location((2, 1), (2, 2)),
            "Bad indentation",
        )

    def test_bad_indent_tab_versus_eight_spaces(self):
        # Eight spaces of open indent; a tab on the next line does not match.
        self._check_single_error(
            "        a\n\tb",
            "file",
            parser_types.make_location((2, 1), (2, 2)),
            "Bad indentation",
        )

    def test_bad_indent_tab_versus_four_spaces(self):
        # Four spaces of open indent; a tab on the next line does not match.
        self._check_single_error(
            "    a\n\tb",
            "file",
            parser_types.make_location((2, 1), (2, 2)),
            "Bad indentation",
        )

    def test_bad_indent_two_spaces_versus_one_space(self):
        # One space is a prefix of the open two-space indent, but does not
        # match it exactly, so it is a bad indent.
        self._check_single_error(
            "  a\n b",
            "file",
            parser_types.make_location((2, 1), (2, 2)),
            "Bad indentation",
        )

    def test_bad_indent_matches_closed_indent(self):
        # " d" matches the one-space indent opened by " a", but that indent
        # was closed by "b"; only the open "  c" indent (or none) may match.
        self._check_single_error(
            " a\nb\n  c\n d",
            "file",
            parser_types.make_location((4, 1), (4, 2)),
            "Bad indentation",
        )

    def test_bad_string_after_string_with_escaped_backslash_at_end(self):
        # The first four characters form the complete string '"\\"'; the
        # lone '"' at column 5 starts an unterminated token.
        self._check_single_error(
            r'"\\""',
            "name",
            parser_types.make_location((1, 5), (1, 6)),
            "Unrecognized token",
        )
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 127 | |
| 128 | |
def _make_short_token_match_tests():
    """Makes tests for short, simple tokenization cases.

    Each key of `cases` is a source text; the value is the exact sequence of
    token symbol names that tokenizing that text should produce, with no
    errors.  One test method per case is attached to TokenizerTest.
    """
    eol = '"\\n"'
    cases = {
        "Cam": ["CamelWord", eol],
        "Ca9": ["CamelWord", eol],
        "CanB": ["CamelWord", eol],
        "CanBee": ["CamelWord", eol],
        "CBa": ["CamelWord", eol],
        "cam": ["SnakeWord", eol],
        "ca9": ["SnakeWord", eol],
        "can_b": ["SnakeWord", eol],
        "can_bee": ["SnakeWord", eol],
        "c_ba": ["SnakeWord", eol],
        "cba_": ["SnakeWord", eol],
        "c_b_a_": ["SnakeWord", eol],
        "CAM": ["ShoutyWord", eol],
        "CA9": ["ShoutyWord", eol],
        "CAN_B": ["ShoutyWord", eol],
        "CAN_BEE": ["ShoutyWord", eol],
        "C_BA": ["ShoutyWord", eol],
        "C": ["BadWord", eol],
        "C1": ["BadWord", eol],
        "c": ["SnakeWord", eol],
        "$": ["BadWord", eol],
        "_": ["BadWord", eol],
        "_a": ["BadWord", eol],
        "_A": ["BadWord", eol],
        "Cb_A": ["BadWord", eol],
        "aCb": ["BadWord", eol],
        "a b": ["SnakeWord", "SnakeWord", eol],
        "a\tb": ["SnakeWord", "SnakeWord", eol],
        "a \t b ": ["SnakeWord", "SnakeWord", eol],
        " \t ": [eol],
        "a #b": ["SnakeWord", "Comment", eol],
        "a#": ["SnakeWord", "Comment", eol],
        "# b": ["Comment", eol],
        " # b": ["Comment", eol],
        " #": ["Comment", eol],
        "": [],
        "\n": [eol],
        "\na": [eol, "SnakeWord", eol],
        "a--example": ["SnakeWord", "BadDocumentation", eol],
        "a ---- example": ["SnakeWord", "BadDocumentation", eol],
        "a --- example": ["SnakeWord", "BadDocumentation", eol],
        "a-- example": ["SnakeWord", "Documentation", eol],
        "a -- -- example": ["SnakeWord", "Documentation", eol],
        "a -- - example": ["SnakeWord", "Documentation", eol],
        "--": ["Documentation", eol],
        "-- ": ["Documentation", eol],
        # Two trailing spaces: distinct from the single-trailing-space key
        # above (a duplicate key would silently collapse to one test).
        "--  ": ["Documentation", eol],
        "$default": ['"$default"', eol],
        "$defaultx": ["BadWord", eol],
        "$def": ["BadWord", eol],
        "x$default": ["BadWord", eol],
        "9$default": ["BadWord", eol],
        "struct": ['"struct"', eol],
        "external": ['"external"', eol],
        "bits": ['"bits"', eol],
        "enum": ['"enum"', eol],
        "as": ['"as"', eol],
        "import": ['"import"', eol],
        "true": ["BooleanConstant", eol],
        "false": ["BooleanConstant", eol],
        "truex": ["SnakeWord", eol],
        "falsex": ["SnakeWord", eol],
        "structx": ["SnakeWord", eol],
        "bitsx": ["SnakeWord", eol],
        "enumx": ["SnakeWord", eol],
        "0b": ["BadNumber", eol],
        "0x": ["BadNumber", eol],
        "0b011101": ["Number", eol],
        "0b0": ["Number", eol],
        "0b0111_1111_0000": ["Number", eol],
        "0b00_000_00": ["BadNumber", eol],
        "0b0_0_0": ["BadNumber", eol],
        "0b0111012": ["BadNumber", eol],
        "0b011101x": ["BadWord", eol],
        "0b011101b": ["BadNumber", eol],
        "0B0": ["BadNumber", eol],
        "0X0": ["BadNumber", eol],
        "0b_": ["BadNumber", eol],
        "0x_": ["BadNumber", eol],
        "0b__": ["BadNumber", eol],
        "0x__": ["BadNumber", eol],
        "0b_0000": ["Number", eol],
        "0b0000_": ["BadNumber", eol],
        "0b00_____00": ["BadNumber", eol],
        "0x00_000_00": ["BadNumber", eol],
        "0x0_0_0": ["BadNumber", eol],
        "0b____0____": ["BadNumber", eol],
        "0b00000000000000000000": ["Number", eol],
        "0b_00000000": ["Number", eol],
        "0b0000_0000_0000": ["Number", eol],
        "0b000_0000_0000": ["Number", eol],
        "0b00_0000_0000": ["Number", eol],
        "0b0_0000_0000": ["Number", eol],
        "0b_0000_0000_0000": ["Number", eol],
        "0b_000_0000_0000": ["Number", eol],
        "0b_00_0000_0000": ["Number", eol],
        "0b_0_0000_0000": ["Number", eol],
        "0b00000000_00000000_00000000": ["Number", eol],
        "0b0000000_00000000_00000000": ["Number", eol],
        "0b000000_00000000_00000000": ["Number", eol],
        "0b00000_00000000_00000000": ["Number", eol],
        "0b0000_00000000_00000000": ["Number", eol],
        "0b000_00000000_00000000": ["Number", eol],
        "0b00_00000000_00000000": ["Number", eol],
        "0b0_00000000_00000000": ["Number", eol],
        "0b_00000000_00000000_00000000": ["Number", eol],
        "0b_0000000_00000000_00000000": ["Number", eol],
        "0b_000000_00000000_00000000": ["Number", eol],
        "0b_00000_00000000_00000000": ["Number", eol],
        "0b_0000_00000000_00000000": ["Number", eol],
        "0b_000_00000000_00000000": ["Number", eol],
        "0b_00_00000000_00000000": ["Number", eol],
        "0b_0_00000000_00000000": ["Number", eol],
        "0x0": ["Number", eol],
        "0x00000000000000000000": ["Number", eol],
        "0x_0000": ["Number", eol],
        "0x_00000000": ["Number", eol],
        "0x0000_0000_0000": ["Number", eol],
        "0x000_0000_0000": ["Number", eol],
        "0x00_0000_0000": ["Number", eol],
        "0x0_0000_0000": ["Number", eol],
        "0x_0000_0000_0000": ["Number", eol],
        "0x_000_0000_0000": ["Number", eol],
        "0x_00_0000_0000": ["Number", eol],
        "0x_0_0000_0000": ["Number", eol],
        "0x00000000_00000000_00000000": ["Number", eol],
        "0x0000000_00000000_00000000": ["Number", eol],
        "0x000000_00000000_00000000": ["Number", eol],
        "0x00000_00000000_00000000": ["Number", eol],
        "0x0000_00000000_00000000": ["Number", eol],
        "0x000_00000000_00000000": ["Number", eol],
        "0x00_00000000_00000000": ["Number", eol],
        "0x0_00000000_00000000": ["Number", eol],
        "0x_00000000_00000000_00000000": ["Number", eol],
        "0x_0000000_00000000_00000000": ["Number", eol],
        "0x_000000_00000000_00000000": ["Number", eol],
        "0x_00000_00000000_00000000": ["Number", eol],
        "0x_0000_00000000_00000000": ["Number", eol],
        "0x_000_00000000_00000000": ["Number", eol],
        "0x_00_00000000_00000000": ["Number", eol],
        "0x_0_00000000_00000000": ["Number", eol],
        "0x__00000000_00000000": ["BadNumber", eol],
        "0x00000000_00000000_0000": ["BadNumber", eol],
        "0x00000000_0000_0000": ["BadNumber", eol],
        "0x_00000000000000000000": ["BadNumber", eol],
        "0b_00000000000000000000": ["BadNumber", eol],
        "0b00000000_00000000_0000": ["BadNumber", eol],
        "0b00000000_0000_0000": ["BadNumber", eol],
        "0x0000_": ["BadNumber", eol],
        "0x00_____00": ["BadNumber", eol],
        "0x____0____": ["BadNumber", eol],
        "EmbossReserved": ["BadWord", eol],
        "EmbossReservedA": ["BadWord", eol],
        "EmbossReserved_": ["BadWord", eol],
        "EMBOSS_RESERVED": ["BadWord", eol],
        "EMBOSS_RESERVED_": ["BadWord", eol],
        "EMBOSS_RESERVEDA": ["BadWord", eol],
        "emboss_reserved": ["BadWord", eol],
        "emboss_reserved_": ["BadWord", eol],
        "emboss_reserveda": ["BadWord", eol],
        "0x0123456789abcdefABCDEF": ["Number", eol],
        "0": ["Number", eol],
        "1": ["Number", eol],
        "1a": ["BadNumber", eol],
        "1g": ["BadWord", eol],
        "1234567890": ["Number", eol],
        "1_234_567_890": ["Number", eol],
        "234_567_890": ["Number", eol],
        "34_567_890": ["Number", eol],
        "4_567_890": ["Number", eol],
        "1_2_3_4_5_6_7_8_9_0": ["BadNumber", eol],
        "1234567890_": ["BadNumber", eol],
        "1__234567890": ["BadNumber", eol],
        "_1234567890": ["BadWord", eol],
        "[]": ['"["', '"]"', eol],
        "()": ['"("', '")"', eol],
        "..": ['"."', '"."', eol],
        "...": ['"."', '"."', '"."', eol],
        "....": ['"."', '"."', '"."', '"."', eol],
        '"abc"': ["String", eol],
        '""': ["String", eol],
        r'"\\"': ["String", eol],
        r'"\""': ["String", eol],
        r'"\n"': ["String", eol],
        r'"\\n"': ["String", eol],
        r'"\\xyz"': ["String", eol],
        r'"\\\\"': ["String", eol],
    }
    # Every punctuation operator and keyword tokenizes to a symbol named by
    # its own quoted text.
    for c in (
        "[ ] ( ) ? : = + - * . == != < <= > >= && || , $max $present "
        "$upper_bound $lower_bound $size_in_bits $size_in_bytes "
        "$max_size_in_bits $max_size_in_bytes $min_size_in_bits "
        "$min_size_in_bytes "
        "$default struct bits enum external import as if let"
    ).split():
        cases[c] = ['"' + c + '"', eol]

    def make_test_case(case):
        # `case` is bound per call, so each generated test sees its own text.

        def test_case(self):
            tokens, errors = tokenizer.tokenize(case, "name")
            symbols = _token_symbols(tokens)
            self.assertFalse(errors)
            self.assertEqual(symbols, cases[case])

        return test_case

    for c in cases:
        setattr(TokenizerTest, "testShortTokenMatch{!r}".format(c), make_test_case(c))
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 342 | |
| 343 | |
def _make_bad_char_tests():
    """Makes tests that an error is returned for bad characters.

    For every character that the tokenizer does not recognize, a test is
    attached to TokenizerTest asserting that tokenization yields no tokens
    and a single "Unrecognized token" error at the first column.
    """

    def make_test_case(bad_char):

        def test_case(self):
            tokens, errors = tokenizer.tokenize(bad_char, "name")
            expected_error = error.error(
                "name",
                parser_types.make_location((1, 1), (1, 2)),
                "Unrecognized token",
            )
            self.assertFalse(tokens)
            self.assertEqual([[expected_error]], errors)

        return test_case

    for bad_char in "~`!@%^&\\|;'\"/{}":
        setattr(
            TokenizerTest,
            "testBadChar{!r}".format(bad_char),
            make_test_case(bad_char),
        )
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 369 | |
| 370 | |
def _make_bad_string_tests():
    """Makes tests that an error is returned for bad strings.

    Each entry in `bad_strings` is a malformed string literal (unterminated,
    bad escape, or embedded newline); tokenizing it must produce a single
    "Unrecognized token" error at the opening quote and no tokens.
    """
    bad_strings = (r'"\"', '"\\\n"', r'"\\\"', r'"', r'"\q"', r'"\\\q"')

    def make_test_case(bad_string):

        def test_case(self):
            tokens, errors = tokenizer.tokenize(bad_string, "name")
            expected_errors = [
                [
                    error.error(
                        "name",
                        parser_types.make_location((1, 1), (1, 2)),
                        "Unrecognized token",
                    )
                ]
            ]
            self.assertFalse(tokens)
            self.assertEqual(expected_errors, errors)

        return test_case

    for bad_string in bad_strings:
        setattr(
            TokenizerTest,
            "testBadString{!r}".format(bad_string),
            make_test_case(bad_string),
        )
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 397 | |
| 398 | |
def _make_multiline_tests():
    """Makes tests for indent/dedent insertion and eol insertion.

    Each key of `cases` is a multi-line source text; the value is the exact
    sequence of token symbols the tokenizer should emit, including synthetic
    Indent/Dedent tokens and the end-of-line token after each line.
    """

    c = "Comment"
    eol = '"\\n"'
    sw = "SnakeWord"
    ind = "Indent"
    ded = "Dedent"
    cases = {
        "a\nb\n": [sw, eol, sw, eol],
        "a\n\nb\n": [sw, eol, eol, sw, eol],
        "a\n#foo\nb\n": [sw, eol, c, eol, sw, eol],
        # Comment-only lines do not open or close indents.
        "a\n #foo\nb\n": [sw, eol, c, eol, sw, eol],
        "a\n b\n": [sw, eol, ind, sw, eol, ded],
        "a\n b\n\n": [sw, eol, ind, sw, eol, eol, ded],
        # "c" is indented more deeply than "b", so a second Indent opens (and
        # both close at end of input).  Note: deeper than the single-indent
        # key below -- these must remain distinct dict keys.
        "a\n b\n  c\n": [sw, eol, ind, sw, eol, ind, sw, eol, ded, ded],
        "a\n b\n c\n": [sw, eol, ind, sw, eol, sw, eol, ded],
        "a\n b\n\n c\n": [sw, eol, ind, sw, eol, eol, sw, eol, ded],
        "a\n b\n #\n c\n": [sw, eol, ind, sw, eol, c, eol, sw, eol, ded],
        "a\n\tb\n #\n\tc\n": [sw, eol, ind, sw, eol, c, eol, sw, eol, ded],
        # Three strictly increasing indents; " d" then dedents back to the
        # still-open one-space level before the final Dedent.
        " a\n  b\n   c\n d\n": [
            ind,
            sw,
            eol,
            ind,
            sw,
            eol,
            ind,
            sw,
            eol,
            ded,
            ded,
            sw,
            eol,
            ded,
        ],
    }

    def make_test_case(case):

        def test_case(self):
            tokens, errors = tokenizer.tokenize(case, "file")
            self.assertFalse(errors)
            self.assertEqual(_token_symbols(tokens), cases[case])

        return test_case

    for c in cases:
        setattr(TokenizerTest, "testMultiline{!r}".format(c), make_test_case(c))
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 448 | |
| 449 | |
def _make_offset_tests():
    """Makes tests that the tokenizer fills in correct source locations.

    Each key of `cases` is a source text; the value lists the formatted
    source location ("line:start_col-line:end_col") of every emitted token,
    in order, including synthetic Indent/Dedent and end-of-line tokens.
    """
    cases = {
        "a+": ["1:1-1:2", "1:2-1:3", "1:3-1:3"],
        # Three spaces on each side: "+" starts at column 5 and the final
        # end-of-line token sits at column 9.
        "a   +   ": ["1:1-1:2", "1:5-1:6", "1:9-1:9"],
        "a\n\nb": ["1:1-1:2", "1:2-1:2", "2:1-2:1", "3:1-3:2", "3:2-3:2"],
        # Two-space indent: the Indent token covers columns 1-3.
        "a\n  b": ["1:1-1:2", "1:2-1:2", "2:1-2:3", "2:3-2:4", "2:4-2:4", "3:1-3:1"],
        "a\n  b\nc": [
            "1:1-1:2",
            "1:2-1:2",
            "2:1-2:3",
            "2:3-2:4",
            "2:4-2:4",
            "3:1-3:1",
            "3:1-3:2",
            "3:2-3:2",
        ],
        # One-space indent for "b", two-space for "c": the second Indent
        # token covers only the additional whitespace (3:2-3:3), and both
        # Dedents land at the start of the (empty) line 4.
        "a\n b\n  c": [
            "1:1-1:2",
            "1:2-1:2",
            "2:1-2:2",
            "2:2-2:3",
            "2:3-2:3",
            "3:2-3:3",
            "3:3-3:4",
            "3:4-3:4",
            "4:1-4:1",
            "4:1-4:1",
        ],
    }

    def make_test_case(case):

        def test_case(self):
            self.assertEqual(
                [
                    parser_types.format_location(l.source_location)
                    for l in tokenizer.tokenize(case, "file")[0]
                ],
                cases[case],
            )

        return test_case

    for c in cases:
        setattr(TokenizerTest, "testOffset{!r}".format(c), make_test_case(c))
| 496 | |
Ben Olmstead | c0d7784 | 2019-07-31 17:34:05 -0700 | [diff] [blame] | 497 | |
# Attach all dynamically generated test methods to TokenizerTest at import
# time, so unittest discovery sees them like ordinary test methods.
for _test_factory in (
    _make_short_token_match_tests,
    _make_bad_char_tests,
    _make_bad_string_tests,
    _make_multiline_tests,
    _make_offset_tests,
):
    _test_factory()

if __name__ == "__main__":
    unittest.main()