blob: 54bcdcc9f639d218b2b5903475df23c66a2825a5 [file] [log] [blame]
Ben Olmsteadc0d77842019-07-31 17:34:05 -07001# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Symbol resolver for Emboss IR.
16
17The resolve_symbols function should be used to generate canonical resolutions
18for all symbol references in an Emboss IR.
19"""
20
21import collections
22
reventlov6731fc42019-10-03 15:23:13 -070023from compiler.util import error
Ben Olmstead231e8ee2019-10-04 17:39:31 -070024from compiler.util import ir_pb2
reventlov6731fc42019-10-03 15:23:13 -070025from compiler.util import ir_util
26from compiler.util import traverse_ir
Ben Olmsteadc0d77842019-07-31 17:34:05 -070027
28# TODO(bolms): Symbol resolution raises an exception at the first error, but
29# this is one place where it can make sense to report multiple errors.
30
31FileLocation = collections.namedtuple("FileLocation", ["file", "location"])
32
33
34def ambiguous_name_error(file_name, location, name, candidate_locations):
35 """A name cannot be resolved because there are two or more candidates."""
36 result = [error.error(file_name, location, "Ambiguous name '{}'".format(name))
37 ]
38 for location in sorted(candidate_locations):
39 result.append(error.note(location.file, location.location,
40 "Possible resolution"))
41 return result
42
43
44def duplicate_name_error(file_name, location, name, original_location):
45 """A name is defined two or more times."""
46 return [error.error(file_name, location, "Duplicate name '{}'".format(name)),
47 error.note(original_location.file, original_location.location,
48 "Original definition")]
49
50
51def missing_name_error(file_name, location, name):
52 return [error.error(file_name, location, "No candidate for '{}'".format(name))
53 ]
54
55
56def array_subfield_error(file_name, location, name):
57 return [error.error(file_name, location,
58 "Cannot access member of array '{}'".format(name))]
59
60
61def noncomposite_subfield_error(file_name, location, name):
62 return [error.error(file_name, location,
63 "Cannot access member of noncomposite field '{}'".format(
64 name))]
65
66
67def _nested_name(canonical_name, name):
68 """Creates a new CanonicalName with name appended to the object_path."""
69 return ir_pb2.CanonicalName(
70 module_file=canonical_name.module_file,
71 object_path=list(canonical_name.object_path) + [name])
72
73
74class _Scope(dict):
75 """A _Scope holds data for a symbol.
76
77 A _Scope is a dict with some additional attributes. Lexically nested names
78 are kept in the dict, and bookkeeping is kept in the additional attributes.
79
80 For example, each module should have a child _Scope for each type contained in
81 the module. `struct` and `bits` types should have nested _Scopes for each
82 field; `enum` types should have nested scopes for each enumerated name.
83
84 Attributes:
85 canonical_name: The absolute name of this symbol; e.g. ("file.emb",
86 "TypeName", "SubTypeName", "field_name")
87 source_location: The ir_pb2.SourceLocation where this symbol is defined.
88 visibility: LOCAL, PRIVATE, or SEARCHABLE; see below.
89 alias: If set, this name is merely a pointer to another name.
90 """
91 __slots__ = ("canonical_name", "source_location", "visibility", "alias")
92
93 # A LOCAL name is visible outside of its enclosing scope, but should not be
94 # found when searching for a name. That is, this name should be matched in
95 # the tail of a qualified reference (the 'bar' in 'foo.bar'), but not when
96 # searching for names (the 'foo' in 'foo.bar' should not match outside of
97 # 'foo's scope). This applies to public field names.
98 LOCAL = object()
99
100 # A PRIVATE name is similar to LOCAL except that it is never visible outside
101 # its enclosing scope. This applies to abbreviations of field names: if 'a'
102 # is an abbreviation for field 'apple', then 'foo.a' is not a valid reference;
103 # instead it should be 'foo.apple'.
104 PRIVATE = object()
105
106 # A SEARCHABLE name is visible as long as it is in a scope in the search list.
107 # This applies to type names ('Foo'), which may be found from many scopes.
108 SEARCHABLE = object()
109
110 def __init__(self, canonical_name, source_location, visibility, alias=None):
111 super(_Scope, self).__init__()
112 self.canonical_name = canonical_name
113 self.source_location = source_location
114 self.visibility = visibility
115 self.alias = alias
116
117
118def _add_name_to_scope(name_ir, scope, canonical_name, visibility, errors):
119 """Adds the given name_ir to the given scope."""
120 name = name_ir.text
121 new_scope = _Scope(canonical_name, name_ir.source_location, visibility)
122 if name in scope:
123 errors.append(duplicate_name_error(
124 scope.canonical_name.module_file, name_ir.source_location, name,
125 FileLocation(scope[name].canonical_name.module_file,
126 scope[name].source_location)))
127 else:
128 scope[name] = new_scope
129 return new_scope
130
131
132def _add_name_to_scope_and_normalize(name_ir, scope, visibility, errors):
133 """Adds the given name_ir to scope and sets its canonical_name."""
134 name = name_ir.name.text
135 canonical_name = _nested_name(scope.canonical_name, name)
136 name_ir.canonical_name.CopyFrom(canonical_name)
137 return _add_name_to_scope(name_ir.name, scope, canonical_name, visibility,
138 errors)
139
140
141def _add_struct_field_to_scope(field, scope, errors):
142 """Adds the name of the given field to the scope."""
143 new_scope = _add_name_to_scope_and_normalize(field.name, scope, _Scope.LOCAL,
144 errors)
145 if field.HasField("abbreviation"):
146 _add_name_to_scope(field.abbreviation, scope, new_scope.canonical_name,
147 _Scope.PRIVATE, errors)
148
149 value_builtin_name = ir_pb2.Word(
150 text="this",
151 source_location=ir_pb2.Location(is_synthetic=True),
152 )
153 # In "inside field" scope, the name `this` maps back to the field itself.
154 # This is important for attributes like `[requires]`.
155 _add_name_to_scope(value_builtin_name, new_scope,
156 field.name.canonical_name, _Scope.PRIVATE, errors)
157
158
159def _add_parameter_name_to_scope(parameter, scope, errors):
160 """Adds the name of the given parameter to the scope."""
161 _add_name_to_scope_and_normalize(parameter.name, scope, _Scope.LOCAL, errors)
162
163
164def _add_enum_value_to_scope(value, scope, errors):
165 """Adds the name of the enum value to scope."""
166 _add_name_to_scope_and_normalize(value.name, scope, _Scope.LOCAL, errors)
167
168
169def _add_type_name_to_scope(type_definition, scope, errors):
170 """Adds the name of type_definition to the given scope."""
171 new_scope = _add_name_to_scope_and_normalize(type_definition.name, scope,
172 _Scope.SEARCHABLE, errors)
173 return {"scope": new_scope}
174
175
176def _set_scope_for_type_definition(type_definition, scope):
177 """Sets the current scope for an ir_pb2.TypeDefinition."""
178 return {"scope": scope[type_definition.name.name.text]}
179
180
181def _add_module_to_scope(module, scope):
182 """Adds the name of the module to the given scope."""
183 module_symbol_table = _Scope(
184 ir_pb2.CanonicalName(module_file=module.source_file_name,
185 object_path=[]),
186 None,
187 _Scope.SEARCHABLE)
188 scope[module.source_file_name] = module_symbol_table
189 return {"scope": scope[module.source_file_name]}
190
191
192def _set_scope_for_module(module, scope):
193 """Adds the name of the module to the given scope."""
194 return {"scope": scope[module.source_file_name]}
195
196
197def _add_import_to_scope(foreign_import, table, module, errors):
198 if not foreign_import.local_name.text:
199 # This is the prelude import; ignore it.
200 return
201 _add_alias_to_scope(foreign_import.local_name, table, module.canonical_name,
202 [foreign_import.file_name.text], _Scope.SEARCHABLE,
203 errors)
204
205
206def _construct_symbol_tables(ir):
207 """Constructs per-module symbol tables for each module in ir."""
208 symbol_tables = {}
209 errors = []
210 traverse_ir.fast_traverse_ir_top_down(
211 ir, [ir_pb2.Module], _add_module_to_scope,
212 parameters={"errors": errors, "scope": symbol_tables})
213 traverse_ir.fast_traverse_ir_top_down(
214 ir, [ir_pb2.TypeDefinition], _add_type_name_to_scope,
215 incidental_actions={ir_pb2.Module: _set_scope_for_module},
216 parameters={"errors": errors, "scope": symbol_tables})
217 if errors:
218 # Ideally, we would find duplicate field names elsewhere in the module, even
219 # if there are duplicate type names, but field/enum names in the colliding
220 # types also end up colliding, leading to spurious errors. E.g., if you
221 # have two `struct Foo`s, then the field check will also discover a
222 # collision for `$size_in_bytes`, since there are two `Foo.$size_in_bytes`.
223 return symbol_tables, errors
224
225 traverse_ir.fast_traverse_ir_top_down(
226 ir, [ir_pb2.EnumValue], _add_enum_value_to_scope,
227 incidental_actions={
228 ir_pb2.Module: _set_scope_for_module,
229 ir_pb2.TypeDefinition: _set_scope_for_type_definition,
230 },
231 parameters={"errors": errors, "scope": symbol_tables})
232 traverse_ir.fast_traverse_ir_top_down(
233 ir, [ir_pb2.Field], _add_struct_field_to_scope,
234 incidental_actions={
235 ir_pb2.Module: _set_scope_for_module,
236 ir_pb2.TypeDefinition: _set_scope_for_type_definition,
237 },
238 parameters={"errors": errors, "scope": symbol_tables})
239 traverse_ir.fast_traverse_ir_top_down(
240 ir, [ir_pb2.RuntimeParameter], _add_parameter_name_to_scope,
241 incidental_actions={
242 ir_pb2.Module: _set_scope_for_module,
243 ir_pb2.TypeDefinition: _set_scope_for_type_definition,
244 },
245 parameters={"errors": errors, "scope": symbol_tables})
246 return symbol_tables, errors
247
248
249def _add_alias_to_scope(name_ir, table, scope, alias, visibility, errors):
250 """Adds the given name to the scope as an alias."""
251 name = name_ir.text
252 new_scope = _Scope(_nested_name(scope, name), name_ir.source_location,
253 visibility, alias)
254 scoped_table = table[scope.module_file]
255 for path_element in scope.object_path:
256 scoped_table = scoped_table[path_element]
257 if name in scoped_table:
258 errors.append(duplicate_name_error(
259 scoped_table.canonical_name.module_file, name_ir.source_location, name,
260 FileLocation(scoped_table[name].canonical_name.module_file,
261 scoped_table[name].source_location)))
262 else:
263 scoped_table[name] = new_scope
264 return new_scope
265
266
267def _resolve_head_of_field_reference(field_reference, table, current_scope,
268 visible_scopes, source_file_name, errors):
269 return _resolve_reference(
270 field_reference.path[0], table, current_scope,
271 visible_scopes, source_file_name, errors)
272
273
274def _resolve_reference(reference, table, current_scope, visible_scopes,
275 source_file_name, errors):
276 """Sets the canonical name of the given reference."""
277 if reference.HasField("canonical_name"):
278 # This reference has already been resolved by the _resolve_field_reference
279 # pass.
280 return
281 target = _find_target_of_reference(reference, table, current_scope,
282 visible_scopes, source_file_name, errors)
283 if target is not None:
284 assert not target.alias
285 reference.canonical_name.CopyFrom(target.canonical_name)
286
287
288def _find_target_of_reference(reference, table, current_scope, visible_scopes,
289 source_file_name, errors):
290 """Returns the resolved name of the given reference."""
291 found_in_table = None
292 name = reference.source_name[0].text
293 for scope in visible_scopes:
294 scoped_table = table[scope.module_file]
295 for path_element in scope.object_path:
296 scoped_table = scoped_table[path_element]
297 if (name in scoped_table and
298 (scope == current_scope or
299 scoped_table[name].visibility == _Scope.SEARCHABLE)):
300 # Prelude is "", so explicitly check for None.
301 if found_in_table is not None:
302 # TODO(bolms): Currently, this catches the case where a module tries to
303 # use a name that is defined (at the same scope) in two different
304 # modules. It may make sense to raise duplicate_name_error whenever two
305 # modules define the same name (whether it is used or not), and reserve
306 # ambiguous_name_error for cases where a name is found in multiple
307 # scopes.
308 errors.append(ambiguous_name_error(
309 source_file_name, reference.source_location, name, [FileLocation(
310 found_in_table[name].canonical_name.module_file,
311 found_in_table[name].source_location), FileLocation(
312 scoped_table[name].canonical_name.module_file, scoped_table[
313 name].source_location)]))
314 continue
315 found_in_table = scoped_table
316 if reference.is_local_name:
317 # This is a little hacky. When "is_local_name" is True, the name refers
318 # to a type that was defined inline. In many cases, the type should be
319 # found at the same scope as the field; e.g.:
320 #
321 # struct Foo:
322 # 0 [+1] enum bar:
323 # BAZ = 1
324 #
325 # In this case, `Foo.bar` has type `Foo.Bar`. Unfortunately, things
326 # break down a little bit when there is an inline type in an anonymous
327 # `bits`:
328 #
329 # struct Foo:
330 # 0 [+1] bits:
331 # 0 [+7] enum bar:
332 # BAZ = 1
333 #
334 # Types inside of anonymous `bits` are hoisted into their parent type,
335 # so instead of `Foo.EmbossReservedAnonymous1.Bar`, `bar`'s type is just
336 # `Foo.Bar`. Unfortunately, the field is still
337 # `Foo.EmbossReservedAnonymous1.bar`, so `bar`'s type won't be found in
338 # `bar`'s `current_scope`.
339 #
340 # (The name `bar` is exposed from `Foo` as an alias virtual field, so
341 # perhaps the correct answer is to allow type aliases, so that `Bar` can
342 # be found in both `Foo` and `Foo.EmbossReservedAnonymous1`. That would
343 # involve an entirely new feature, though.)
344 #
345 # The workaround here is to search scopes from the innermost outward,
346 # and just stop as soon as a match is found. This isn't ideal, because
347 # it relies on other bits of the front end having correctly added the
348 # inline type to the correct scope before symbol resolution, but it does
349 # work. Names with False `is_local_name` will still be checked for
350 # ambiguity.
351 break
352 if found_in_table is None:
353 errors.append(missing_name_error(
354 source_file_name, reference.source_name[0].source_location, name))
355 if not errors:
356 for subname in reference.source_name:
357 if subname.text not in found_in_table:
358 errors.append(missing_name_error(source_file_name,
359 subname.source_location, subname.text))
360 return None
361 found_in_table = found_in_table[subname.text]
362 while found_in_table.alias:
363 referenced_table = table
364 for name in found_in_table.alias:
365 referenced_table = referenced_table[name]
366 # TODO(bolms): This section should really be a recursive lookup
367 # function, which would be able to handle arbitrary aliases through
368 # other aliases.
369 #
370 # This should be fine for now, since the only aliases here should be
371 # imports, which can't refer to other imports.
372 assert not referenced_table.alias, "Alias found to contain alias."
373 found_in_table = referenced_table
374 return found_in_table
375 return None
376
377
378def _resolve_field_reference(field_reference, source_file_name, errors, ir):
379 """Resolves the References inside of a FieldReference."""
380 if field_reference.path[-1].HasField("canonical_name"):
381 # Already done.
382 return
383 previous_field = ir_util.find_object_or_none(field_reference.path[0], ir)
384 previous_reference = field_reference.path[0]
385 for ref in field_reference.path[1:]:
386 while ir_util.field_is_virtual(previous_field):
387 if (previous_field.read_transform.WhichOneof("expression") ==
388 "field_reference"):
389 # Pass a separate error list into the recursive _resolve_field_reference
390 # call so that only one copy of the error for a particular reference
391 # will actually surface: in particular, the one that results from a
392 # direct call from traverse_ir_top_down into _resolve_field_reference.
393 new_errors = []
394 _resolve_field_reference(
395 previous_field.read_transform.field_reference,
396 previous_field.name.canonical_name.module_file, new_errors, ir)
397 # If the recursive _resolve_field_reference was unable to resolve the
398 # field, then bail. Otherwise we get a cascade of errors, where an
399 # error in `x` leads to errors in anything trying to reach a member of
400 # `x`.
401 if not previous_field.read_transform.field_reference.path[-1].HasField(
402 "canonical_name"):
403 return
404 previous_field = ir_util.find_object(
405 previous_field.read_transform.field_reference.path[-1], ir)
406 else:
407 errors.append(
408 noncomposite_subfield_error(source_file_name,
409 previous_reference.source_location,
410 previous_reference.source_name[0].text))
411 return
412 if previous_field.type.WhichOneof("type") == "array_type":
413 errors.append(
414 array_subfield_error(source_file_name,
415 previous_reference.source_location,
416 previous_reference.source_name[0].text))
417 return
418 assert previous_field.type.WhichOneof("type") == "atomic_type"
419 member_name = ir_pb2.CanonicalName()
420 member_name.CopyFrom(
421 previous_field.type.atomic_type.reference.canonical_name)
422 member_name.object_path.extend([ref.source_name[0].text])
423 previous_field = ir_util.find_object_or_none(member_name, ir)
424 if previous_field is None:
425 errors.append(
426 missing_name_error(source_file_name,
427 ref.source_name[0].source_location,
428 ref.source_name[0].text))
429 return
430 ref.canonical_name.CopyFrom(member_name)
431 previous_reference = ref
432
433
434def _set_visible_scopes_for_type_definition(type_definition, visible_scopes):
435 """Sets current_scope and visible_scopes for the given type_definition."""
436 return {
437 "current_scope": type_definition.name.canonical_name,
438
439 # In order to ensure that the iteration through scopes in
440 # _find_target_of_reference will go from innermost to outermost, it is
441 # important that the current scope (type_definition.name.canonical_name)
442 # precedes the previous visible_scopes here.
443 "visible_scopes": (type_definition.name.canonical_name,) + visible_scopes,
444 }
445
446
447def _set_visible_scopes_for_module(module):
448 """Sets visible_scopes for the given module."""
449 self_scope = ir_pb2.CanonicalName(module_file=module.source_file_name)
450 extra_visible_scopes = []
451 for foreign_import in module.foreign_import:
452 # Anonymous imports are searched for top-level names; named imports are not.
453 # As of right now, only the prelude should be imported anonymously; other
454 # modules must be imported with names.
455 if not foreign_import.local_name.text:
456 extra_visible_scopes.append(
457 ir_pb2.CanonicalName(module_file=foreign_import.file_name.text))
458 return {"visible_scopes": (self_scope,) + tuple(extra_visible_scopes)}
459
460
461def _set_visible_scopes_for_attribute(attribute, field, visible_scopes):
462 """Sets current_scope and visible_scopes for the attribute."""
463 del attribute # Unused
464 if field is None:
465 return
466 return {
467 "current_scope": field.name.canonical_name,
468 "visible_scopes": (field.name.canonical_name,) + visible_scopes,
469 }
470
471
472def _resolve_symbols_from_table(ir, table):
473 """Resolves all references in the given IR, given the constructed table."""
474 errors = []
475 # Symbol resolution is broken into five passes. First, this code resolves any
476 # imports, and adds import aliases to modules.
477 traverse_ir.fast_traverse_ir_top_down(
478 ir, [ir_pb2.Import], _add_import_to_scope,
479 incidental_actions={
480 ir_pb2.Module: lambda m, table: {"module": table[m.source_file_name]},
481 },
482 parameters={"errors": errors, "table": table})
483 if errors:
484 return errors
485 # Next, this resolves all absolute references (e.g., it resolves "UInt" in
486 # "0:1 UInt field" to [prelude]::UInt).
487 traverse_ir.fast_traverse_ir_top_down(
488 ir, [ir_pb2.Reference], _resolve_reference,
489 skip_descendants_of=(ir_pb2.FieldReference,),
490 incidental_actions={
491 ir_pb2.TypeDefinition: _set_visible_scopes_for_type_definition,
492 ir_pb2.Module: _set_visible_scopes_for_module,
493 ir_pb2.Field: lambda f: {"field": f},
494 ir_pb2.Attribute: _set_visible_scopes_for_attribute,
495 },
496 parameters={"table": table, "errors": errors, "field": None})
497 # Lastly, head References to fields (e.g., the `a` of `a.b.c`) are resolved.
498 traverse_ir.fast_traverse_ir_top_down(
499 ir, [ir_pb2.FieldReference], _resolve_head_of_field_reference,
500 incidental_actions={
501 ir_pb2.TypeDefinition: _set_visible_scopes_for_type_definition,
502 ir_pb2.Module: _set_visible_scopes_for_module,
503 ir_pb2.Field: lambda f: {"field": f},
504 ir_pb2.Attribute: _set_visible_scopes_for_attribute,
505 },
506 parameters={"table": table, "errors": errors, "field": None})
507 return errors
508
509
510def resolve_field_references(ir):
511 """Resolves structure member accesses ("field.subfield") in ir."""
512 errors = []
513 traverse_ir.fast_traverse_ir_top_down(
514 ir, [ir_pb2.FieldReference], _resolve_field_reference,
515 incidental_actions={
516 ir_pb2.TypeDefinition: _set_visible_scopes_for_type_definition,
517 ir_pb2.Module: _set_visible_scopes_for_module,
518 ir_pb2.Field: lambda f: {"field": f},
519 ir_pb2.Attribute: _set_visible_scopes_for_attribute,
520 },
521 parameters={"errors": errors, "field": None})
522 return errors
523
524
525def resolve_symbols(ir):
526 """Resolves the symbols in all modules in ir."""
527 symbol_tables, errors = _construct_symbol_tables(ir)
528 if errors:
529 return errors
530 return _resolve_symbols_from_table(ir, symbol_tables)