| # Copyright 2019 Google LLC |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Symbol resolver for Emboss IR. |
| |
| The resolve_symbols function should be used to generate canonical resolutions |
| for all symbol references in an Emboss IR. |
| """ |
| |
| import collections |
| |
| from compiler.util import ir_pb2 |
| from compiler.util import error |
| from compiler.util import ir_util |
| from compiler.util import traverse_ir |
| |
| # TODO(bolms): Symbol resolution raises an exception at the first error, but |
| # this is one place where it can make sense to report multiple errors. |
| |
| FileLocation = collections.namedtuple("FileLocation", ["file", "location"]) |
| |
| |
| def ambiguous_name_error(file_name, location, name, candidate_locations): |
| """A name cannot be resolved because there are two or more candidates.""" |
| result = [error.error(file_name, location, "Ambiguous name '{}'".format(name)) |
| ] |
| for location in sorted(candidate_locations): |
| result.append(error.note(location.file, location.location, |
| "Possible resolution")) |
| return result |
| |
| |
| def duplicate_name_error(file_name, location, name, original_location): |
| """A name is defined two or more times.""" |
| return [error.error(file_name, location, "Duplicate name '{}'".format(name)), |
| error.note(original_location.file, original_location.location, |
| "Original definition")] |
| |
| |
| def missing_name_error(file_name, location, name): |
| return [error.error(file_name, location, "No candidate for '{}'".format(name)) |
| ] |
| |
| |
| def array_subfield_error(file_name, location, name): |
| return [error.error(file_name, location, |
| "Cannot access member of array '{}'".format(name))] |
| |
| |
| def noncomposite_subfield_error(file_name, location, name): |
| return [error.error(file_name, location, |
| "Cannot access member of noncomposite field '{}'".format( |
| name))] |
| |
| |
| def _nested_name(canonical_name, name): |
| """Creates a new CanonicalName with name appended to the object_path.""" |
| return ir_pb2.CanonicalName( |
| module_file=canonical_name.module_file, |
| object_path=list(canonical_name.object_path) + [name]) |
| |
| |
| class _Scope(dict): |
| """A _Scope holds data for a symbol. |
| |
| A _Scope is a dict with some additional attributes. Lexically nested names |
| are kept in the dict, and bookkeeping is kept in the additional attributes. |
| |
| For example, each module should have a child _Scope for each type contained in |
| the module. `struct` and `bits` types should have nested _Scopes for each |
| field; `enum` types should have nested scopes for each enumerated name. |
| |
| Attributes: |
| canonical_name: The absolute name of this symbol; e.g. ("file.emb", |
| "TypeName", "SubTypeName", "field_name") |
| source_location: The ir_pb2.SourceLocation where this symbol is defined. |
| visibility: LOCAL, PRIVATE, or SEARCHABLE; see below. |
| alias: If set, this name is merely a pointer to another name. |
| """ |
| __slots__ = ("canonical_name", "source_location", "visibility", "alias") |
| |
| # A LOCAL name is visible outside of its enclosing scope, but should not be |
| # found when searching for a name. That is, this name should be matched in |
| # the tail of a qualified reference (the 'bar' in 'foo.bar'), but not when |
| # searching for names (the 'foo' in 'foo.bar' should not match outside of |
| # 'foo's scope). This applies to public field names. |
| LOCAL = object() |
| |
| # A PRIVATE name is similar to LOCAL except that it is never visible outside |
| # its enclosing scope. This applies to abbreviations of field names: if 'a' |
| # is an abbreviation for field 'apple', then 'foo.a' is not a valid reference; |
| # instead it should be 'foo.apple'. |
| PRIVATE = object() |
| |
| # A SEARCHABLE name is visible as long as it is in a scope in the search list. |
| # This applies to type names ('Foo'), which may be found from many scopes. |
| SEARCHABLE = object() |
| |
| def __init__(self, canonical_name, source_location, visibility, alias=None): |
| super(_Scope, self).__init__() |
| self.canonical_name = canonical_name |
| self.source_location = source_location |
| self.visibility = visibility |
| self.alias = alias |
| |
| |
| def _add_name_to_scope(name_ir, scope, canonical_name, visibility, errors): |
| """Adds the given name_ir to the given scope.""" |
| name = name_ir.text |
| new_scope = _Scope(canonical_name, name_ir.source_location, visibility) |
| if name in scope: |
| errors.append(duplicate_name_error( |
| scope.canonical_name.module_file, name_ir.source_location, name, |
| FileLocation(scope[name].canonical_name.module_file, |
| scope[name].source_location))) |
| else: |
| scope[name] = new_scope |
| return new_scope |
| |
| |
| def _add_name_to_scope_and_normalize(name_ir, scope, visibility, errors): |
| """Adds the given name_ir to scope and sets its canonical_name.""" |
| name = name_ir.name.text |
| canonical_name = _nested_name(scope.canonical_name, name) |
| name_ir.canonical_name.CopyFrom(canonical_name) |
| return _add_name_to_scope(name_ir.name, scope, canonical_name, visibility, |
| errors) |
| |
| |
| def _add_struct_field_to_scope(field, scope, errors): |
| """Adds the name of the given field to the scope.""" |
| new_scope = _add_name_to_scope_and_normalize(field.name, scope, _Scope.LOCAL, |
| errors) |
| if field.HasField("abbreviation"): |
| _add_name_to_scope(field.abbreviation, scope, new_scope.canonical_name, |
| _Scope.PRIVATE, errors) |
| |
| value_builtin_name = ir_pb2.Word( |
| text="this", |
| source_location=ir_pb2.Location(is_synthetic=True), |
| ) |
| # In "inside field" scope, the name `this` maps back to the field itself. |
| # This is important for attributes like `[requires]`. |
| _add_name_to_scope(value_builtin_name, new_scope, |
| field.name.canonical_name, _Scope.PRIVATE, errors) |
| |
| |
| def _add_parameter_name_to_scope(parameter, scope, errors): |
| """Adds the name of the given parameter to the scope.""" |
| _add_name_to_scope_and_normalize(parameter.name, scope, _Scope.LOCAL, errors) |
| |
| |
| def _add_enum_value_to_scope(value, scope, errors): |
| """Adds the name of the enum value to scope.""" |
| _add_name_to_scope_and_normalize(value.name, scope, _Scope.LOCAL, errors) |
| |
| |
| def _add_type_name_to_scope(type_definition, scope, errors): |
| """Adds the name of type_definition to the given scope.""" |
| new_scope = _add_name_to_scope_and_normalize(type_definition.name, scope, |
| _Scope.SEARCHABLE, errors) |
| return {"scope": new_scope} |
| |
| |
| def _set_scope_for_type_definition(type_definition, scope): |
| """Sets the current scope for an ir_pb2.TypeDefinition.""" |
| return {"scope": scope[type_definition.name.name.text]} |
| |
| |
| def _add_module_to_scope(module, scope): |
| """Adds the name of the module to the given scope.""" |
| module_symbol_table = _Scope( |
| ir_pb2.CanonicalName(module_file=module.source_file_name, |
| object_path=[]), |
| None, |
| _Scope.SEARCHABLE) |
| scope[module.source_file_name] = module_symbol_table |
| return {"scope": scope[module.source_file_name]} |
| |
| |
| def _set_scope_for_module(module, scope): |
| """Adds the name of the module to the given scope.""" |
| return {"scope": scope[module.source_file_name]} |
| |
| |
| def _add_import_to_scope(foreign_import, table, module, errors): |
| if not foreign_import.local_name.text: |
| # This is the prelude import; ignore it. |
| return |
| _add_alias_to_scope(foreign_import.local_name, table, module.canonical_name, |
| [foreign_import.file_name.text], _Scope.SEARCHABLE, |
| errors) |
| |
| |
| def _construct_symbol_tables(ir): |
| """Constructs per-module symbol tables for each module in ir.""" |
| symbol_tables = {} |
| errors = [] |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.Module], _add_module_to_scope, |
| parameters={"errors": errors, "scope": symbol_tables}) |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.TypeDefinition], _add_type_name_to_scope, |
| incidental_actions={ir_pb2.Module: _set_scope_for_module}, |
| parameters={"errors": errors, "scope": symbol_tables}) |
| if errors: |
| # Ideally, we would find duplicate field names elsewhere in the module, even |
| # if there are duplicate type names, but field/enum names in the colliding |
| # types also end up colliding, leading to spurious errors. E.g., if you |
| # have two `struct Foo`s, then the field check will also discover a |
| # collision for `$size_in_bytes`, since there are two `Foo.$size_in_bytes`. |
| return symbol_tables, errors |
| |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.EnumValue], _add_enum_value_to_scope, |
| incidental_actions={ |
| ir_pb2.Module: _set_scope_for_module, |
| ir_pb2.TypeDefinition: _set_scope_for_type_definition, |
| }, |
| parameters={"errors": errors, "scope": symbol_tables}) |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.Field], _add_struct_field_to_scope, |
| incidental_actions={ |
| ir_pb2.Module: _set_scope_for_module, |
| ir_pb2.TypeDefinition: _set_scope_for_type_definition, |
| }, |
| parameters={"errors": errors, "scope": symbol_tables}) |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.RuntimeParameter], _add_parameter_name_to_scope, |
| incidental_actions={ |
| ir_pb2.Module: _set_scope_for_module, |
| ir_pb2.TypeDefinition: _set_scope_for_type_definition, |
| }, |
| parameters={"errors": errors, "scope": symbol_tables}) |
| return symbol_tables, errors |
| |
| |
| def _add_alias_to_scope(name_ir, table, scope, alias, visibility, errors): |
| """Adds the given name to the scope as an alias.""" |
| name = name_ir.text |
| new_scope = _Scope(_nested_name(scope, name), name_ir.source_location, |
| visibility, alias) |
| scoped_table = table[scope.module_file] |
| for path_element in scope.object_path: |
| scoped_table = scoped_table[path_element] |
| if name in scoped_table: |
| errors.append(duplicate_name_error( |
| scoped_table.canonical_name.module_file, name_ir.source_location, name, |
| FileLocation(scoped_table[name].canonical_name.module_file, |
| scoped_table[name].source_location))) |
| else: |
| scoped_table[name] = new_scope |
| return new_scope |
| |
| |
| def _resolve_head_of_field_reference(field_reference, table, current_scope, |
| visible_scopes, source_file_name, errors): |
| return _resolve_reference( |
| field_reference.path[0], table, current_scope, |
| visible_scopes, source_file_name, errors) |
| |
| |
| def _resolve_reference(reference, table, current_scope, visible_scopes, |
| source_file_name, errors): |
| """Sets the canonical name of the given reference.""" |
| if reference.HasField("canonical_name"): |
| # This reference has already been resolved by the _resolve_field_reference |
| # pass. |
| return |
| target = _find_target_of_reference(reference, table, current_scope, |
| visible_scopes, source_file_name, errors) |
| if target is not None: |
| assert not target.alias |
| reference.canonical_name.CopyFrom(target.canonical_name) |
| |
| |
| def _find_target_of_reference(reference, table, current_scope, visible_scopes, |
| source_file_name, errors): |
| """Returns the resolved name of the given reference.""" |
| found_in_table = None |
| name = reference.source_name[0].text |
| for scope in visible_scopes: |
| scoped_table = table[scope.module_file] |
| for path_element in scope.object_path: |
| scoped_table = scoped_table[path_element] |
| if (name in scoped_table and |
| (scope == current_scope or |
| scoped_table[name].visibility == _Scope.SEARCHABLE)): |
| # Prelude is "", so explicitly check for None. |
| if found_in_table is not None: |
| # TODO(bolms): Currently, this catches the case where a module tries to |
| # use a name that is defined (at the same scope) in two different |
| # modules. It may make sense to raise duplicate_name_error whenever two |
| # modules define the same name (whether it is used or not), and reserve |
| # ambiguous_name_error for cases where a name is found in multiple |
| # scopes. |
| errors.append(ambiguous_name_error( |
| source_file_name, reference.source_location, name, [FileLocation( |
| found_in_table[name].canonical_name.module_file, |
| found_in_table[name].source_location), FileLocation( |
| scoped_table[name].canonical_name.module_file, scoped_table[ |
| name].source_location)])) |
| continue |
| found_in_table = scoped_table |
| if reference.is_local_name: |
| # This is a little hacky. When "is_local_name" is True, the name refers |
| # to a type that was defined inline. In many cases, the type should be |
| # found at the same scope as the field; e.g.: |
| # |
| # struct Foo: |
| # 0 [+1] enum bar: |
| # BAZ = 1 |
| # |
| # In this case, `Foo.bar` has type `Foo.Bar`. Unfortunately, things |
| # break down a little bit when there is an inline type in an anonymous |
| # `bits`: |
| # |
| # struct Foo: |
| # 0 [+1] bits: |
| # 0 [+7] enum bar: |
| # BAZ = 1 |
| # |
| # Types inside of anonymous `bits` are hoisted into their parent type, |
| # so instead of `Foo.EmbossReservedAnonymous1.Bar`, `bar`'s type is just |
| # `Foo.Bar`. Unfortunately, the field is still |
| # `Foo.EmbossReservedAnonymous1.bar`, so `bar`'s type won't be found in |
| # `bar`'s `current_scope`. |
| # |
| # (The name `bar` is exposed from `Foo` as an alias virtual field, so |
| # perhaps the correct answer is to allow type aliases, so that `Bar` can |
| # be found in both `Foo` and `Foo.EmbossReservedAnonymous1`. That would |
| # involve an entirely new feature, though.) |
| # |
| # The workaround here is to search scopes from the innermost outward, |
| # and just stop as soon as a match is found. This isn't ideal, because |
| # it relies on other bits of the front end having correctly added the |
| # inline type to the correct scope before symbol resolution, but it does |
| # work. Names with False `is_local_name` will still be checked for |
| # ambiguity. |
| break |
| if found_in_table is None: |
| errors.append(missing_name_error( |
| source_file_name, reference.source_name[0].source_location, name)) |
| if not errors: |
| for subname in reference.source_name: |
| if subname.text not in found_in_table: |
| errors.append(missing_name_error(source_file_name, |
| subname.source_location, subname.text)) |
| return None |
| found_in_table = found_in_table[subname.text] |
| while found_in_table.alias: |
| referenced_table = table |
| for name in found_in_table.alias: |
| referenced_table = referenced_table[name] |
| # TODO(bolms): This section should really be a recursive lookup |
| # function, which would be able to handle arbitrary aliases through |
| # other aliases. |
| # |
| # This should be fine for now, since the only aliases here should be |
| # imports, which can't refer to other imports. |
| assert not referenced_table.alias, "Alias found to contain alias." |
| found_in_table = referenced_table |
| return found_in_table |
| return None |
| |
| |
| def _resolve_field_reference(field_reference, source_file_name, errors, ir): |
| """Resolves the References inside of a FieldReference.""" |
| if field_reference.path[-1].HasField("canonical_name"): |
| # Already done. |
| return |
| previous_field = ir_util.find_object_or_none(field_reference.path[0], ir) |
| previous_reference = field_reference.path[0] |
| for ref in field_reference.path[1:]: |
| while ir_util.field_is_virtual(previous_field): |
| if (previous_field.read_transform.WhichOneof("expression") == |
| "field_reference"): |
| # Pass a separate error list into the recursive _resolve_field_reference |
| # call so that only one copy of the error for a particular reference |
| # will actually surface: in particular, the one that results from a |
| # direct call from traverse_ir_top_down into _resolve_field_reference. |
| new_errors = [] |
| _resolve_field_reference( |
| previous_field.read_transform.field_reference, |
| previous_field.name.canonical_name.module_file, new_errors, ir) |
| # If the recursive _resolve_field_reference was unable to resolve the |
| # field, then bail. Otherwise we get a cascade of errors, where an |
| # error in `x` leads to errors in anything trying to reach a member of |
| # `x`. |
| if not previous_field.read_transform.field_reference.path[-1].HasField( |
| "canonical_name"): |
| return |
| previous_field = ir_util.find_object( |
| previous_field.read_transform.field_reference.path[-1], ir) |
| else: |
| errors.append( |
| noncomposite_subfield_error(source_file_name, |
| previous_reference.source_location, |
| previous_reference.source_name[0].text)) |
| return |
| if previous_field.type.WhichOneof("type") == "array_type": |
| errors.append( |
| array_subfield_error(source_file_name, |
| previous_reference.source_location, |
| previous_reference.source_name[0].text)) |
| return |
| assert previous_field.type.WhichOneof("type") == "atomic_type" |
| member_name = ir_pb2.CanonicalName() |
| member_name.CopyFrom( |
| previous_field.type.atomic_type.reference.canonical_name) |
| member_name.object_path.extend([ref.source_name[0].text]) |
| previous_field = ir_util.find_object_or_none(member_name, ir) |
| if previous_field is None: |
| errors.append( |
| missing_name_error(source_file_name, |
| ref.source_name[0].source_location, |
| ref.source_name[0].text)) |
| return |
| ref.canonical_name.CopyFrom(member_name) |
| previous_reference = ref |
| |
| |
| def _set_visible_scopes_for_type_definition(type_definition, visible_scopes): |
| """Sets current_scope and visible_scopes for the given type_definition.""" |
| return { |
| "current_scope": type_definition.name.canonical_name, |
| |
| # In order to ensure that the iteration through scopes in |
| # _find_target_of_reference will go from innermost to outermost, it is |
| # important that the current scope (type_definition.name.canonical_name) |
| # precedes the previous visible_scopes here. |
| "visible_scopes": (type_definition.name.canonical_name,) + visible_scopes, |
| } |
| |
| |
| def _set_visible_scopes_for_module(module): |
| """Sets visible_scopes for the given module.""" |
| self_scope = ir_pb2.CanonicalName(module_file=module.source_file_name) |
| extra_visible_scopes = [] |
| for foreign_import in module.foreign_import: |
| # Anonymous imports are searched for top-level names; named imports are not. |
| # As of right now, only the prelude should be imported anonymously; other |
| # modules must be imported with names. |
| if not foreign_import.local_name.text: |
| extra_visible_scopes.append( |
| ir_pb2.CanonicalName(module_file=foreign_import.file_name.text)) |
| return {"visible_scopes": (self_scope,) + tuple(extra_visible_scopes)} |
| |
| |
| def _set_visible_scopes_for_attribute(attribute, field, visible_scopes): |
| """Sets current_scope and visible_scopes for the attribute.""" |
| del attribute # Unused |
| if field is None: |
| return |
| return { |
| "current_scope": field.name.canonical_name, |
| "visible_scopes": (field.name.canonical_name,) + visible_scopes, |
| } |
| |
| |
| def _resolve_symbols_from_table(ir, table): |
| """Resolves all references in the given IR, given the constructed table.""" |
| errors = [] |
| # Symbol resolution is broken into five passes. First, this code resolves any |
| # imports, and adds import aliases to modules. |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.Import], _add_import_to_scope, |
| incidental_actions={ |
| ir_pb2.Module: lambda m, table: {"module": table[m.source_file_name]}, |
| }, |
| parameters={"errors": errors, "table": table}) |
| if errors: |
| return errors |
| # Next, this resolves all absolute references (e.g., it resolves "UInt" in |
| # "0:1 UInt field" to [prelude]::UInt). |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.Reference], _resolve_reference, |
| skip_descendants_of=(ir_pb2.FieldReference,), |
| incidental_actions={ |
| ir_pb2.TypeDefinition: _set_visible_scopes_for_type_definition, |
| ir_pb2.Module: _set_visible_scopes_for_module, |
| ir_pb2.Field: lambda f: {"field": f}, |
| ir_pb2.Attribute: _set_visible_scopes_for_attribute, |
| }, |
| parameters={"table": table, "errors": errors, "field": None}) |
| # Lastly, head References to fields (e.g., the `a` of `a.b.c`) are resolved. |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.FieldReference], _resolve_head_of_field_reference, |
| incidental_actions={ |
| ir_pb2.TypeDefinition: _set_visible_scopes_for_type_definition, |
| ir_pb2.Module: _set_visible_scopes_for_module, |
| ir_pb2.Field: lambda f: {"field": f}, |
| ir_pb2.Attribute: _set_visible_scopes_for_attribute, |
| }, |
| parameters={"table": table, "errors": errors, "field": None}) |
| return errors |
| |
| |
| def resolve_field_references(ir): |
| """Resolves structure member accesses ("field.subfield") in ir.""" |
| errors = [] |
| traverse_ir.fast_traverse_ir_top_down( |
| ir, [ir_pb2.FieldReference], _resolve_field_reference, |
| incidental_actions={ |
| ir_pb2.TypeDefinition: _set_visible_scopes_for_type_definition, |
| ir_pb2.Module: _set_visible_scopes_for_module, |
| ir_pb2.Field: lambda f: {"field": f}, |
| ir_pb2.Attribute: _set_visible_scopes_for_attribute, |
| }, |
| parameters={"errors": errors, "field": None}) |
| return errors |
| |
| |
| def resolve_symbols(ir): |
| """Resolves the symbols in all modules in ir.""" |
| symbol_tables, errors = _construct_symbol_tables(ir) |
| if errors: |
| return errors |
| return _resolve_symbols_from_table(ir, symbol_tables) |