Initial open-source commit of Emboss.

commit: c0d7784b29194d36c84914005d634fc2a962770a [log] [tgz]
author: Ben Olmstead <bolms@google.com> Wed Jul 31 17:34:05 2019 -0700
committer: Ben Olmstead <bolms@google.com> Wed Jul 31 17:34:05 2019 -0700
tree: 418f8e110f116bc821c8fef8bc5461e3a99b95b1
diff --git a/front_end/dependency_checker.py b/front_end/dependency_checker.py
new file mode 100644
index 0000000..7a1fa7d
--- /dev/null
+++ b/front_end/dependency_checker.py

@@ -0,0 +1,261 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Checks for dependency cycles in Emboss IR."""
+
+from public import ir_pb2
+from util import error
+from util import ir_util
+from util import traverse_ir
+
+
+def _add_reference_to_dependencies(reference, dependencies, name):
+  dependencies[name] |= {ir_util.hashable_form_of_reference(reference)}
+
+
+def _add_field_reference_to_dependencies(reference, dependencies, name):
+  dependencies[name] |= {ir_util.hashable_form_of_reference(reference.path[0])}
+
+
+def _add_name_to_dependencies(proto, dependencies):
+  name = ir_util.hashable_form_of_reference(proto.name)
+  dependencies.setdefault(name, set())
+  return {"name": name}
+
+
+def _find_dependencies(ir):
+  """Constructs a dependency graph for the entire IR."""
+  dependencies = {}
+  traverse_ir.fast_traverse_ir_top_down(
+      ir, [ir_pb2.Reference], _add_reference_to_dependencies,
+      # TODO(bolms): Add handling for references inside of attributes, once
+      # there are attributes with non-constant values.
+      skip_descendants_of={
+          ir_pb2.AtomicType, ir_pb2.Attribute, ir_pb2.FieldReference
+      },
+      incidental_actions={
+          ir_pb2.Field: _add_name_to_dependencies,
+          ir_pb2.EnumValue: _add_name_to_dependencies,
+          ir_pb2.RuntimeParameter: _add_name_to_dependencies,
+      },
+      parameters={"dependencies": dependencies})
+  traverse_ir.fast_traverse_ir_top_down(
+      ir, [ir_pb2.FieldReference], _add_field_reference_to_dependencies,
+      skip_descendants_of={ir_pb2.Attribute},
+      incidental_actions={
+          ir_pb2.Field: _add_name_to_dependencies,
+          ir_pb2.EnumValue: _add_name_to_dependencies,
+          ir_pb2.RuntimeParameter: _add_name_to_dependencies,
+      },
+      parameters={"dependencies": dependencies})
+  return dependencies
+
+
+def _find_dependency_ordering_for_fields_in_structure(
+    structure, type_definition, dependencies):
+  """Populates structure.fields_in_dependency_order."""
+  # For fields which appear before their dependencies in the original source
+  # text, this algorithm moves them to immediately after their dependencies.
+  #
+  # This is one of many possible schemes for constructing a dependency ordering;
+  # it has the advantage that all of the generated fields (e.g., $size_in_bytes)
+  # stay at the end of the ordering, which makes testing easier.
+  order = []
+  added = set()
+  for parameter in type_definition.runtime_parameter:
+    added.add(ir_util.hashable_form_of_reference(parameter.name))
+  needed = list(range(len(structure.field)))
+  while True:
+    for i in range(len(needed)):
+      field_number = needed[i]
+      field = ir_util.hashable_form_of_reference(
+          structure.field[field_number].name)
+      assert field in dependencies, "dependencies = {}".format(dependencies)
+      if all(dependency in added for dependency in dependencies[field]):
+        order.append(field_number)
+        added.add(field)
+        del needed[i]
+        break
+    else:
+      break
+  # If a non-local-field dependency were in dependencies[field], then not all
+  # fields would be added to the dependency ordering.  This shouldn't happen.
+  assert len(order) == len(structure.field), (
+      "order: {}\nlen(structure.field: {})".format(order, len(structure.field)))
+  del structure.fields_in_dependency_order[:]
+  structure.fields_in_dependency_order.extend(order)
+
+
+def _find_dependency_ordering_for_fields(ir):
+  """Populates the fields_in_dependency_order fields throughout ir."""
+  dependencies = {}
+  # TODO(bolms): This duplicates work in _find_dependencies that could be
+  # shared.
+  traverse_ir.fast_traverse_ir_top_down(
+      ir, [ir_pb2.FieldReference], _add_field_reference_to_dependencies,
+      skip_descendants_of={ir_pb2.Attribute},
+      incidental_actions={
+          ir_pb2.Field: _add_name_to_dependencies,
+          ir_pb2.EnumValue: _add_name_to_dependencies,
+          ir_pb2.RuntimeParameter: _add_name_to_dependencies,
+      },
+      parameters={"dependencies": dependencies})
+  traverse_ir.fast_traverse_ir_top_down(
+      ir, [ir_pb2.Structure],
+      _find_dependency_ordering_for_fields_in_structure,
+      parameters={"dependencies": dependencies})
+
+
+def _find_module_import_dependencies(ir):
+  """Constructs a dependency graph of module imports."""
+  dependencies = {}
+  for module in ir.module:
+    foreign_imports = set()
+    for foreign_import in module.foreign_import:
+      # The prelude gets an automatic self-import that shouldn't cause any
+      # problems.  No other self-imports are allowed, however.
+      if foreign_import.file_name.text or module.source_file_name:
+        foreign_imports |= {(foreign_import.file_name.text,)}
+    dependencies[module.source_file_name,] = foreign_imports
+  return dependencies
+
+
+def _find_cycles(graph):
+  """Finds cycles in graph.
+
+  The graph does not need to be fully connected.
+
+  Arguments:
+    graph: A dictionary whose keys are node labels.  Values are sets of node
+      labels, representing edges from the key node to the value nodes.
+
+  Returns:
+    A set of sets of nodes which form strongly-connected components (subgraphs
+    where every node is directly or indirectly reachable from every other node).
+    No node will be included in more than one strongly-connected component, by
+    definition.  Strongly-connected components of size 1, where the node in the
+    component does not have a self-edge, are not included in the result.
+
+    Note that a strongly-connected component may have a more complex structure
+    than a single loop.  For example:
+
+        +-- A <-+ +-> B --+
+        |       | |       |
+        v        C        v
+        D       ^ ^       E
+        |       | |       |
+        +-> F --+ +-- G <-+
+  """
+  # This uses Tarjan's strongly-connected components algorithm, as described by
+  # Wikipedia.  This is a depth-first traversal of the graph with a node stack
+  # that is independent of the call stack; nodes are added to the stack when
+  # they are first encountered, but not removed until all nodes they can reach
+  # have been checked.
+  next_index = [0]
+  node_indices = {}
+  node_lowlinks = {}
+  nodes_on_stack = set()
+  stack = []
+  nontrivial_components = set()
+
+  def strong_connect(node):
+    """Implements the STRONGCONNECT routine of Tarjan's algorithm."""
+    node_indices[node] = next_index[0]
+    node_lowlinks[node] = next_index[0]
+    next_index[0] += 1
+    stack.append(node)
+    nodes_on_stack.add(node)
+
+    for destination_node in graph[node]:
+      if destination_node not in node_indices:
+        strong_connect(destination_node)
+        node_lowlinks[node] = min(node_lowlinks[node],
+                                  node_lowlinks[destination_node])
+      elif destination_node in nodes_on_stack:
+        node_lowlinks[node] = min(node_lowlinks[node],
+                                  node_indices[destination_node])
+
+    strongly_connected_component = []
+    if node_lowlinks[node] == node_indices[node]:
+      while True:
+        popped_node = stack.pop()
+        nodes_on_stack.remove(popped_node)
+        strongly_connected_component.append(popped_node)
+        if popped_node == node:
+          break
+      if (len(strongly_connected_component) > 1 or
+          strongly_connected_component[0] in
+          graph[strongly_connected_component[0]]):
+        nontrivial_components.add(frozenset(strongly_connected_component))
+
+  for node in graph:
+    if node not in node_indices:
+      strong_connect(node)
+  return nontrivial_components
+
+
+def _find_object_dependency_cycles(ir):
+  """Finds dependency cycles in types in the ir."""
+  dependencies = _find_dependencies(ir)
+  cycles = _find_cycles(dict(dependencies))
+  errors = []
+  for cycle in cycles:
+    # TODO(bolms): This lists the entire strongly-connected component in a
+    # fairly arbitrary order.  This is simple, and handles components that
+    # aren't simple cycles, but may not be the most user-friendly way to
+    # present this information.
+    cycle_list = sorted(list(cycle))
+    node_object = ir_util.find_object(cycle_list[0], ir)
+    error_group = [
+        error.error(cycle_list[0][0], node_object.source_location,
+                    "Dependency cycle\n" + node_object.name.name.text)
+    ]
+    for node in cycle_list[1:]:
+      node_object = ir_util.find_object(node, ir)
+      error_group.append(error.note(node[0], node_object.source_location,
+                                    node_object.name.name.text))
+    errors.append(error_group)
+  return errors
+
+
+def _find_module_dependency_cycles(ir):
+  """Finds dependency cycles in modules in the ir."""
+  dependencies = _find_module_import_dependencies(ir)
+  cycles = _find_cycles(dict(dependencies))
+  errors = []
+  for cycle in cycles:
+    cycle_list = sorted(list(cycle))
+    module = ir_util.find_object(cycle_list[0], ir)
+    error_group = [
+        error.error(cycle_list[0][0], module.source_location,
+                    "Import dependency cycle\n" + module.source_file_name)
+    ]
+    for module_name in cycle_list[1:]:
+      module = ir_util.find_object(module_name, ir)
+      error_group.append(error.note(module_name[0], module.source_location,
+                                    module.source_file_name))
+    errors.append(error_group)
+  return errors
+
+
+def find_dependency_cycles(ir):
+  """Finds any dependency cycles in the ir."""
+  errors = _find_module_dependency_cycles(ir)
+  return errors + _find_object_dependency_cycles(ir)
+
+
+def set_dependency_order(ir):
+  """Sets the fields_in_dependency_order member of Structures."""
+  _find_dependency_ordering_for_fields(ir)
+  return []
commit	c0d7784b29194d36c84914005d634fc2a962770a	[log] [tgz]
author	Ben Olmstead <bolms@google.com>	Wed Jul 31 17:34:05 2019 -0700
committer	Ben Olmstead <bolms@google.com>	Wed Jul 31 17:34:05 2019 -0700
tree	418f8e110f116bc821c8fef8bc5461e3a99b95b1