# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Main driver for the Emboss front-end.

The parse_emboss_file function performs a complete parse of the specified file,
and returns an (ir, debug_info, errors) tuple: the IR on success, or a list of
errors on failure.
"""

import collections
import pkgutil

from compiler.front_end import attribute_checker
from compiler.front_end import constraints
from compiler.front_end import dependency_checker
from compiler.front_end import expression_bounds
from compiler.front_end import lr1
from compiler.front_end import module_ir
from compiler.front_end import parser
from compiler.front_end import symbol_resolver
from compiler.front_end import synthetics
from compiler.front_end import tokenizer
from compiler.front_end import type_check
from compiler.front_end import write_inference
from compiler.util import error
from compiler.util import ir_pb2
from compiler.util import parser_types

# Result tuple returned by the parsing functions in this file:
#   ir: the resulting IR, or None when `errors` is non-empty.
#   debug_info: the debug information gathered while parsing (may be None if
#       the source file could not be read).
#   errors: a list of errors; empty on success.
_IrDebugInfo = collections.namedtuple("IrDebugInfo", ["ir", "debug_info",
                                                      "errors"])


class DebugInfo(object):
  """Debug information about Emboss parsing.

  Attributes:
    modules: A dict of debug information for individual modules.  It starts
        empty; only_parse_emboss_file fills it in, keyed by source file name.
  """
  # A one-element tuple (note the trailing comma), not a bare string.
  __slots__ = ("modules",)

  def __init__(self):
    self.modules = {}

  def __eq__(self, other):
    # Defer to Python's default handling for unrelated types instead of
    # failing with AttributeError on the missing `modules` attribute.
    if not isinstance(other, DebugInfo):
      return NotImplemented
    return self.modules == other.modules

  def __ne__(self, other):
    # Python 2 does not derive __ne__ from __eq__, so define it explicitly.
    return not self == other


class ModuleDebugInfo(object):
  """Debug information about the parse of a single file.

  Attributes:
    file_name: The name of the file from which this module came.
    tokens: The tokenization of this module's source text.
    parse_tree: The raw parse tree for this module.
    ir: The intermediate representation of this module, before additional
        processing such as symbol resolution.
    used_productions: The set of grammar productions used when parsing this
        module.
    source_code: The source text of the module.
  """
  __slots__ = ("file_name", "tokens", "parse_tree", "ir", "used_productions",
               "source_code")

  def __init__(self, file_name):
    # Everything except the file name starts out unset (None) and is filled in
    # by the caller as parsing progresses.
    self.file_name = file_name
    self.tokens = None
    self.parse_tree = None
    self.ir = None
    self.used_productions = None
    self.source_code = None

  def __eq__(self, other):
    # Defer to Python's default handling for unrelated types instead of
    # failing with AttributeError on a missing attribute.
    if not isinstance(other, ModuleDebugInfo):
      return NotImplemented
    return (self.file_name == other.file_name and self.tokens == other.tokens
            and self.parse_tree == other.parse_tree and self.ir == other.ir and
            self.used_productions == other.used_productions and
            self.source_code == other.source_code)

  def __ne__(self, other):
    # Python 2 does not derive __ne__ from __eq__, so define it explicitly.
    return not self == other

  def format_tokenization(self):
    """Renders self.tokens in a human-readable format, one token per line."""
    return "\n".join([str(token) for token in self.tokens])

  def format_parse_tree(self, parse_tree=None, indent=""):
    """Renders a parse tree in a human-readable format.

    Arguments:
      parse_tree: The (sub)tree to render.  If None, self.parse_tree is used.
      indent: The string prefixed to the first line of output; each level of
          nesting adds two spaces.

    Returns:
      A string with one line per node.  An lr1.Reduction with children is
      rendered as its symbol followed by ":", with its children on the
      following lines; any other node is rendered via str.format.
    """
    if parse_tree is None:
      parse_tree = self.parse_tree
    return self._format_subtree(parse_tree, indent)

  def _format_subtree(self, node, indent):
    """Renders exactly `node`, with no defaulting to self.parse_tree."""
    # The recursion lives here rather than in format_parse_tree so that a None
    # child (should one ever occur) is printed as-is, rather than being taken
    # for the "use self.parse_tree" default and restarting from the root.
    if not isinstance(node, lr1.Reduction):
      return "{}{}\n".format(indent, node)
    if not node.children:
      return indent + node.symbol + "\n"
    pieces = [indent + node.symbol + ":\n"]
    for child in node.children:
      pieces.append(self._format_subtree(child, indent + "  "))
    return "".join(pieces)

  def format_module_ir(self):
    """Renders self.ir in a human-readable format (JSON, 2-space indent)."""
    return self.ir.to_json(indent=2)


def format_production_set(productions):
  """Returns the productions as text, sorted, with one production per line."""
  ordered = sorted(productions)
  return "\n".join(map(str, ordered))


# Cache of ModuleDebugInfo objects for modules that parsed successfully, keyed
# by (source_code, file_name).  Read and written by parse_module_text.
_cached_modules = {}


def parse_module_text(source_code, file_name):
  """Parses the text of a module, returning a module-level IR.

  Arguments:
    source_code: The text of the module to parse.
    file_name: The name of the module's source file (will be included in the
        resulting IR).

  Returns:
    An (ir, debug_info, errors) tuple.  On success, ir is a module-level
    intermediate representation (IR), prior to import and symbol resolution,
    debug_info is the corresponding ModuleDebugInfo, for debugging the parser,
    and errors is an empty list.  If tokenization or parsing reports errors,
    ir is None and errors holds the reported errors.
  """
  cache_key = (source_code, file_name)

  # This is strictly an optimization to speed up tests, mostly by avoiding the
  # need to re-parse the prelude for every test .emb.
  if cache_key in _cached_modules:
    debug_info = _cached_modules[cache_key]
    # Hand out a fresh copy, so the cached IR is not shared with the caller.
    ir = ir_pb2.Module()
    ir.CopyFrom(debug_info.ir)
    ir.source_file_name = file_name
    return _IrDebugInfo(ir, debug_info, [])

  debug_info = ModuleDebugInfo(file_name)
  debug_info.source_code = source_code

  tokens, errors = tokenizer.tokenize(source_code, file_name)
  if errors:
    return _IrDebugInfo(None, debug_info, errors)
  debug_info.tokens = tokens

  parse_result = parser.parse_module(tokens)
  if parse_result.error:
    parse_error = error.make_error_from_parse_error(file_name,
                                                    parse_result.error)
    return _IrDebugInfo(None, debug_info, [parse_error])
  debug_info.parse_tree = parse_result.parse_tree

  used_productions = set()
  ir = module_ir.build_ir(parse_result.parse_tree, used_productions)
  debug_info.used_productions = used_productions

  # Keep a private snapshot of the IR in the debug info (and thus the cache),
  # taken before the file name is stamped onto the returned IR.
  snapshot = ir_pb2.Module()
  snapshot.CopyFrom(ir)
  debug_info.ir = snapshot
  _cached_modules[cache_key] = debug_info

  ir.source_file_name = file_name
  return _IrDebugInfo(ir, debug_info, [])


def parse_module(file_name, file_reader):
  """Reads and parses a module, returning a module-level IR.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that takes a file name and returns either:
        (file_contents, None) or
        (None, list_of_error_detail_strings)

  Returns:
    (ir, debug_info, errors), where ir is a module-level intermediate
    representation (IR), debug_info is a ModuleDebugInfo containing the
    tokenization, parse tree, and original source text of this module, and
    errors is a list of read, tokenization, or parse errors.  If errors is not
    an empty list, ir will be None.  If the file could not be read, debug_info
    will also be None.
  """
  source_code, errors = file_reader(file_name)
  if errors:
    # There is no source text to point into, so attach the failure to the very
    # start of the file.
    location = parser_types.make_location((1, 1), (1, 1))
    # A single error, made up of the headline message followed by one note per
    # detail string supplied by the file reader.  Returned as an _IrDebugInfo,
    # like every other result in this file; it still unpacks as a 3-tuple.
    return _IrDebugInfo(None, None, [
        [error.error(file_name, location, "Unable to read file.")] +
        [error.note(file_name, location, e) for e in errors]
    ])
  return parse_module_text(source_code, file_name)


def get_prelude():
  """Parses the Emboss Prelude and returns the resulting parse result.

  The prelude text is loaded from the "prelude.emb" resource of the
  compiler.front_end package, and parsed under the empty file name.
  """
  prelude_bytes = pkgutil.get_data("compiler.front_end", "prelude.emb")
  prelude_text = prelude_bytes.decode(encoding="UTF-8")
  return parse_module_text(prelude_text, "")


def parse_emboss_file(file_name, file_reader, stop_before_step=None):
  """Fully parses an .emb, and returns an IR suitable for passing to a back end.

  This is a convenience wrapper: it runs only_parse_emboss_file, and, if that
  succeeds, hands the result to process_ir.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that takes a file name and returns either
        (file_contents, None) or (None, list_of_error_detail_strings).
    stop_before_step: If set, parse_emboss_file will stop normalizing the IR
        just before the specified step.  This parameter should be None for
        non-test code.

  Returns:
    (ir, debug_info, errors), where ir is a complete IR, ready for consumption
    by an Emboss back end, debug_info is a DebugInfo containing the
    tokenization, parse tree, and original source text of all modules, and
    errors is a list of errors.  If errors is not an empty list, ir will be
    None.
  """
  first_stage_ir, debug_info, parse_errors = only_parse_emboss_file(
      file_name, file_reader)
  if parse_errors:
    return _IrDebugInfo(None, debug_info, parse_errors)

  final_ir, processing_errors = process_ir(first_stage_ir, stop_before_step)
  # Never hand back an IR alongside errors.
  result_ir = None if processing_errors else final_ir
  return _IrDebugInfo(result_ir, debug_info, processing_errors)


def only_parse_emboss_file(file_name, file_reader):
  """Parses an .emb, and returns an IR suitable for process_ir.

  only_parse_emboss_file parses the given file and all of its transitive
  imports, and returns a first-stage intermediate representation, which can be
  passed to process_ir.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that takes a file name and returns either
        (file_contents, None) or (None, list_of_error_detail_strings).

  Returns:
    (ir, debug_info, errors), where ir is an intermediate representation (IR),
    debug_info is a DebugInfo containing the tokenization, parse tree, and
    original source text of all modules, and errors is a list of read,
    tokenization, or parse errors.  If errors is not an empty list, ir will be
    None.
  """
  # Files are parsed in first-in, first-out order; a deque gives O(1) removal
  # from the front of the queue.
  pending_files = collections.deque([file_name])
  # Every file name that has ever been queued, so that each file is parsed at
  # most once, even if it is imported from several places.
  seen_files = {file_name}
  debug_info = DebugInfo()
  ir = ir_pb2.EmbossIr(module=[])
  while pending_files:
    file_to_parse = pending_files.popleft()
    if file_to_parse:
      module, module_debug_info, errors = parse_module(file_to_parse,
                                                       file_reader)
    else:
      # The empty file name denotes the prelude.
      module, module_debug_info, errors = get_prelude()
    # Record whatever debug info is available, even when parsing failed.
    if module_debug_info:
      debug_info.modules[file_to_parse] = module_debug_info
    if errors:
      return _IrDebugInfo(None, debug_info, errors)
    ir.module.extend([module])  # Proto supports extend but not append here.
    for import_ in module.foreign_import:
      imported_file_name = import_.file_name.text
      if imported_file_name not in seen_files:
        pending_files.append(imported_file_name)
        seen_files.add(imported_file_name)
  return _IrDebugInfo(ir, debug_info, [])


def process_ir(ir, stop_before_step):
  """Turns a first-stage IR into a fully-processed IR.

  process_ir runs every semantic processing step over `ir`, in order:
  resolving symbols, checking dependencies, adding type annotations,
  normalizing attributes, and so on.  It is generally meant to be called with
  the result of only_parse_emboss_file(), but in theory could be called with a
  first-stage intermediate representation (IR) from another source.

  Arguments:
    ir: The IR to process.  This structure will be modified during processing.
    stop_before_step: If set, the name of the step before which process_ir
        should stop and return the partially-processed IR.  This parameter
        should be None for non-test code.

  Returns:
    (ir, errors), where ir is a complete IR, ready for consumption by an Emboss
    back end, and errors is a list of compilation errors.  If errors is not an
    empty list, ir will be None.
  """
  pipeline = (
      synthetics.desugar,
      symbol_resolver.resolve_symbols,
      dependency_checker.find_dependency_cycles,
      dependency_checker.set_dependency_order,
      symbol_resolver.resolve_field_references,
      type_check.annotate_types,
      type_check.check_types,
      expression_bounds.compute_constants,
      attribute_checker.normalize_and_verify,
      constraints.check_constraints,
      write_inference.set_write_methods,
  )
  allowed_stop_points = [None] + [step.__name__ for step in pipeline]
  assert stop_before_step in allowed_stop_points, (
      "Bad value for stop_before_step.")

  # Parts of the IR are synthesized from the "natural" (user-written) parts of
  # the IR before those natural parts have been fully error checked.  As a
  # result, the synthesized parts can contain errors, and occasionally such an
  # error surfaces in an earlier pass than the corresponding error in the
  # natural IR.  For example:
  #
  #     struct Foo:
  #       0 [+1]  bits:
  #         0 [+1]  Flag  flag
  #       1 [+flag]  UInt:8  field
  #
  # Using `flag` as the size of `field` is wrong: `flag` is a boolean, and a
  # field's size must be an integer.
  #
  # Type checking happens in two passes.  The first checks that each
  # expression is internally consistent; the second checks each expression's
  # type against the place where it is used.  The misuse of `flag` would be
  # reported by the second pass.
  #
  # However, a synthesized $size_in_bytes virtual field is also added, so that
  # the struct effectively looks like:
  #
  #     struct Foo:
  #       0 [+1]  bits:
  #         0 [+1]  Flag  flag
  #       1 [+flag]  UInt:8  field
  #       let $size_in_bytes = $max(true ? 0 + 1 : 0, true ? 1 + flag : 0)
  #
  # `1 + flag` is not internally consistent, so it would be flagged in the
  # first pass, and the user would be shown the baffling message "the
  # right-hand argument of operator `+` must be an integer."
  #
  # To keep such messages away from users, errors in synthetic parts of the IR
  # are deferred.  Barring a compiler bug, the same problem will also be
  # reported against the natural IR, in a form that end users can actually
  # understand.
  #
  # If an error turns up in the synthetic IR with no matching error in the
  # natural IR, the synthetic errors are reported after all.  Their location
  # is then rendered as '[compiler bug]', which (hopefully) tells the end user
  # that the fault lies with the compiler.
  held_back_errors = []
  for step in pipeline:
    if step.__name__ == stop_before_step:
      return (ir, [])
    visible_errors, hidden_errors = error.split_errors(step(ir))
    if visible_errors:
      return (None, visible_errors)
    held_back_errors.extend(hidden_errors)

  if held_back_errors:
    return (None, held_back_errors)

  assert stop_before_step is None, "Bad value for stop_before_step."
  return (ir, [])
