Emit compilation errors for bad C++ namespaces (#92)

* Emit compilation errors for bad C++ namespaces

Prior to this CL, an incorrectly-formatted namespace will fail an assert
and print a python exception to the user. While this likely sufficient
for a user to diagnose the issue, it is inconsistent with other error
reporting in Emboss.

With this change, the namespace checking is implemented in the same way
as other validations in the Emboss compiler, providing a specific file,
line, and column range of the issue and printing the line containing the
issue to the user.

* Improve namespace error messages
diff --git a/compiler/back_end/cpp/header_generator.py b/compiler/back_end/cpp/header_generator.py
index b45993d..922d80b 100644
--- a/compiler/back_end/cpp/header_generator.py
+++ b/compiler/back_end/cpp/header_generator.py
@@ -66,6 +66,15 @@
 # Emboss C++ support classes.
 _SUPPORT_NAMESPACE = "::emboss::support"
 
+# Regex matching a C++ namespace component. Captures component name.
+_NS_COMPONENT_RE = r"(?:^\s*|::)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\s*$|::)"
+# Regex matching a full C++ namespace (at least one namespace component).
+_NS_RE = fr"^\s*(?:{_NS_COMPONENT_RE})+\s*$"
+# Regex matching an empty C++ namespace.
+_NS_EMPTY_RE = r"^\s*$"
+# Regex matching only the global C++ namespace.
+_NS_GLOBAL_RE = r"^\s*::\s*$"
+
 # TODO(bolms): This should be a command-line flag.
 _PRELUDE_INCLUDE_FILE = "runtime/cpp/emboss_prelude.h"
 
@@ -77,6 +86,22 @@
   assert name_conversion.is_case_conversion_supported("SHOUTY_CASE", _enum_case)
 
 
+def _get_namespace_components(namespace):
+  """Gets the components of a C++ namespace
+
+  Examples:
+    "::some::name::detail" -> ["some", "name", "detail"]
+    "product::name" -> ["product", "name"]
+    "simple" -> ["simple"]
+
+  Arguments:
+    namespace: A string containing the namespace. May be fully-qualified.
+
+  Returns:
+    A list of strings, one per namespace component."""
+  return re.findall(_NS_COMPONENT_RE, namespace)
+
+
 def _get_module_namespace(module):
   """Returns the C++ namespace of the module, as a list of components.
 
@@ -92,19 +117,7 @@
     namespace = namespace_attr.string_constant.text
   else:
     namespace = "emboss_generated_code"
-  if namespace[0:2] == "::":
-    # If the user explicitly specified the leading "::", trim it off: it will be
-    # re-added later, when the namespace is used as a prefix (as opposed to
-    # "namespace foo { }").
-    namespace = namespace[2:]
-  namespace_list = namespace.split("::")
-  for namespace_component in namespace_list:
-    assert re.match("[a-zA-Z_][a-zA-Z0-9_]*", namespace_component), (
-        "Bad namespace '{}'".format(namespace))
-    assert namespace_component not in _CPP_RESERVED_WORDS, (
-        "Reserved word '{}' is not allowed as a namespace component.".format(
-            namespace_component))
-  return namespace_list
+  return _get_namespace_components(namespace)
 
 
 def _cpp_string_escape(string):
@@ -1430,6 +1443,34 @@
   return new_location
 
 
+def _verify_namespace_attribute(attr, source_file_name, errors):
+  if attr.name.text != attributes.Attribute.NAMESPACE:
+    return
+  namespace_value = attr.value.string_constant
+  if not re.match(_NS_RE, namespace_value.text):
+    if re.match(_NS_EMPTY_RE, namespace_value.text):
+      errors.append([error.error(
+          source_file_name, namespace_value.source_location,
+          'Empty namespace value is not allowed.')])
+    elif re.match(_NS_GLOBAL_RE, namespace_value.text):
+      errors.append([error.error(
+          source_file_name, namespace_value.source_location,
+          'Global namespace is not allowed.')])
+    else:
+      errors.append([error.error(
+          source_file_name, namespace_value.source_location,
+          'Invalid namespace, must be a valid C++ namespace, such as "abc", '
+          '"abc::def", or "::abc::def::ghi" (ISO/IEC 14882:2017 '
+          'enclosing-namespace-specifier).')])
+    return
+  for word in _get_namespace_components(namespace_value.text):
+    if word in _CPP_RESERVED_WORDS:
+      errors.append([error.error(
+          source_file_name, namespace_value.source_location,
+          f'Reserved word "{word}" is not allowed as a namespace component.'
+      )])
+
+
 def _verify_enum_case_attribute(attr, source_file_name, errors):
   """Verify that `enum_case` values are supported."""
   if attr.name.text != attributes.Attribute.ENUM_CASE:
@@ -1468,11 +1509,14 @@
 def _verify_attribute_values(ir):
   """Verify backend attribute values."""
   errors = []
-  # Note, this does not yet verify `namespace` attributes. Namespace
-  # verification is done in _get_module_namespace.
+
+  traverse_ir.fast_traverse_ir_top_down(
+      ir, [ir_pb2.Attribute], _verify_namespace_attribute,
+      parameters={"errors": errors})
   traverse_ir.fast_traverse_ir_top_down(
       ir, [ir_pb2.Attribute], _verify_enum_case_attribute,
       parameters={"errors": errors})
+
   return errors
 
 
diff --git a/compiler/back_end/cpp/header_generator_test.py b/compiler/back_end/cpp/header_generator_test.py
index 1025a74..e67ed50 100644
--- a/compiler/back_end/cpp/header_generator_test.py
+++ b/compiler/back_end/cpp/header_generator_test.py
@@ -266,6 +266,110 @@
                     'Empty enum case (or excess comma).')
     ]], header_generator.generate_header(ir)[1])
 
+  def test_accepts_namespace(self):
+    for test in [
+        '[(cpp) namespace: "basic"]\n',
+        '[(cpp) namespace: "multiple::components"]\n',
+        '[(cpp) namespace: "::absolute"]\n',
+        '[(cpp) namespace: "::fully::qualified"]\n',
+        '[(cpp) namespace: "CAN::Be::cAPITAL"]\n',
+        '[(cpp) namespace: "trailingNumbers54321"]\n',
+        '[(cpp) namespace: "containing4321numbers"]\n',
+        '[(cpp) namespace: "can_have_underscores"]\n',
+        '[(cpp) namespace: "_initial_underscore"]\n',
+        '[(cpp) namespace: "_initial::_underscore"]\n',
+        '[(cpp) namespace: "::_initial::_underscore"]\n',
+        '[(cpp) namespace: "trailing_underscore_"]\n',
+        '[(cpp) namespace: "trailing_::underscore_"]\n',
+        '[(cpp) namespace: "::trailing_::underscore_"]\n',
+        '[(cpp) namespace: " spaces "]\n',
+        '[(cpp) namespace: "with :: spaces"]\n',
+        '[(cpp) namespace: "   ::fully:: qualified :: with::spaces"]\n',
+    ]:
+      ir = _make_ir_from_emb(test)
+      self.assertEqual([], header_generator.generate_header(ir)[1])
+
+  def test_rejects_non_namespace_strings(self):
+    for test in [
+        '[(cpp) namespace: "5th::avenue"]\n',
+        '[(cpp) namespace: "can\'t::have::apostrophe"]\n',
+        '[(cpp) namespace: "cannot-have-dash"]\n',
+        '[(cpp) namespace: "no/slashes"]\n',
+        '[(cpp) namespace: "no\\\\slashes"]\n',
+        '[(cpp) namespace: "apostrophes*are*rejected"]\n',
+        '[(cpp) namespace: "avoid.dot"]\n',
+        '[(cpp) namespace: "does5+5"]\n',
+        '[(cpp) namespace: "=10"]\n',
+        '[(cpp) namespace: "?"]\n',
+        '[(cpp) namespace: "reject::spaces in::components"]\n',
+        '[(cpp) namespace: "totally::valid::but::extra         +"]\n',
+        '[(cpp) namespace: "totally::valid::but::extra         ::?"]\n',
+        '[(cpp) namespace: "< totally::valid::but::extra"]\n',
+        '[(cpp) namespace: "< ::totally::valid::but::extra"]\n',
+        '[(cpp) namespace: "::totally::valid::but::extra::"]\n',
+        '[(cpp) namespace: ":::extra::colon"]\n',
+        '[(cpp) namespace: "::extra:::colon"]\n',
+    ]:
+      ir = _make_ir_from_emb(test)
+      attr = ir.module[0].attribute[0]
+      self.assertEqual([[
+          error.error("m.emb", attr.value.source_location,
+                      'Invalid namespace, must be a valid C++ namespace, such '
+                      'as "abc", "abc::def", or "::abc::def::ghi" (ISO/IEC '
+                      '14882:2017 enclosing-namespace-specifier).')
+      ]], header_generator.generate_header(ir)[1])
+
+  def test_rejects_empty_namespace(self):
+    for test in [
+        '[(cpp) namespace: ""]\n',
+        '[(cpp) namespace: " "]\n',
+        '[(cpp) namespace: "    "]\n',
+    ]:
+      ir = _make_ir_from_emb(test)
+      attr = ir.module[0].attribute[0]
+      self.assertEqual([[
+          error.error("m.emb", attr.value.source_location,
+                      'Empty namespace value is not allowed.')
+      ]], header_generator.generate_header(ir)[1])
+
+  def test_rejects_global_namespace(self):
+    for test in [
+        '[(cpp) namespace: "::"]\n',
+        '[(cpp) namespace: "  ::"]\n',
+        '[(cpp) namespace: ":: "]\n',
+        '[(cpp) namespace: " ::  "]\n',
+    ]:
+      ir = _make_ir_from_emb(test)
+      attr = ir.module[0].attribute[0]
+      self.assertEqual([[
+          error.error("m.emb", attr.value.source_location,
+                      'Global namespace is not allowed.')
+      ]], header_generator.generate_header(ir)[1])
+
+  def test_rejects_reserved_namespace(self):
+    for test, expected in [
+        # Only component
+        ('[(cpp) namespace: "class"]\n', 'class'),
+        # Only component, fully qualified name
+        ('[(cpp) namespace: "::const"]\n', 'const'),
+        # First component
+        ('[(cpp) namespace: "if::valid"]\n', 'if'),
+        # First component, fully qualified name
+        ('[(cpp) namespace: "::auto::pilot"]\n', 'auto'),
+        # Last component
+        ('[(cpp) namespace: "make::do"]\n', 'do'),
+        # Middle component
+        ('[(cpp) namespace: "our::new::product"]\n', 'new'),
+    ]:
+      ir = _make_ir_from_emb(test)
+      attr = ir.module[0].attribute[0]
+
+      self.assertEqual([[
+          error.error("m.emb", attr.value.source_location,
+                      f'Reserved word "{expected}" is not allowed '
+                      f'as a namespace component.')]],
+          header_generator.generate_header(ir)[1])
+
 
 if __name__ == "__main__":
     unittest.main()