pw_protobuf: Auto-fix reserved C++ identifiers

Make Pigweed's protobuf codegen automatically fix identifiers that are
reserved by the C++ spec or that conflict with standard-library macros.
(Specifically, append an underscore in these cases.)

Bug: b/234855743
Change-Id: I2782304080fe6aeebd194b029cde266a7058cd4f
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/96900
Commit-Queue: Matthias Guenther <mrguenther@google.com>
Reviewed-by: Ted Pudlik <tpudlik@google.com>

commit: 48a8cf308e5cbf2050d1ac3e7ba387bae8749738 [log] [tgz]
author: Matthias Guenther <mrguenther@google.com> Fri Jun 03 11:12:21 2022 -0700
committer: CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com> Thu Jun 09 18:24:15 2022 +0000
tree: 0cf568d4f07a35e898690b954b919aebba64b11e
parent: d48a5727b2103e6447b28222c4a9536799fec55d [diff]
diff --git a/pw_protobuf/codegen_message_test.cc b/pw_protobuf/codegen_message_test.cc
index 772aed3..ddfc928 100644
--- a/pw_protobuf/codegen_message_test.cc
+++ b/pw_protobuf/codegen_message_test.cc

@@ -14,6 +14,7 @@
 #include <array>
 #include <span>
 #include <string_view>
+#include <tuple>
 
 #include "gtest/gtest.h"
 #include "pw_preprocessor/compiler.h"
@@ -239,6 +240,47 @@
   EXPECT_TRUE(one == two);
 }
 
+TEST(CodegenMessage, FixReservedIdentifiers) {
+  // This test checks that the code was generated as expected. Each statement
+  // only refers to generated symbols, so a mismatch is a compilation failure.
+
+  // Make sure that the `signed` field was renamed to `signed_`.
+  std::ignore = IntegerMetadata::Message{
+      .bits = 32,
+      .signed_ = true,
+  };
+
+  // Make sure that `Message::Message` exists in the generated code and hasn't
+  // caused any namespace-resolution issues.
+  std::ignore = Message::Message{
+      .length = 4096,
+  };
+
+  // Make sure that `Fields::Fields` exists in the generated code and hasn't
+  // caused any namespace-resolution issues.
+  std::ignore = Fields::Fields::INTEGERS;
+  std::ignore = Fields::Fields::COMPLEX_NUMBERS;
+  std::ignore = Fields::Fields::MEROMORPHIC_FUNCTIONS_ON_COMPLEX_PLANE;
+
+  // Make sure that the `ReservedWord` enum values were renamed as expected.
+  // Specifically, only enum-value names that are reserved in UPPER_SNAKE_CASE
+  // should be modified. Names that are only reserved in lower_snake_case should
+  // be left alone since they'll never appear in that form in the generated
+  // code.
+  std::ignore = ReservedWord::NULL_;    // Add underscore since NULL is a macro.
+  std::ignore = ReservedWord::kNull;    // No underscore necessary.
+  std::ignore = ReservedWord::INT;      // No underscore necessary.
+  std::ignore = ReservedWord::kInt;     // No underscore necessary.
+  std::ignore = ReservedWord::RETURN;   // No underscore necessary.
+  std::ignore = ReservedWord::kReturn;  // No underscore necessary.
+  std::ignore = ReservedWord::BREAK;    // No underscore necessary.
+  std::ignore = ReservedWord::kBreak;   // No underscore necessary.
+  std::ignore = ReservedWord::FOR;      // No underscore necessary.
+  std::ignore = ReservedWord::kFor;     // No underscore necessary.
+  std::ignore = ReservedWord::DO;       // No underscore necessary.
+  std::ignore = ReservedWord::kDo;      // No underscore necessary.
+}
+
 PW_MODIFY_DIAGNOSTICS_POP();
 
 TEST(CodegenMessage, Read) {

diff --git a/pw_protobuf/docs.rst b/pw_protobuf/docs.rst
index 27cf243..98487a1 100644
--- a/pw_protobuf/docs.rst
+++ b/pw_protobuf/docs.rst

@@ -666,6 +666,39 @@
       std::optional<int32_t> points;
     };
 
+* Fields and messages whose names conflict with reserved C++ keywords or with
+  standard-library macros are suffixed with underscores to avoid compilation
+  failures. Names that merely match symbols defined by the code-generation
+  implementation are left unchanged. These cases are illustrated below by the
+  ``operator`` field and the ``Message`` message, respectively.
+
+  .. code::
+
+    message Channel {
+      int32 bitrate = 1;
+      float signal_to_noise_ratio = 2;
+      Company operator = 3;
+    }
+
+    message Message {
+      User sender = 2;
+      User recipient = 3;
+      Channel channel = 4;
+    }
+
+  .. code:: c++
+
+    struct Channel::Message {
+      int32_t bitrate;
+      float signal_to_noise_ratio;
+      Company::Message operator_;
+    };
+
+    struct Message::Message {
+      User::Message sender;
+      User::Message recipient;
+      Channel::Message channel;
+    };
 
 * Repeated scalar fields are represented by ``pw::Vector`` when the
   ``max_count`` option is set for that field, or by ``std::array`` when both

diff --git a/pw_protobuf/pw_protobuf_test_protos/full_test.proto b/pw_protobuf/pw_protobuf_test_protos/full_test.proto
index eca7ac1..1309a6c 100644
--- a/pw_protobuf/pw_protobuf_test_protos/full_test.proto
+++ b/pw_protobuf/pw_protobuf_test_protos/full_test.proto

@@ -137,6 +137,46 @@
   repeated Crate smaller_crates = 2;
 }
 
+// Ensure that reserved words are suffixed with underscores.
+message IntegerMetadata {
+  int32 bits = 1;
+  bool signed = 2;  // `signed` should become `signed_` in the C++ code.
+}
+
+// Ensure that messages named `Message` don't cause namespace-resolution issues
+// when the codegen internally references the generated type `Message::Message`.
+message Message {
+  int32 length = 1;
+}
+
+// Ensure that messages named `Fields` don't cause namespace-resolution issues
+// when the codegen internally references the generated type `Fields::Fields`.
+message Fields {
+  message Field {}
+  Field integers = 1;
+  Field complex_numbers = 2;
+  Field meromorphic_functions_on_complex_plane = 3;
+}
+
+// Enum values are handled differently from normal identifiers because they are
+// automatically case-converted in the generated code. Therefore, we append an
+// underscore if the output format exactly matches a reserved word or a
+// standard-library macro. This enum tests that underscores are appended in
+// cases where they're actually necessary but not when they can be skipped due
+// to case conversion.
+enum ReservedWord {
+  NULL = 0;
+  int = 1;
+  return = 2;
+  break = 3;
+  for  // The linter wants a line break here.
+    = 4;
+  do  // The linter wants a line break here.
+    = 5;
+  // Note: This obviously isn't anywhere near a complete list of C++ keywords
+  //       and standard-library macros.
+}
+
 // This might be useful.
 message KeyValuePair {
   string key = 1;

diff --git a/pw_protobuf/py/BUILD.bazel b/pw_protobuf/py/BUILD.bazel
index 882576f..e273026 100644
--- a/pw_protobuf/py/BUILD.bazel
+++ b/pw_protobuf/py/BUILD.bazel

@@ -27,6 +27,7 @@
         "pw_protobuf/output_file.py",
         "pw_protobuf/plugin.py",
         "pw_protobuf/proto_tree.py",
+        "pw_protobuf/symbol_name_mapping.py",
     ],
 )
 

diff --git a/pw_protobuf/py/BUILD.gn b/pw_protobuf/py/BUILD.gn
index 5afd50a..746c7ed 100644
--- a/pw_protobuf/py/BUILD.gn
+++ b/pw_protobuf/py/BUILD.gn

@@ -29,6 +29,7 @@
     "pw_protobuf/output_file.py",
     "pw_protobuf/plugin.py",
     "pw_protobuf/proto_tree.py",
+    "pw_protobuf/symbol_name_mapping.py",
   ]
   python_deps = [
     "$dir_pw_cli/py",

diff --git a/pw_protobuf/py/pw_protobuf/codegen_pwpb.py b/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
index 1086acb..2304b26 100644
--- a/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
+++ b/pw_protobuf/py/pw_protobuf/codegen_pwpb.py

@@ -2289,7 +2289,7 @@
     # message/enum nodes, then the second creates the fields in each. This is
     # done as non-primitive fields need pointers to their types, which requires
     # the entire tree to have been parsed into memory.
-    _, package_root = build_node_tree(proto_file, proto_options)
+    _, package_root = build_node_tree(proto_file, proto_options=proto_options)
 
     output_filename = _proto_filename_to_generated_header(proto_file.name)
     output_file = OutputFile(output_filename)

diff --git a/pw_protobuf/py/pw_protobuf/proto_tree.py b/pw_protobuf/py/pw_protobuf/proto_tree.py
index f0b5b3d..f77db7f 100644
--- a/pw_protobuf/py/pw_protobuf/proto_tree.py
+++ b/pw_protobuf/py/pw_protobuf/proto_tree.py

@@ -22,7 +22,7 @@
 
 from google.protobuf import descriptor_pb2
 
-from pw_protobuf import options
+from pw_protobuf import options, symbol_name_mapping
 from pw_protobuf_codegen_protos.options_pb2 import Options
 
 T = TypeVar('T')  # pylint: disable=invalid-name
@@ -66,7 +66,8 @@
 
     def cpp_name(self) -> str:
         """The name of this node in generated C++ code."""
-        return self._name.replace('.', '::')
+        return symbol_name_mapping.fix_cc_identifier(self._name).replace(
+            '.', '::')
 
     def cpp_namespace(self, root: Optional['ProtoNode'] = None) -> str:
         """C++ namespace of the node, up to the specified root."""
@@ -160,7 +161,11 @@
         # pylint: enable=protected-access
 
     def find(self, path: str) -> Optional['ProtoNode']:
-        """Finds a node within this node's subtree."""
+        """Finds a node within this node's subtree.
+
+        Args:
+          path: The path to the sought node.
+        """
         node = self
 
         # pylint: disable=protected-access
@@ -229,7 +234,11 @@
         return list(self._values)
 
     def add_value(self, name: str, value: int) -> None:
-        self._values.append((ProtoMessageField.upper_snake_case(name), value))
+        self._values.append((
+            ProtoMessageField.upper_snake_case(
+                symbol_name_mapping.fix_cc_enum_value_name(name)),
+            value,
+        ))
 
     def _supports_child(self, child: ProtoNode) -> bool:
         # Enums cannot have nested children.
@@ -328,7 +337,7 @@
                  type_node: Optional[ProtoNode] = None,
                  repeated: bool = False,
                  field_options: Optional[Options] = None):
-        self._field_name = field_name
+        self._field_name = symbol_name_mapping.fix_cc_identifier(field_name)
         self._number: int = field_number
         self._type: int = field_type
         self._type_node: Optional[ProtoNode] = type_node
@@ -360,9 +369,7 @@
     def upper_camel_case(field_name: str) -> str:
         """Converts a field name to UpperCamelCase."""
         name_components = field_name.split('_')
-        for i, _ in enumerate(name_components):
-            name_components[i] = name_components[i].lower().capitalize()
-        return ''.join(name_components)
+        return ''.join([word.lower().capitalize() for word in name_components])
 
     @staticmethod
     def upper_snake_case(field_name: str) -> str:

diff --git a/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
new file mode 100755
index 0000000..216e409
--- /dev/null
+++ b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py

@@ -0,0 +1,603 @@
+#!/usr/bin/env python3
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Fixes identifiers that would cause compiler errors in generated C++ code."""
+
+# Set of words that can't be used as identifiers in the generated code. Many of
+# these are valid identifiers in proto syntax, but they need special handling in
+# the generated C++ code.
+#
+# Note: This is primarily used for "if x in y" operations, hence the use of a
+# set rather than a list.
+PW_PROTO_CODEGEN_RESERVED_WORDS: set[str] = {
+    # C++20 keywords (https://en.cppreference.com/w/cpp/keyword):
+    "alignas",
+    "alignof",
+    "and",
+    "and_eq",
+    "asm",
+    "atomic_cancel",
+    "atomic_commit",
+    "atomic_noexcept",
+    "auto",
+    "bitand",
+    "bitor",
+    "bool",
+    "break",
+    "case",
+    "catch",
+    "char",
+    "char8_t",
+    "char16_t",
+    "char32_t",
+    "class",
+    "compl",
+    "concept",
+    "const",
+    "consteval",
+    "constexpr",
+    "constinit",
+    "const_cast",
+    "continue",
+    "co_await",
+    "co_return",
+    "co_yield",
+    "decltype",
+    "default",
+    "delete",
+    "do",
+    "double",
+    "dynamic_cast",
+    "else",
+    "enum",
+    "explicit",
+    "export",
+    "extern",
+    "false",
+    "float",
+    "for",
+    "friend",
+    "goto",
+    "if",
+    "inline",
+    "int",
+    "long",
+    "mutable",
+    "namespace",
+    "new",
+    "noexcept",
+    "not",
+    "not_eq",
+    "nullptr",
+    "operator",
+    "or",
+    "or_eq",
+    "private",
+    "protected",
+    "public",
+    "reflexpr",
+    "register",
+    "reinterpret_cast",
+    "requires",
+    "return",
+    "short",
+    "signed",
+    "sizeof",
+    "static",
+    "static_assert",
+    "static_cast",
+    "struct",
+    "switch",
+    "synchronized",
+    "template",
+    "this",
+    "thread_local",
+    "throw",
+    "true",
+    "try",
+    "typedef",
+    "typeid",
+    "typename",
+    "union",
+    "unsigned",
+    "using",
+    "virtual",
+    "void",
+    "volatile",
+    "wchar_t",
+    "while",
+    "xor",
+    "xor_eq",
+    # C++20 macros (https://en.cppreference.com/w/cpp/symbol_index/macro),
+    # excluding the following:
+    # - Function-like macros, which have unambiguous syntax and thus won't
+    #   conflict with generated symbols.
+    # - Macros that couldn't be made valid by appending underscores, namely
+    #   those containing "__" or starting with "_[A-Z]". C++ reserves all such
+    #   identifiers for the compiler, and appending underscores wouldn't change
+    #   that.
+    "ATOMIC_BOOL_LOCK_FREE",
+    "ATOMIC_CHAR_LOCK_FREE",
+    "ATOMIC_CHAR16_T_LOCK_FREE",
+    "ATOMIC_CHAR32_T_LOCK_FREE",
+    "ATOMIC_CHAR8_T_LOCK_FREE",
+    "ATOMIC_FLAG_INIT",
+    "ATOMIC_INT_LOCK_FREE",
+    "ATOMIC_LLONG_LOCK_FREE",
+    "ATOMIC_LONG_LOCK_FREE",
+    "ATOMIC_POINTER_LOCK_FREE",
+    "ATOMIC_SHORT_LOCK_FREE",
+    "ATOMIC_WCHAR_T_LOCK_FREE",
+    "BUFSIZ",
+    "CHAR_BIT",
+    "CHAR_MAX",
+    "CHAR_MIN",
+    "CLOCKS_PER_SEC",
+    "DBL_DECIMAL_DIG",
+    "DBL_DIG",
+    "DBL_EPSILON",
+    "DBL_HAS_SUBNORM",
+    "DBL_MANT_DIG",
+    "DBL_MAX",
+    "DBL_MAX_10_EXP",
+    "DBL_MAX_EXP",
+    "DBL_MIN",
+    "DBL_MIN_10_EXP",
+    "DBL_MIN_EXP",
+    "DBL_TRUE_MIN",
+    "DECIMAL_DIG",
+    "E2BIG",
+    "EACCES",
+    "EADDRINUSE",
+    "EADDRNOTAVAIL",
+    "EAFNOSUPPORT",
+    "EAGAIN",
+    "EALREADY",
+    "EBADF",
+    "EBADMSG",
+    "EBUSY",
+    "ECANCELED",
+    "ECHILD",
+    "ECONNABORTED",
+    "ECONNREFUSED",
+    "ECONNRESET",
+    "EDEADLK",
+    "EDESTADDRREQ",
+    "EDOM",
+    "EEXIST",
+    "EFAULT",
+    "EFBIG",
+    "EHOSTUNREACH",
+    "EIDRM",
+    "EILSEQ",
+    "EINPROGRESS",
+    "EINTR",
+    "EINVAL",
+    "EIO",
+    "EISCONN",
+    "EISDIR",
+    "ELOOP",
+    "EMFILE",
+    "EMLINK",
+    "EMSGSIZE",
+    "ENAMETOOLONG",
+    "ENETDOWN",
+    "ENETRESET",
+    "ENETUNREACH",
+    "ENFILE",
+    "ENOBUFS",
+    "ENODATA",
+    "ENODEV",
+    "ENOENT",
+    "ENOEXEC",
+    "ENOLCK",
+    "ENOLINK",
+    "ENOMEM",
+    "ENOMSG",
+    "ENOPROTOOPT",
+    "ENOSPC",
+    "ENOSR",
+    "ENOSTR",
+    "ENOSYS",
+    "ENOTCONN",
+    "ENOTDIR",
+    "ENOTEMPTY",
+    "ENOTRECOVERABLE",
+    "ENOTSOCK",
+    "ENOTSUP",
+    "ENOTTY",
+    "ENXIO",
+    "EOF",
+    "EOPNOTSUPP",
+    "EOVERFLOW",
+    "EOWNERDEAD",
+    "EPERM",
+    "EPIPE",
+    "EPROTO",
+    "EPROTONOSUPPORT",
+    "EPROTOTYPE",
+    "ERANGE",
+    "EROFS",
+    "errno",
+    "ESPIPE",
+    "ESRCH",
+    "ETIME",
+    "ETIMEDOUT",
+    "ETXTBSY",
+    "EWOULDBLOCK",
+    "EXDEV",
+    "EXIT_FAILURE",
+    "EXIT_SUCCESS",
+    "FE_ALL_EXCEPT",
+    "FE_DFL_ENV",
+    "FE_DIVBYZERO",
+    "FE_DOWNWARD",
+    "FE_INEXACT",
+    "FE_INVALID",
+    "FE_OVERFLOW",
+    "FE_TONEAREST",
+    "FE_TOWARDZERO",
+    "FE_UNDERFLOW",
+    "FE_UPWARD",
+    "FILENAME_MAX",
+    "FLT_DECIMAL_DIG",
+    "FLT_DIG",
+    "FLT_EPSILON",
+    "FLT_EVAL_METHOD",
+    "FLT_HAS_SUBNORM",
+    "FLT_MANT_DIG",
+    "FLT_MAX",
+    "FLT_MAX_10_EXP",
+    "FLT_MAX_EXP",
+    "FLT_MIN",
+    "FLT_MIN_10_EXP",
+    "FLT_MIN_EXP",
+    "FLT_RADIX",
+    "FLT_ROUNDS",
+    "FLT_TRUE_MIN",
+    "FOPEN_MAX",
+    "FP_FAST_FMA",
+    "FP_FAST_FMAF",
+    "FP_FAST_FMAL",
+    "FP_ILOGB0",
+    "FP_ILOGBNAN",
+    "FP_SUBNORMAL",
+    "FP_ZERO",
+    "FP_INFINITE",
+    "FP_NAN",
+    "FP_NORMAL",
+    "HUGE_VAL",
+    "HUGE_VALF",
+    "HUGE_VALL",
+    "INFINITY",
+    "INT_FAST16_MAX",
+    "INT_FAST16_MIN",
+    "INT_FAST32_MAX",
+    "INT_FAST32_MIN",
+    "INT_FAST64_MAX",
+    "INT_FAST64_MIN",
+    "INT_FAST8_MAX",
+    "INT_FAST8_MIN",
+    "INT_LEAST16_MAX",
+    "INT_LEAST16_MIN",
+    "INT_LEAST32_MAX",
+    "INT_LEAST32_MIN",
+    "INT_LEAST64_MAX",
+    "INT_LEAST64_MIN",
+    "INT_LEAST8_MAX",
+    "INT_LEAST8_MIN",
+    "INT_MAX",
+    "INT_MIN",
+    "INT16_MAX",
+    "INT16_MIN",
+    "INT32_MAX",
+    "INT32_MIN",
+    "INT64_MAX",
+    "INT64_MIN",
+    "INT8_MAX",
+    "INT8_MIN",
+    "INTMAX_MAX",
+    "INTMAX_MIN",
+    "INTPTR_MAX",
+    "INTPTR_MIN",
+    "L_tmpnam",
+    "LC_ALL",
+    "LC_COLLATE",
+    "LC_CTYPE",
+    "LC_MONETARY",
+    "LC_NUMERIC",
+    "LC_TIME",
+    "LDBL_DECIMAL_DIG",
+    "LDBL_DIG",
+    "LDBL_EPSILON",
+    "LDBL_HAS_SUBNORM",
+    "LDBL_MANT_DIG",
+    "LDBL_MAX",
+    "LDBL_MAX_10_EXP",
+    "LDBL_MAX_EXP",
+    "LDBL_MIN",
+    "LDBL_MIN_10_EXP",
+    "LDBL_MIN_EXP",
+    "LDBL_TRUE_MIN",
+    "LLONG_MAX",
+    "LLONG_MIN",
+    "LONG_MAX",
+    "LONG_MIN",
+    "MATH_ERREXCEPT",
+    "math_errhandling",
+    "MATH_ERRNO",
+    "MB_CUR_MAX",
+    "MB_LEN_MAX",
+    "NAN",
+    "NULL",
+    "ONCE_FLAG_INIT",
+    "PRId16",
+    "PRId32",
+    "PRId64",
+    "PRId8",
+    "PRIdFAST16",
+    "PRIdFAST32",
+    "PRIdFAST64",
+    "PRIdFAST8",
+    "PRIdLEAST16",
+    "PRIdLEAST32",
+    "PRIdLEAST64",
+    "PRIdLEAST8",
+    "PRIdMAX",
+    "PRIdPTR",
+    "PRIi16",
+    "PRIi32",
+    "PRIi64",
+    "PRIi8",
+    "PRIiFAST16",
+    "PRIiFAST32",
+    "PRIiFAST64",
+    "PRIiFAST8",
+    "PRIiLEAST16",
+    "PRIiLEAST32",
+    "PRIiLEAST64",
+    "PRIiLEAST8",
+    "PRIiMAX",
+    "PRIiPTR",
+    "PRIo16",
+    "PRIo32",
+    "PRIo64",
+    "PRIo8",
+    "PRIoFAST16",
+    "PRIoFAST32",
+    "PRIoFAST64",
+    "PRIoFAST8",
+    "PRIoLEAST16",
+    "PRIoLEAST32",
+    "PRIoLEAST64",
+    "PRIoLEAST8",
+    "PRIoMAX",
+    "PRIoPTR",
+    "PRIu16",
+    "PRIu32",
+    "PRIu64",
+    "PRIu8",
+    "PRIuFAST16",
+    "PRIuFAST32",
+    "PRIuFAST64",
+    "PRIuFAST8",
+    "PRIuLEAST16",
+    "PRIuLEAST32",
+    "PRIuLEAST64",
+    "PRIuLEAST8",
+    "PRIuMAX",
+    "PRIuPTR",
+    "PRIx16",
+    "PRIX16",
+    "PRIx32",
+    "PRIX32",
+    "PRIx64",
+    "PRIX64",
+    "PRIx8",
+    "PRIX8",
+    "PRIxFAST16",
+    "PRIXFAST16",
+    "PRIxFAST32",
+    "PRIXFAST32",
+    "PRIxFAST64",
+    "PRIXFAST64",
+    "PRIxFAST8",
+    "PRIXFAST8",
+    "PRIxLEAST16",
+    "PRIXLEAST16",
+    "PRIxLEAST32",
+    "PRIXLEAST32",
+    "PRIxLEAST64",
+    "PRIXLEAST64",
+    "PRIxLEAST8",
+    "PRIXLEAST8",
+    "PRIxMAX",
+    "PRIXMAX",
+    "PRIxPTR",
+    "PRIXPTR",
+    "PTRDIFF_MAX",
+    "PTRDIFF_MIN",
+    "RAND_MAX",
+    "SCHAR_MAX",
+    "SCHAR_MIN",
+    "SCNd16",
+    "SCNd32",
+    "SCNd64",
+    "SCNd8",
+    "SCNdFAST16",
+    "SCNdFAST32",
+    "SCNdFAST64",
+    "SCNdFAST8",
+    "SCNdLEAST16",
+    "SCNdLEAST32",
+    "SCNdLEAST64",
+    "SCNdLEAST8",
+    "SCNdMAX",
+    "SCNdPTR",
+    "SCNi16",
+    "SCNi32",
+    "SCNi64",
+    "SCNi8",
+    "SCNiFAST16",
+    "SCNiFAST32",
+    "SCNiFAST64",
+    "SCNiFAST8",
+    "SCNiLEAST16",
+    "SCNiLEAST32",
+    "SCNiLEAST64",
+    "SCNiLEAST8",
+    "SCNiMAX",
+    "SCNiPTR",
+    "SCNo16",
+    "SCNo32",
+    "SCNo64",
+    "SCNo8",
+    "SCNoFAST16",
+    "SCNoFAST32",
+    "SCNoFAST64",
+    "SCNoFAST8",
+    "SCNoLEAST16",
+    "SCNoLEAST32",
+    "SCNoLEAST64",
+    "SCNoLEAST8",
+    "SCNoMAX",
+    "SCNoPTR",
+    "SCNu16",
+    "SCNu32",
+    "SCNu64",
+    "SCNu8",
+    "SCNuFAST16",
+    "SCNuFAST32",
+    "SCNuFAST64",
+    "SCNuFAST8",
+    "SCNuLEAST16",
+    "SCNuLEAST32",
+    "SCNuLEAST64",
+    "SCNuLEAST8",
+    "SCNuMAX",
+    "SCNuPTR",
+    "SCNx16",
+    "SCNx32",
+    "SCNx64",
+    "SCNx8",
+    "SCNxFAST16",
+    "SCNxFAST32",
+    "SCNxFAST64",
+    "SCNxFAST8",
+    "SCNxLEAST16",
+    "SCNxLEAST32",
+    "SCNxLEAST64",
+    "SCNxLEAST8",
+    "SCNxMAX",
+    "SCNxPTR",
+    "SEEK_CUR",
+    "SEEK_END",
+    "SEEK_SET",
+    "SHRT_MAX",
+    "SHRT_MIN",
+    "SIG_ATOMIC_MAX",
+    "SIG_ATOMIC_MIN",
+    "SIG_DFL",
+    "SIG_ERR",
+    "SIG_IGN",
+    "SIGABRT",
+    "SIGFPE",
+    "SIGILL",
+    "SIGINT",
+    "SIGSEGV",
+    "SIGTERM",
+    "SIZE_MAX",
+    "stderr",
+    "stdin",
+    "stdout",
+    "TIME_UTC",
+    "TMP_MAX",
+    "UCHAR_MAX",
+    "UINT_FAST16_MAX",
+    "UINT_FAST32_MAX",
+    "UINT_FAST64_MAX",
+    "UINT_FAST8_MAX",
+    "UINT_LEAST16_MAX",
+    "UINT_LEAST32_MAX",
+    "UINT_LEAST64_MAX",
+    "UINT_LEAST8_MAX",
+    "UINT_MAX",
+    "UINT16_MAX",
+    "UINT32_MAX",
+    "UINT64_MAX",
+    "UINT8_MAX",
+    "UINTMAX_MAX",
+    "UINTPTR_MAX",
+    "ULLONG_MAX",
+    "ULONG_MAX",
+    "USHRT_MAX",
+    "WCHAR_MAX",
+    "WCHAR_MIN",
+    "WEOF",
+    "WINT_MAX",
+    "WINT_MIN",
+}
+
+
+def _transform_invalid_identifier(invalid_identifier: str) -> str:
+    """Makes an invalid C++ identifier valid by appending an underscore.
+
+    Appending an underscore covers the vast majority of realistic cases, but
+    not all of them; the caveats are described in the documentation of the
+    `fix_cc_identifier` function.
+    """
+    return invalid_identifier + "_"
+
+
+def fix_cc_identifier(proto_identifier: str) -> str:
+    """Maps a proto identifier to a name that is safe in generated C++ code.
+
+    Identifiers that are already usable in the generated C++ code are returned
+    unchanged. Identifiers that match a C++ keyword or a standard-library
+    preprocessor macro are altered slightly so that they no longer trigger
+    compiler errors.
+
+    The alteration is currently just a trailing underscore. That covers the
+    vast majority of realistic cases, but no attempt is made to repair
+    identifiers that the C++ spec reserves for the compiler's use.
+
+    For reference, C++ reserves two categories of identifiers for the compiler:
+    - Any identifier that contains the substring "__" anywhere in it.
+    - Any identifier whose first character is an underscore and whose second
+      character is a capital letter.
+    """
+    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
+        return proto_identifier
+    return _transform_invalid_identifier(proto_identifier)
+
+
+def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
+    """Maps a proto enum-value name to one that is safe in generated C++ code.
+
+    The name is upper-cased and the result is looked up among the C++ keywords
+    and standard-library macros that the codegen treats as reserved. On a
+    match, the original (not upper-cased) name is returned in adjusted form;
+    otherwise the name is returned unchanged.
+
+    The code generation also creates kHungarianNotationPascalCase aliases for
+    enum values, but symbols of that form never conflict with keywords or
+    standard-library macros in C++20, so only the UPPER_SNAKE_CASE form has to
+    be checked.
+
+    See `fix_cc_identifier` for further details.
+    """
+    collides = proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS
+    if collides:
+        return _transform_invalid_identifier(proto_enum_entry)
+    return proto_enum_entry
commit	48a8cf308e5cbf2050d1ac3e7ba387bae8749738	[log] [tgz]
author	Matthias Guenther <mrguenther@google.com>	Fri Jun 03 11:12:21 2022 -0700
committer	CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com>	Thu Jun 09 18:24:15 2022 +0000
tree	0cf568d4f07a35e898690b954b919aebba64b11e
parent	d48a5727b2103e6447b28222c4a9536799fec55d [diff]