pw_protobuf: Auto-fix reserved C++ identifiers
Make Pigweed's protobuf codegen automatically fix identifiers that are
reserved by the C++ spec or that conflict with standard-library macros.
(Specifically, append an underscore in these cases.)
Bug: b/234855743
Change-Id: I2782304080fe6aeebd194b029cde266a7058cd4f
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/96900
Commit-Queue: Matthias Guenther <mrguenther@google.com>
Reviewed-by: Ted Pudlik <tpudlik@google.com>
diff --git a/pw_protobuf/codegen_message_test.cc b/pw_protobuf/codegen_message_test.cc
index 772aed3..ddfc928 100644
--- a/pw_protobuf/codegen_message_test.cc
+++ b/pw_protobuf/codegen_message_test.cc
@@ -14,6 +14,7 @@
#include <array>
#include <span>
#include <string_view>
+#include <tuple>
#include "gtest/gtest.h"
#include "pw_preprocessor/compiler.h"
@@ -239,6 +240,47 @@
EXPECT_TRUE(one == two);
}
+TEST(CodegenMessage, FixReservedIdentifiers) {
+ // Compile-time check: each statement below names a generated symbol, so the
+ // test fails to build if the codegen did not emit the expected identifiers.
+
+ // `signed` is a C++ keyword, so the field must be generated as `signed_`.
+ std::ignore = IntegerMetadata::Message{
+ .bits = 32,
+ .signed_ = true,
+ };
+
+ // Make sure that `Message::Message` exists in the generated code and that a
+ // proto message named `Message` causes no namespace-resolution issues.
+ std::ignore = Message::Message{
+ .length = 4096,
+ };
+
+ // Make sure that `Fields::Fields` exists in the generated code and that a
+ // proto message named `Fields` causes no namespace-resolution issues.
+ std::ignore = Fields::Fields::INTEGERS;
+ std::ignore = Fields::Fields::COMPLEX_NUMBERS;
+ std::ignore = Fields::Fields::MEROMORPHIC_FUNCTIONS_ON_COMPLEX_PLANE;
+
+ // Make sure that the `ReservedWord` enum values were renamed as expected.
+ // Only enum-value names that are reserved in UPPER_SNAKE_CASE should be
+ // modified. Names that are reserved only in lower_snake_case (such as `int`)
+ // should be left alone, since they never appear in that form in the
+ // generated code.
+ std::ignore = ReservedWord::NULL_; // Underscore added since NULL is a macro.
+ std::ignore = ReservedWord::kNull; // kNull is not reserved; no underscore.
+ std::ignore = ReservedWord::INT; // Only lowercase `int` is reserved.
+ std::ignore = ReservedWord::kInt; // No underscore necessary.
+ std::ignore = ReservedWord::RETURN; // Only lowercase `return` is reserved.
+ std::ignore = ReservedWord::kReturn; // No underscore necessary.
+ std::ignore = ReservedWord::BREAK; // Only lowercase `break` is reserved.
+ std::ignore = ReservedWord::kBreak; // No underscore necessary.
+ std::ignore = ReservedWord::FOR; // Only lowercase `for` is reserved.
+ std::ignore = ReservedWord::kFor; // No underscore necessary.
+ std::ignore = ReservedWord::DO; // Only lowercase `do` is reserved.
+ std::ignore = ReservedWord::kDo; // No underscore necessary.
+}
+
PW_MODIFY_DIAGNOSTICS_POP();
TEST(CodegenMessage, Read) {
diff --git a/pw_protobuf/docs.rst b/pw_protobuf/docs.rst
index 27cf243..98487a1 100644
--- a/pw_protobuf/docs.rst
+++ b/pw_protobuf/docs.rst
@@ -666,6 +666,39 @@
std::optional<int32_t> points;
};
+* Fields and messages whose names are C++ keywords or standard-library macros
+ are suffixed with underscores to avoid compilation failures. Names that only
+ match symbols defined by the code-generation implementation are emitted
+ unchanged. These cases are illustrated below by the ``operator`` field and
+ the ``Message`` message, respectively.
+
+ .. code::
+
+ message Channel {
+ int32 bitrate = 1;
+ float signal_to_noise_ratio = 2;
+ Company operator = 3;
+ }
+
+ message Message {
+ User sender = 2;
+ User recipient = 3;
+ Channel channel = 4;
+ }
+
+ .. code:: c++
+
+ struct Channel::Message {
+ int32_t bitrate;
+ float signal_to_noise_ratio;
+ Company::Message operator_;
+ };
+
+ struct Message::Message {
+ User::Message sender;
+ User::Message recipient;
+ Channel::Message channel;
+ };
* Repeated scalar fields are represented by ``pw::Vector`` when the
``max_count`` option is set for that field, or by ``std::array`` when both
diff --git a/pw_protobuf/pw_protobuf_test_protos/full_test.proto b/pw_protobuf/pw_protobuf_test_protos/full_test.proto
index eca7ac1..1309a6c 100644
--- a/pw_protobuf/pw_protobuf_test_protos/full_test.proto
+++ b/pw_protobuf/pw_protobuf_test_protos/full_test.proto
@@ -137,6 +137,46 @@
repeated Crate smaller_crates = 2;
}
+// Ensure that field names that are C++ keywords are suffixed with underscores.
+message IntegerMetadata {
+ int32 bits = 1; // Not reserved; should stay `bits` in the C++ code.
+ bool signed = 2; // `signed` should become `signed_` in the C++ code.
+}
+
+// Ensure that messages named `Message` don't cause namespace-resolution issues
+// when the codegen internally references the generated type `Message::Message`.
+message Message {
+ int32 length = 1; // Set by the codegen test when building `Message::Message`.
+}
+
+// Ensure that messages named `Fields` don't cause namespace-resolution issues
+// when the codegen internally references the generated type `Fields::Fields`.
+message Fields {
+ message Field {} // Empty nested message used as the type of the fields below.
+ Field integers = 1;
+ Field complex_numbers = 2;
+ Field meromorphic_functions_on_complex_plane = 3;
+}
+
+// Enum values are handled differently from normal identifiers because they are
+// automatically case-converted in the generated code. Therefore, we append an
+// underscore only if the case-converted output name exactly matches a reserved
+// word or a standard-library macro. This enum tests that underscores are
+// appended where they're actually necessary but not where case conversion
+// already avoids the collision.
+enum ReservedWord {
+ NULL = 0; // `NULL` is a macro, so this should become `NULL_`.
+ int = 1; // Should become `INT`, which is not reserved.
+ return = 2;
+ break = 3;
+ for // The linter wants a line break here.
+ = 4;
+ do // The linter wants a line break here.
+ = 5;
+ // Note: This obviously isn't anywhere near a complete list of C++ keywords
+ // and standard-library macros.
+}
+
// This might be useful.
message KeyValuePair {
string key = 1;
diff --git a/pw_protobuf/py/BUILD.bazel b/pw_protobuf/py/BUILD.bazel
index 882576f..e273026 100644
--- a/pw_protobuf/py/BUILD.bazel
+++ b/pw_protobuf/py/BUILD.bazel
@@ -27,6 +27,7 @@
"pw_protobuf/output_file.py",
"pw_protobuf/plugin.py",
"pw_protobuf/proto_tree.py",
+ "pw_protobuf/symbol_name_mapping.py",
],
)
diff --git a/pw_protobuf/py/BUILD.gn b/pw_protobuf/py/BUILD.gn
index 5afd50a..746c7ed 100644
--- a/pw_protobuf/py/BUILD.gn
+++ b/pw_protobuf/py/BUILD.gn
@@ -29,6 +29,7 @@
"pw_protobuf/output_file.py",
"pw_protobuf/plugin.py",
"pw_protobuf/proto_tree.py",
+ "pw_protobuf/symbol_name_mapping.py",
]
python_deps = [
"$dir_pw_cli/py",
diff --git a/pw_protobuf/py/pw_protobuf/codegen_pwpb.py b/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
index 1086acb..2304b26 100644
--- a/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
+++ b/pw_protobuf/py/pw_protobuf/codegen_pwpb.py
@@ -2289,7 +2289,7 @@
# message/enum nodes, then the second creates the fields in each. This is
# done as non-primitive fields need pointers to their types, which requires
# the entire tree to have been parsed into memory.
- _, package_root = build_node_tree(proto_file, proto_options)
+ _, package_root = build_node_tree(proto_file, proto_options=proto_options)
output_filename = _proto_filename_to_generated_header(proto_file.name)
output_file = OutputFile(output_filename)
diff --git a/pw_protobuf/py/pw_protobuf/proto_tree.py b/pw_protobuf/py/pw_protobuf/proto_tree.py
index f0b5b3d..f77db7f 100644
--- a/pw_protobuf/py/pw_protobuf/proto_tree.py
+++ b/pw_protobuf/py/pw_protobuf/proto_tree.py
@@ -22,7 +22,7 @@
from google.protobuf import descriptor_pb2
-from pw_protobuf import options
+from pw_protobuf import options, symbol_name_mapping
from pw_protobuf_codegen_protos.options_pb2 import Options
T = TypeVar('T') # pylint: disable=invalid-name
@@ -66,7 +66,8 @@
def cpp_name(self) -> str:
"""The name of this node in generated C++ code."""
- return self._name.replace('.', '::')
+ return symbol_name_mapping.fix_cc_identifier(self._name).replace(
+ '.', '::')
def cpp_namespace(self, root: Optional['ProtoNode'] = None) -> str:
"""C++ namespace of the node, up to the specified root."""
@@ -160,7 +161,11 @@
# pylint: enable=protected-access
def find(self, path: str) -> Optional['ProtoNode']:
- """Finds a node within this node's subtree."""
+ """Finds a node within this node's subtree.
+
+ Args:
+ path: The path to the sought node.
+ """
node = self
# pylint: disable=protected-access
@@ -229,7 +234,11 @@
return list(self._values)
def add_value(self, name: str, value: int) -> None:
- self._values.append((ProtoMessageField.upper_snake_case(name), value))
+ self._values.append((
+ ProtoMessageField.upper_snake_case(
+ symbol_name_mapping.fix_cc_enum_value_name(name)),
+ value,
+ ))
def _supports_child(self, child: ProtoNode) -> bool:
# Enums cannot have nested children.
@@ -328,7 +337,7 @@
type_node: Optional[ProtoNode] = None,
repeated: bool = False,
field_options: Optional[Options] = None):
- self._field_name = field_name
+ self._field_name = symbol_name_mapping.fix_cc_identifier(field_name)
self._number: int = field_number
self._type: int = field_type
self._type_node: Optional[ProtoNode] = type_node
@@ -360,9 +369,7 @@
def upper_camel_case(field_name: str) -> str:
"""Converts a field name to UpperCamelCase."""
name_components = field_name.split('_')
- for i, _ in enumerate(name_components):
- name_components[i] = name_components[i].lower().capitalize()
- return ''.join(name_components)
+ return ''.join([word.lower().capitalize() for word in name_components])
@staticmethod
def upper_snake_case(field_name: str) -> str:
diff --git a/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
new file mode 100755
index 0000000..216e409
--- /dev/null
+++ b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
@@ -0,0 +1,603 @@
+#!/usr/bin/env python3
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Fixes identifiers that would cause compiler errors in generated C++ code."""
+
+# Set of words that can't be used as identifiers in the generated code. Many of
+# these are valid identifiers in proto syntax, but they need special handling in
+# the generated C++ code.
+#
+# Note: This is primarily used for "if x in y" operations, hence the use of a
+# set rather than a list.
+PW_PROTO_CODEGEN_RESERVED_WORDS: set[str] = {
+ # C++20 keywords (https://en.cppreference.com/w/cpp/keyword):
+ "alignas",
+ "alignof",
+ "and",
+ "and_eq",
+ "asm",
+ "atomic_cancel",
+ "atomic_commit",
+ "atomic_noexcept",
+ "auto",
+ "bitand",
+ "bitor",
+ "bool",
+ "break",
+ "case",
+ "catch",
+ "char",
+ "char8_t",
+ "char16_t",
+ "char32_t",
+ "class",
+ "compl",
+ "concept",
+ "const",
+ "consteval",
+ "constexpr",
+ "constinit",
+ "const_cast",
+ "continue",
+ "co_await",
+ "co_return",
+ "co_yield",
+ "decltype",
+ "default",
+ "delete",
+ "do",
+ "double",
+ "dynamic_cast",
+ "else",
+ "enum",
+ "explicit",
+ "export",
+ "extern",
+ "false",
+ "float",
+ "for",
+ "friend",
+ "goto",
+ "if",
+ "inline",
+ "int",
+ "long",
+ "mutable",
+ "namespace",
+ "new",
+ "noexcept",
+ "not",
+ "not_eq",
+ "nullptr",
+ "operator",
+ "or",
+ "or_eq",
+ "private",
+ "protected",
+ "public",
+ "reflexpr",
+ "register",
+ "reinterpret_cast",
+ "requires",
+ "return",
+ "short",
+ "signed",
+ "sizeof",
+ "static",
+ "static_assert",
+ "static_cast",
+ "struct",
+ "switch",
+ "synchronized",
+ "template",
+ "this",
+ "thread_local",
+ "throw",
+ "true",
+ "try",
+ "typedef",
+ "typeid",
+ "typename",
+ "union",
+ "unsigned",
+ "using",
+ "virtual",
+ "void",
+ "volatile",
+ "wchar_t",
+ "while",
+ "xor",
+ "xor_eq",
+ # C++20 macros (https://en.cppreference.com/w/cpp/symbol_index/macro),
+ # excluding the following:
+ # - Function-like macros, which have unambiguous syntax and thus won't
+ # conflict with generated symbols.
+ # - Macros that couldn't be made valid by appending underscores, namely
+ # those containing "__" or starting with "_[A-Z]". C++ reserves all such
+ # identifiers for the compiler, and appending underscores wouldn't change
+ # that.
+ "ATOMIC_BOOL_LOCK_FREE",
+ "ATOMIC_CHAR_LOCK_FREE",
+ "ATOMIC_CHAR16_T_LOCK_FREE",
+ "ATOMIC_CHAR32_T_LOCK_FREE",
+ "ATOMIC_CHAR8_T_LOCK_FREE",
+ "ATOMIC_FLAG_INIT",
+ "ATOMIC_INT_LOCK_FREE",
+ "ATOMIC_LLONG_LOCK_FREE",
+ "ATOMIC_LONG_LOCK_FREE",
+ "ATOMIC_POINTER_LOCK_FREE",
+ "ATOMIC_SHORT_LOCK_FREE",
+ "ATOMIC_WCHAR_T_LOCK_FREE",
+ "BUFSIZ",
+ "CHAR_BIT",
+ "CHAR_MAX",
+ "CHAR_MIN",
+ "CLOCKS_PER_SEC",
+ "DBL_DECIMAL_DIG",
+ "DBL_DIG",
+ "DBL_EPSILON",
+ "DBL_HAS_SUBNORM",
+ "DBL_MANT_DIG",
+ "DBL_MAX",
+ "DBL_MAX_10_EXP",
+ "DBL_MAX_EXP",
+ "DBL_MIN",
+ "DBL_MIN_10_EXP",
+ "DBL_MIN_EXP",
+ "DBL_TRUE_MIN",
+ "DECIMAL_DIG",
+ "E2BIG",
+ "EACCES",
+ "EADDRINUSE",
+ "EADDRNOTAVAIL",
+ "EAFNOSUPPORT",
+ "EAGAIN",
+ "EALREADY",
+ "EBADF",
+ "EBADMSG",
+ "EBUSY",
+ "ECANCELED",
+ "ECHILD",
+ "ECONNABORTED",
+ "ECONNREFUSED",
+ "ECONNRESET",
+ "EDEADLK",
+ "EDESTADDRREQ",
+ "EDOM",
+ "EEXIST",
+ "EFAULT",
+ "EFBIG",
+ "EHOSTUNREACH",
+ "EIDRM",
+ "EILSEQ",
+ "EINPROGRESS",
+ "EINTR",
+ "EINVAL",
+ "EIO",
+ "EISCONN",
+ "EISDIR",
+ "ELOOP",
+ "EMFILE",
+ "EMLINK",
+ "EMSGSIZE",
+ "ENAMETOOLONG",
+ "ENETDOWN",
+ "ENETRESET",
+ "ENETUNREACH",
+ "ENFILE",
+ "ENOBUFS",
+ "ENODATA",
+ "ENODEV",
+ "ENOENT",
+ "ENOEXEC",
+ "ENOLCK",
+ "ENOLINK",
+ "ENOMEM",
+ "ENOMSG",
+ "ENOPROTOOPT",
+ "ENOSPC",
+ "ENOSR",
+ "ENOSTR",
+ "ENOSYS",
+ "ENOTCONN",
+ "ENOTDIR",
+ "ENOTEMPTY",
+ "ENOTRECOVERABLE",
+ "ENOTSOCK",
+ "ENOTSUP",
+ "ENOTTY",
+ "ENXIO",
+ "EOF",
+ "EOPNOTSUPP",
+ "EOVERFLOW",
+ "EOWNERDEAD",
+ "EPERM",
+ "EPIPE",
+ "EPROTO",
+ "EPROTONOSUPPORT",
+ "EPROTOTYPE",
+ "ERANGE",
+ "EROFS",
+ "errno",
+ "ESPIPE",
+ "ESRCH",
+ "ETIME",
+ "ETIMEDOUT",
+ "ETXTBSY",
+ "EWOULDBLOCK",
+ "EXDEV",
+ "EXIT_FAILURE",
+ "EXIT_SUCCESS",
+ "FE_ALL_EXCEPT",
+ "FE_DFL_ENV",
+ "FE_DIVBYZERO",
+ "FE_DOWNWARD",
+ "FE_INEXACT",
+ "FE_INVALID",
+ "FE_OVERFLOW",
+ "FE_TONEAREST",
+ "FE_TOWARDZERO",
+ "FE_UNDERFLOW",
+ "FE_UPWARD",
+ "FILENAME_MAX",
+ "FLT_DECIMAL_DIG",
+ "FLT_DIG",
+ "FLT_EPSILON",
+ "FLT_EVAL_METHOD",
+ "FLT_HAS_SUBNORM",
+ "FLT_MANT_DIG",
+ "FLT_MAX",
+ "FLT_MAX_10_EXP",
+ "FLT_MAX_EXP",
+ "FLT_MIN",
+ "FLT_MIN_10_EXP",
+ "FLT_MIN_EXP",
+ "FLT_RADIX",
+ "FLT_ROUNDS",
+ "FLT_TRUE_MIN",
+ "FOPEN_MAX",
+ "FP_FAST_FMA",
+ "FP_FAST_FMAF",
+ "FP_FAST_FMAL",
+ "FP_ILOGB0",
+ "FP_ILOGBNAN",
+ "FP_SUBNORMAL",
+ "FP_ZERO",
+ "FP_INFINITE",
+ "FP_NAN",
+ "FP_NORMAL",
+ "HUGE_VAL",
+ "HUGE_VALF",
+ "HUGE_VALL",
+ "INFINITY",
+ "INT_FAST16_MAX",
+ "INT_FAST16_MIN",
+ "INT_FAST32_MAX",
+ "INT_FAST32_MIN",
+ "INT_FAST64_MAX",
+ "INT_FAST64_MIN",
+ "INT_FAST8_MAX",
+ "INT_FAST8_MIN",
+ "INT_LEAST16_MAX",
+ "INT_LEAST16_MIN",
+ "INT_LEAST32_MAX",
+ "INT_LEAST32_MIN",
+ "INT_LEAST64_MAX",
+ "INT_LEAST64_MIN",
+ "INT_LEAST8_MAX",
+ "INT_LEAST8_MIN",
+ "INT_MAX",
+ "INT_MIN",
+ "INT16_MAX",
+ "INT16_MIN",
+ "INT32_MAX",
+ "INT32_MIN",
+ "INT64_MAX",
+ "INT64_MIN",
+ "INT8_MAX",
+ "INT8_MIN",
+ "INTMAX_MAX",
+ "INTMAX_MIN",
+ "INTPTR_MAX",
+ "INTPTR_MIN",
+ "L_tmpnam",
+ "LC_ALL",
+ "LC_COLLATE",
+ "LC_CTYPE",
+ "LC_MONETARY",
+ "LC_NUMERIC",
+ "LC_TIME",
+ "LDBL_DECIMAL_DIG",
+ "LDBL_DIG",
+ "LDBL_EPSILON",
+ "LDBL_HAS_SUBNORM",
+ "LDBL_MANT_DIG",
+ "LDBL_MAX",
+ "LDBL_MAX_10_EXP",
+ "LDBL_MAX_EXP",
+ "LDBL_MIN",
+ "LDBL_MIN_10_EXP",
+ "LDBL_MIN_EXP",
+ "LDBL_TRUE_MIN",
+ "LLONG_MAX",
+ "LLONG_MIN",
+ "LONG_MAX",
+ "LONG_MIN",
+ "MATH_ERREXCEPT",
+ "math_errhandling",
+ "MATH_ERRNO",
+ "MB_CUR_MAX",
+ "MB_LEN_MAX",
+ "NAN",
+ "NULL",
+ "ONCE_FLAG_INIT",
+ "PRId16",
+ "PRId32",
+ "PRId64",
+ "PRId8",
+ "PRIdFAST16",
+ "PRIdFAST32",
+ "PRIdFAST64",
+ "PRIdFAST8",
+ "PRIdLEAST16",
+ "PRIdLEAST32",
+ "PRIdLEAST64",
+ "PRIdLEAST8",
+ "PRIdMAX",
+ "PRIdPTR",
+ "PRIi16",
+ "PRIi32",
+ "PRIi64",
+ "PRIi8",
+ "PRIiFAST16",
+ "PRIiFAST32",
+ "PRIiFAST64",
+ "PRIiFAST8",
+ "PRIiLEAST16",
+ "PRIiLEAST32",
+ "PRIiLEAST64",
+ "PRIiLEAST8",
+ "PRIiMAX",
+ "PRIiPTR",
+ "PRIo16",
+ "PRIo32",
+ "PRIo64",
+ "PRIo8",
+ "PRIoFAST16",
+ "PRIoFAST32",
+ "PRIoFAST64",
+ "PRIoFAST8",
+ "PRIoLEAST16",
+ "PRIoLEAST32",
+ "PRIoLEAST64",
+ "PRIoLEAST8",
+ "PRIoMAX",
+ "PRIoPTR",
+ "PRIu16",
+ "PRIu32",
+ "PRIu64",
+ "PRIu8",
+ "PRIuFAST16",
+ "PRIuFAST32",
+ "PRIuFAST64",
+ "PRIuFAST8",
+ "PRIuLEAST16",
+ "PRIuLEAST32",
+ "PRIuLEAST64",
+ "PRIuLEAST8",
+ "PRIuMAX",
+ "PRIuPTR",
+ "PRIx16",
+ "PRIX16",
+ "PRIx32",
+ "PRIX32",
+ "PRIx64",
+ "PRIX64",
+ "PRIx8",
+ "PRIX8",
+ "PRIxFAST16",
+ "PRIXFAST16",
+ "PRIxFAST32",
+ "PRIXFAST32",
+ "PRIxFAST64",
+ "PRIXFAST64",
+ "PRIxFAST8",
+ "PRIXFAST8",
+ "PRIxLEAST16",
+ "PRIXLEAST16",
+ "PRIxLEAST32",
+ "PRIXLEAST32",
+ "PRIxLEAST64",
+ "PRIXLEAST64",
+ "PRIxLEAST8",
+ "PRIXLEAST8",
+ "PRIxMAX",
+ "PRIXMAX",
+ "PRIxPTR",
+ "PRIXPTR",
+ "PTRDIFF_MAX",
+ "PTRDIFF_MIN",
+ "RAND_MAX",
+ "SCHAR_MAX",
+ "SCHAR_MIN",
+ "SCNd16",
+ "SCNd32",
+ "SCNd64",
+ "SCNd8",
+ "SCNdFAST16",
+ "SCNdFAST32",
+ "SCNdFAST64",
+ "SCNdFAST8",
+ "SCNdLEAST16",
+ "SCNdLEAST32",
+ "SCNdLEAST64",
+ "SCNdLEAST8",
+ "SCNdMAX",
+ "SCNdPTR",
+ "SCNi16",
+ "SCNi32",
+ "SCNi64",
+ "SCNi8",
+ "SCNiFAST16",
+ "SCNiFAST32",
+ "SCNiFAST64",
+ "SCNiFAST8",
+ "SCNiLEAST16",
+ "SCNiLEAST32",
+ "SCNiLEAST64",
+ "SCNiLEAST8",
+ "SCNiMAX",
+ "SCNiPTR",
+ "SCNo16",
+ "SCNo32",
+ "SCNo64",
+ "SCNo8",
+ "SCNoFAST16",
+ "SCNoFAST32",
+ "SCNoFAST64",
+ "SCNoFAST8",
+ "SCNoLEAST16",
+ "SCNoLEAST32",
+ "SCNoLEAST64",
+ "SCNoLEAST8",
+ "SCNoMAX",
+ "SCNoPTR",
+ "SCNu16",
+ "SCNu32",
+ "SCNu64",
+ "SCNu8",
+ "SCNuFAST16",
+ "SCNuFAST32",
+ "SCNuFAST64",
+ "SCNuFAST8",
+ "SCNuLEAST16",
+ "SCNuLEAST32",
+ "SCNuLEAST64",
+ "SCNuLEAST8",
+ "SCNuMAX",
+ "SCNuPTR",
+ "SCNx16",
+ "SCNx32",
+ "SCNx64",
+ "SCNx8",
+ "SCNxFAST16",
+ "SCNxFAST32",
+ "SCNxFAST64",
+ "SCNxFAST8",
+ "SCNxLEAST16",
+ "SCNxLEAST32",
+ "SCNxLEAST64",
+ "SCNxLEAST8",
+ "SCNxMAX",
+ "SCNxPTR",
+ "SEEK_CUR",
+ "SEEK_END",
+ "SEEK_SET",
+ "SHRT_MAX",
+ "SHRT_MIN",
+ "SIG_ATOMIC_MAX",
+ "SIG_ATOMIC_MIN",
+ "SIG_DFL",
+ "SIG_ERR",
+ "SIG_IGN",
+ "SIGABRT",
+ "SIGFPE",
+ "SIGILL",
+ "SIGINT",
+ "SIGSEGV",
+ "SIGTERM",
+ "SIZE_MAX",
+ "stderr",
+ "stdin",
+ "stdout",
+ "TIME_UTC",
+ "TMP_MAX",
+ "UCHAR_MAX",
+ "UINT_FAST16_MAX",
+ "UINT_FAST32_MAX",
+ "UINT_FAST64_MAX",
+ "UINT_FAST8_MAX",
+ "UINT_LEAST16_MAX",
+ "UINT_LEAST32_MAX",
+ "UINT_LEAST64_MAX",
+ "UINT_LEAST8_MAX",
+ "UINT_MAX",
+ "UINT16_MAX",
+ "UINT32_MAX",
+ "UINT64_MAX",
+ "UINT8_MAX",
+ "UINTMAX_MAX",
+ "UINTPTR_MAX",
+ "ULLONG_MAX",
+ "ULONG_MAX",
+ "USHRT_MAX",
+ "WCHAR_MAX",
+ "WCHAR_MIN",
+ "WEOF",
+ "WINT_MAX",
+ "WINT_MIN",
+}
+
+
+def _transform_invalid_identifier(invalid_identifier: str) -> str:
+    """Makes an invalid C++ identifier valid by appending an underscore.
+
+    A trailing underscore resolves the vast majority of realistic collisions,
+    but not all of them; the caveats are described in the documentation of
+    `fix_cc_identifier`.
+    """
+    return invalid_identifier + "_"
+
+
+def fix_cc_identifier(proto_identifier: str) -> str:
+    """Maps a proto identifier to a name that is safe in generated C++ code.
+
+    Identifiers that are neither C++ keywords nor standard-library
+    preprocessor macros are returned unchanged. Any identifier found in
+    `PW_PROTO_CODEGEN_RESERVED_WORDS` is altered slightly (an underscore is
+    appended) so that the generated code compiles.
+
+    No attempt is made to repair identifiers that the C++ spec reserves for the
+    compiler's use, namely:
+      - identifiers containing the substring "__" anywhere, and
+      - identifiers that start with an underscore followed by a capital
+        letter.
+    Appending an underscore would not make such identifiers valid.
+    """
+    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
+        return proto_identifier
+
+    # Reserved word: adjust it so that it no longer collides in C++.
+    return _transform_invalid_identifier(proto_identifier)
+
+
+def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
+    """Maps a proto enum-value name to one that is safe in generated C++.
+
+    The name is upper-cased (the form in which it appears in generated code)
+    and that variant is checked against C++ keywords and standard-library
+    macros. On a collision, a modified version of the original input is
+    returned; otherwise the input is returned untouched.
+
+    The codegen also emits kHungarianNotationPascalCase aliases for enum
+    values, but names of that shape never clash with C++20 keywords or
+    standard-library macros, so only the upper-case form is examined.
+
+    See `fix_cc_identifier` for further details.
+    """
+    collides = proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS
+    if collides:
+        return _transform_invalid_identifier(proto_enum_entry)
+
+    return proto_enum_entry