blob: 0921473d6fea369a71b1d2a0e9e6e7c2cb2dbe24 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Fixes identifiers that would cause compiler errors in generated C++ code."""
from typing import Set
# Set of words that can't be used as identifiers in the generated code. Many of
# these are valid identifiers in proto syntax, but they need special handling in
# the generated C++ code.
#
# Note: This is primarily used for "if x in y" operations, hence the use of a
# set rather than a list.
PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
    # --- Names that clash with the codegen's own internals -----------------
    # These collide with codegen internals when they show up in certain
    # contexts.
    "Fields",
    "Message",
    # --- C++20 keywords ----------------------------------------------------
    # Source: https://en.cppreference.com/w/cpp/keyword
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "atomic_cancel",
    "atomic_commit",
    "atomic_noexcept",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char8_t",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "concept",
    "const",
    "consteval",
    "constexpr",
    "constinit",
    "const_cast",
    "continue",
    "co_await",
    "co_return",
    "co_yield",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "reflexpr",
    "register",
    "reinterpret_cast",
    "requires",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "synchronized",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq",
    # --- C++20 standard-library macros -------------------------------------
    # Source: https://en.cppreference.com/w/cpp/symbol_index/macro
    #
    # Two groups of macros are intentionally left out:
    #   * Function-like macros: their syntax is unambiguous, so they cannot
    #     conflict with generated symbols.
    #   * Macros containing "__" or starting with "_[A-Z]": C++ reserves all
    #     such identifiers for the compiler, and appending underscores would
    #     not make them valid.
    "ATOMIC_BOOL_LOCK_FREE",
    "ATOMIC_CHAR_LOCK_FREE",
    "ATOMIC_CHAR16_T_LOCK_FREE",
    "ATOMIC_CHAR32_T_LOCK_FREE",
    "ATOMIC_CHAR8_T_LOCK_FREE",
    "ATOMIC_FLAG_INIT",
    "ATOMIC_INT_LOCK_FREE",
    "ATOMIC_LLONG_LOCK_FREE",
    "ATOMIC_LONG_LOCK_FREE",
    "ATOMIC_POINTER_LOCK_FREE",
    "ATOMIC_SHORT_LOCK_FREE",
    "ATOMIC_WCHAR_T_LOCK_FREE",
    "BUFSIZ",
    "CHAR_BIT",
    "CHAR_MAX",
    "CHAR_MIN",
    "CLOCKS_PER_SEC",
    "DBL_DECIMAL_DIG",
    "DBL_DIG",
    "DBL_EPSILON",
    "DBL_HAS_SUBNORM",
    "DBL_MANT_DIG",
    "DBL_MAX",
    "DBL_MAX_10_EXP",
    "DBL_MAX_EXP",
    "DBL_MIN",
    "DBL_MIN_10_EXP",
    "DBL_MIN_EXP",
    "DBL_TRUE_MIN",
    "DECIMAL_DIG",
    "E2BIG",
    "EACCES",
    "EADDRINUSE",
    "EADDRNOTAVAIL",
    "EAFNOSUPPORT",
    "EAGAIN",
    "EALREADY",
    "EBADF",
    "EBADMSG",
    "EBUSY",
    "ECANCELED",
    "ECHILD",
    "ECONNABORTED",
    "ECONNREFUSED",
    "ECONNRESET",
    "EDEADLK",
    "EDESTADDRREQ",
    "EDOM",
    "EEXIST",
    "EFAULT",
    "EFBIG",
    "EHOSTUNREACH",
    "EIDRM",
    "EILSEQ",
    "EINPROGRESS",
    "EINTR",
    "EINVAL",
    "EIO",
    "EISCONN",
    "EISDIR",
    "ELOOP",
    "EMFILE",
    "EMLINK",
    "EMSGSIZE",
    "ENAMETOOLONG",
    "ENETDOWN",
    "ENETRESET",
    "ENETUNREACH",
    "ENFILE",
    "ENOBUFS",
    "ENODATA",
    "ENODEV",
    "ENOENT",
    "ENOEXEC",
    "ENOLCK",
    "ENOLINK",
    "ENOMEM",
    "ENOMSG",
    "ENOPROTOOPT",
    "ENOSPC",
    "ENOSR",
    "ENOSTR",
    "ENOSYS",
    "ENOTCONN",
    "ENOTDIR",
    "ENOTEMPTY",
    "ENOTRECOVERABLE",
    "ENOTSOCK",
    "ENOTSUP",
    "ENOTTY",
    "ENXIO",
    "EOF",
    "EOPNOTSUPP",
    "EOVERFLOW",
    "EOWNERDEAD",
    "EPERM",
    "EPIPE",
    "EPROTO",
    "EPROTONOSUPPORT",
    "EPROTOTYPE",
    "ERANGE",
    "EROFS",
    "errno",
    "ESPIPE",
    "ESRCH",
    "ETIME",
    "ETIMEDOUT",
    "ETXTBSY",
    "EWOULDBLOCK",
    "EXDEV",
    "EXIT_FAILURE",
    "EXIT_SUCCESS",
    "FE_ALL_EXCEPT",
    "FE_DFL_ENV",
    "FE_DIVBYZERO",
    "FE_DOWNWARD",
    "FE_INEXACT",
    "FE_INVALID",
    "FE_OVERFLOW",
    "FE_TONEAREST",
    "FE_TOWARDZERO",
    "FE_UNDERFLOW",
    "FE_UPWARD",
    "FILENAME_MAX",
    "FLT_DECIMAL_DIG",
    "FLT_DIG",
    "FLT_EPSILON",
    "FLT_EVAL_METHOD",
    "FLT_HAS_SUBNORM",
    "FLT_MANT_DIG",
    "FLT_MAX",
    "FLT_MAX_10_EXP",
    "FLT_MAX_EXP",
    "FLT_MIN",
    "FLT_MIN_10_EXP",
    "FLT_MIN_EXP",
    "FLT_RADIX",
    "FLT_ROUNDS",
    "FLT_TRUE_MIN",
    "FOPEN_MAX",
    "FP_FAST_FMA",
    "FP_FAST_FMAF",
    "FP_FAST_FMAL",
    "FP_ILOGB0",
    "FP_ILOGBNAN",
    "FP_SUBNORMAL",
    "FP_ZERO",
    "FP_INFINITE",
    "FP_NAN",
    "FP_NORMAL",
    "HUGE_VAL",
    "HUGE_VALF",
    "HUGE_VALL",
    "INFINITY",
    "INT_FAST16_MAX",
    "INT_FAST16_MIN",
    "INT_FAST32_MAX",
    "INT_FAST32_MIN",
    "INT_FAST64_MAX",
    "INT_FAST64_MIN",
    "INT_FAST8_MAX",
    "INT_FAST8_MIN",
    "INT_LEAST16_MAX",
    "INT_LEAST16_MIN",
    "INT_LEAST32_MAX",
    "INT_LEAST32_MIN",
    "INT_LEAST64_MAX",
    "INT_LEAST64_MIN",
    "INT_LEAST8_MAX",
    "INT_LEAST8_MIN",
    "INT_MAX",
    "INT_MIN",
    "INT16_MAX",
    "INT16_MIN",
    "INT32_MAX",
    "INT32_MIN",
    "INT64_MAX",
    "INT64_MIN",
    "INT8_MAX",
    "INT8_MIN",
    "INTMAX_MAX",
    "INTMAX_MIN",
    "INTPTR_MAX",
    "INTPTR_MIN",
    "L_tmpnam",
    "LC_ALL",
    "LC_COLLATE",
    "LC_CTYPE",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_TIME",
    "LDBL_DECIMAL_DIG",
    "LDBL_DIG",
    "LDBL_EPSILON",
    "LDBL_HAS_SUBNORM",
    "LDBL_MANT_DIG",
    "LDBL_MAX",
    "LDBL_MAX_10_EXP",
    "LDBL_MAX_EXP",
    "LDBL_MIN",
    "LDBL_MIN_10_EXP",
    "LDBL_MIN_EXP",
    "LDBL_TRUE_MIN",
    "LLONG_MAX",
    "LLONG_MIN",
    "LONG_MAX",
    "LONG_MIN",
    "MATH_ERREXCEPT",
    "math_errhandling",
    "MATH_ERRNO",
    "MB_CUR_MAX",
    "MB_LEN_MAX",
    "NAN",
    "NULL",
    "ONCE_FLAG_INIT",
    "PRId16",
    "PRId32",
    "PRId64",
    "PRId8",
    "PRIdFAST16",
    "PRIdFAST32",
    "PRIdFAST64",
    "PRIdFAST8",
    "PRIdLEAST16",
    "PRIdLEAST32",
    "PRIdLEAST64",
    "PRIdLEAST8",
    "PRIdMAX",
    "PRIdPTR",
    "PRIi16",
    "PRIi32",
    "PRIi64",
    "PRIi8",
    "PRIiFAST16",
    "PRIiFAST32",
    "PRIiFAST64",
    "PRIiFAST8",
    "PRIiLEAST16",
    "PRIiLEAST32",
    "PRIiLEAST64",
    "PRIiLEAST8",
    "PRIiMAX",
    "PRIiPTR",
    "PRIo16",
    "PRIo32",
    "PRIo64",
    "PRIo8",
    "PRIoFAST16",
    "PRIoFAST32",
    "PRIoFAST64",
    "PRIoFAST8",
    "PRIoLEAST16",
    "PRIoLEAST32",
    "PRIoLEAST64",
    "PRIoLEAST8",
    "PRIoMAX",
    "PRIoPTR",
    "PRIu16",
    "PRIu32",
    "PRIu64",
    "PRIu8",
    "PRIuFAST16",
    "PRIuFAST32",
    "PRIuFAST64",
    "PRIuFAST8",
    "PRIuLEAST16",
    "PRIuLEAST32",
    "PRIuLEAST64",
    "PRIuLEAST8",
    "PRIuMAX",
    "PRIuPTR",
    "PRIx16",
    "PRIX16",
    "PRIx32",
    "PRIX32",
    "PRIx64",
    "PRIX64",
    "PRIx8",
    "PRIX8",
    "PRIxFAST16",
    "PRIXFAST16",
    "PRIxFAST32",
    "PRIXFAST32",
    "PRIxFAST64",
    "PRIXFAST64",
    "PRIxFAST8",
    "PRIXFAST8",
    "PRIxLEAST16",
    "PRIXLEAST16",
    "PRIxLEAST32",
    "PRIXLEAST32",
    "PRIxLEAST64",
    "PRIXLEAST64",
    "PRIxLEAST8",
    "PRIXLEAST8",
    "PRIxMAX",
    "PRIXMAX",
    "PRIxPTR",
    "PRIXPTR",
    "PTRDIFF_MAX",
    "PTRDIFF_MIN",
    "RAND_MAX",
    "SCHAR_MAX",
    "SCHAR_MIN",
    "SCNd16",
    "SCNd32",
    "SCNd64",
    "SCNd8",
    "SCNdFAST16",
    "SCNdFAST32",
    "SCNdFAST64",
    "SCNdFAST8",
    "SCNdLEAST16",
    "SCNdLEAST32",
    "SCNdLEAST64",
    "SCNdLEAST8",
    "SCNdMAX",
    "SCNdPTR",
    "SCNi16",
    "SCNi32",
    "SCNi64",
    "SCNi8",
    "SCNiFAST16",
    "SCNiFAST32",
    "SCNiFAST64",
    "SCNiFAST8",
    "SCNiLEAST16",
    "SCNiLEAST32",
    "SCNiLEAST64",
    "SCNiLEAST8",
    "SCNiMAX",
    "SCNiPTR",
    "SCNo16",
    "SCNo32",
    "SCNo64",
    "SCNo8",
    "SCNoFAST16",
    "SCNoFAST32",
    "SCNoFAST64",
    "SCNoFAST8",
    "SCNoLEAST16",
    "SCNoLEAST32",
    "SCNoLEAST64",
    "SCNoLEAST8",
    "SCNoMAX",
    "SCNoPTR",
    "SCNu16",
    "SCNu32",
    "SCNu64",
    "SCNu8",
    "SCNuFAST16",
    "SCNuFAST32",
    "SCNuFAST64",
    "SCNuFAST8",
    "SCNuLEAST16",
    "SCNuLEAST32",
    "SCNuLEAST64",
    "SCNuLEAST8",
    "SCNuMAX",
    "SCNuPTR",
    "SCNx16",
    "SCNx32",
    "SCNx64",
    "SCNx8",
    "SCNxFAST16",
    "SCNxFAST32",
    "SCNxFAST64",
    "SCNxFAST8",
    "SCNxLEAST16",
    "SCNxLEAST32",
    "SCNxLEAST64",
    "SCNxLEAST8",
    "SCNxMAX",
    "SCNxPTR",
    "SEEK_CUR",
    "SEEK_END",
    "SEEK_SET",
    "SHRT_MAX",
    "SHRT_MIN",
    "SIG_ATOMIC_MAX",
    "SIG_ATOMIC_MIN",
    "SIG_DFL",
    "SIG_ERR",
    "SIG_IGN",
    "SIGABRT",
    "SIGFPE",
    "SIGILL",
    "SIGINT",
    "SIGSEGV",
    "SIGTERM",
    "SIZE_MAX",
    "stderr",
    "stdin",
    "stdout",
    "TIME_UTC",
    "TMP_MAX",
    "UCHAR_MAX",
    "UINT_FAST16_MAX",
    "UINT_FAST32_MAX",
    "UINT_FAST64_MAX",
    "UINT_FAST8_MAX",
    "UINT_LEAST16_MAX",
    "UINT_LEAST32_MAX",
    "UINT_LEAST64_MAX",
    "UINT_LEAST8_MAX",
    "UINT_MAX",
    "UINT16_MAX",
    "UINT32_MAX",
    "UINT64_MAX",
    "UINT8_MAX",
    "UINTMAX_MAX",
    "UINTPTR_MAX",
    "ULLONG_MAX",
    "ULONG_MAX",
    "USHRT_MAX",
    "WCHAR_MAX",
    "WCHAR_MIN",
    "WEOF",
    "WINT_MAX",
    "WINT_MIN",
}
def _transform_invalid_identifier(invalid_identifier: str) -> str:
"""Applies a transformation to an invalid C++ identifier to make it valid.
Currently, this simply appends an underscore. This addresses the vast
majority of realistic cases, but there are some caveats; see
`fix_cc_identifier` function documentation for details.
"""
return f"{invalid_identifier}_"
def fix_cc_identifier(proto_identifier: str) -> str:
    """Maps a proto identifier to a name that is safe in generated C++ code.

    Names that are not listed in `PW_PROTO_CODEGEN_RESERVED_WORDS` (C++
    keywords, standard-library preprocessor macros, and a few names used by
    the codegen itself) pass through unchanged. Listed names are handed to
    `_transform_invalid_identifier`, which currently appends an underscore so
    that the result no longer triggers a compiler error.

    This covers the vast majority of realistic cases. It does not try to
    repair identifiers that the C++ spec reserves for the compiler's use,
    which fall into two categories:

      - Any identifier that contains the substring "__" anywhere in it.
      - Any identifier with an underscore for the first character and a
        capital letter for the second character.

    Args:
        proto_identifier: The identifier as written in the proto source.

    Returns:
        The identifier unchanged when it is not reserved, otherwise the
        transformed identifier.
    """
    is_reserved = proto_identifier in PW_PROTO_CODEGEN_RESERVED_WORDS
    if not is_reserved:
        # Nothing to fix: the name is usable verbatim.
        return proto_identifier
    return _transform_invalid_identifier(proto_identifier)
def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
    """Maps a proto enum-value name to one that is safe in generated C++.

    The name is upper-cased to obtain its UPPER_SNAKE_CASE form, and that
    form is looked up in `PW_PROTO_CODEGEN_RESERVED_WORDS` to detect
    collisions with C++ keywords and standard-library macros. On a collision
    the *original* spelling (not the upper-cased one) is passed through
    `_transform_invalid_identifier`; otherwise the input is returned
    untouched.

    The code generation also creates kHungarianNotationPascalCase aliases for
    enum values, but symbols of that form never conflict with keywords or
    standard-library macros in C++20, so only the UPPER_SNAKE_CASE form is
    checked here.

    See `fix_cc_identifier` for further details.

    Args:
        proto_enum_entry: The enum-value name as written in the proto source.

    Returns:
        The name unchanged when its upper-cased form is not reserved,
        otherwise the transformed name.
    """
    if proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS:
        # Only the lookup uses the upper-cased form; the fix is applied to
        # the caller's original spelling.
        return _transform_invalid_identifier(proto_enum_entry)
    return proto_enum_entry