#!/usr/bin/env python3
# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Fixes identifiers that would cause compiler errors in generated C++ code."""

from typing import Set

# Set of words that can't be used as identifiers in the generated code. Many of
# these are valid identifiers in proto syntax, but they need special handling in
# the generated C++ code.
#
# Note: This is primarily used for "if x in y" operations, hence the use of a
# set rather than a list. Membership tests are exact, case-sensitive string
# comparisons, so e.g. "NULL" is reserved while "null" is not.
PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
    # Identifiers that conflict with the codegen internals when used in certain
    # contexts:
    "Fields",
    "Message",
    # C++20 keywords (https://en.cppreference.com/w/cpp/keyword):
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "atomic_cancel",
    "atomic_commit",
    "atomic_noexcept",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char8_t",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "concept",
    "const",
    "consteval",
    "constexpr",
    "constinit",
    "const_cast",
    "continue",
    "co_await",
    "co_return",
    "co_yield",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "reflexpr",
    "register",
    "reinterpret_cast",
    "requires",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "synchronized",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq",
    # C++20 macros (https://en.cppreference.com/w/cpp/symbol_index/macro),
    # excluding the following:
    # - Function-like macros, which have unambiguous syntax and thus won't
    #   conflict with generated symbols.
    # - Macros that couldn't be made valid by appending underscores, namely
    #   those containing "__" or starting with "_[A-Z]". C++ reserves all such
    #   identifiers for the compiler, and appending underscores wouldn't change
    #   that.
    "ATOMIC_BOOL_LOCK_FREE",
    "ATOMIC_CHAR_LOCK_FREE",
    "ATOMIC_CHAR16_T_LOCK_FREE",
    "ATOMIC_CHAR32_T_LOCK_FREE",
    "ATOMIC_CHAR8_T_LOCK_FREE",
    "ATOMIC_FLAG_INIT",
    "ATOMIC_INT_LOCK_FREE",
    "ATOMIC_LLONG_LOCK_FREE",
    "ATOMIC_LONG_LOCK_FREE",
    "ATOMIC_POINTER_LOCK_FREE",
    "ATOMIC_SHORT_LOCK_FREE",
    "ATOMIC_WCHAR_T_LOCK_FREE",
    "BUFSIZ",
    "CHAR_BIT",
    "CHAR_MAX",
    "CHAR_MIN",
    "CLOCKS_PER_SEC",
    "DBL_DECIMAL_DIG",
    "DBL_DIG",
    "DBL_EPSILON",
    "DBL_HAS_SUBNORM",
    "DBL_MANT_DIG",
    "DBL_MAX",
    "DBL_MAX_10_EXP",
    "DBL_MAX_EXP",
    "DBL_MIN",
    "DBL_MIN_10_EXP",
    "DBL_MIN_EXP",
    "DBL_TRUE_MIN",
    "DECIMAL_DIG",
    "E2BIG",
    "EACCES",
    "EADDRINUSE",
    "EADDRNOTAVAIL",
    "EAFNOSUPPORT",
    "EAGAIN",
    "EALREADY",
    "EBADF",
    "EBADMSG",
    "EBUSY",
    "ECANCELED",
    "ECHILD",
    "ECONNABORTED",
    "ECONNREFUSED",
    "ECONNRESET",
    "EDEADLK",
    "EDESTADDRREQ",
    "EDOM",
    "EEXIST",
    "EFAULT",
    "EFBIG",
    "EHOSTUNREACH",
    "EIDRM",
    "EILSEQ",
    "EINPROGRESS",
    "EINTR",
    "EINVAL",
    "EIO",
    "EISCONN",
    "EISDIR",
    "ELOOP",
    "EMFILE",
    "EMLINK",
    "EMSGSIZE",
    "ENAMETOOLONG",
    "ENETDOWN",
    "ENETRESET",
    "ENETUNREACH",
    "ENFILE",
    "ENOBUFS",
    "ENODATA",
    "ENODEV",
    "ENOENT",
    "ENOEXEC",
    "ENOLCK",
    "ENOLINK",
    "ENOMEM",
    "ENOMSG",
    "ENOPROTOOPT",
    "ENOSPC",
    "ENOSR",
    "ENOSTR",
    "ENOSYS",
    "ENOTCONN",
    "ENOTDIR",
    "ENOTEMPTY",
    "ENOTRECOVERABLE",
    "ENOTSOCK",
    "ENOTSUP",
    "ENOTTY",
    "ENXIO",
    "EOF",
    "EOPNOTSUPP",
    "EOVERFLOW",
    "EOWNERDEAD",
    "EPERM",
    "EPIPE",
    "EPROTO",
    "EPROTONOSUPPORT",
    "EPROTOTYPE",
    "ERANGE",
    "EROFS",
    "errno",
    "ESPIPE",
    "ESRCH",
    "ETIME",
    "ETIMEDOUT",
    "ETXTBSY",
    "EWOULDBLOCK",
    "EXDEV",
    "EXIT_FAILURE",
    "EXIT_SUCCESS",
    "FE_ALL_EXCEPT",
    "FE_DFL_ENV",
    "FE_DIVBYZERO",
    "FE_DOWNWARD",
    "FE_INEXACT",
    "FE_INVALID",
    "FE_OVERFLOW",
    "FE_TONEAREST",
    "FE_TOWARDZERO",
    "FE_UNDERFLOW",
    "FE_UPWARD",
    "FILENAME_MAX",
    "FLT_DECIMAL_DIG",
    "FLT_DIG",
    "FLT_EPSILON",
    "FLT_EVAL_METHOD",
    "FLT_HAS_SUBNORM",
    "FLT_MANT_DIG",
    "FLT_MAX",
    "FLT_MAX_10_EXP",
    "FLT_MAX_EXP",
    "FLT_MIN",
    "FLT_MIN_10_EXP",
    "FLT_MIN_EXP",
    "FLT_RADIX",
    "FLT_ROUNDS",
    "FLT_TRUE_MIN",
    "FOPEN_MAX",
    "FP_FAST_FMA",
    "FP_FAST_FMAF",
    "FP_FAST_FMAL",
    "FP_ILOGB0",
    "FP_ILOGBNAN",
    "FP_SUBNORMAL",
    "FP_ZERO",
    "FP_INFINITE",
    "FP_NAN",
    "FP_NORMAL",
    "HUGE_VAL",
    "HUGE_VALF",
    "HUGE_VALL",
    "INFINITY",
    "INT_FAST16_MAX",
    "INT_FAST16_MIN",
    "INT_FAST32_MAX",
    "INT_FAST32_MIN",
    "INT_FAST64_MAX",
    "INT_FAST64_MIN",
    "INT_FAST8_MAX",
    "INT_FAST8_MIN",
    "INT_LEAST16_MAX",
    "INT_LEAST16_MIN",
    "INT_LEAST32_MAX",
    "INT_LEAST32_MIN",
    "INT_LEAST64_MAX",
    "INT_LEAST64_MIN",
    "INT_LEAST8_MAX",
    "INT_LEAST8_MIN",
    "INT_MAX",
    "INT_MIN",
    "INT16_MAX",
    "INT16_MIN",
    "INT32_MAX",
    "INT32_MIN",
    "INT64_MAX",
    "INT64_MIN",
    "INT8_MAX",
    "INT8_MIN",
    "INTMAX_MAX",
    "INTMAX_MIN",
    "INTPTR_MAX",
    "INTPTR_MIN",
    "L_tmpnam",
    "LC_ALL",
    "LC_COLLATE",
    "LC_CTYPE",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_TIME",
    "LDBL_DECIMAL_DIG",
    "LDBL_DIG",
    "LDBL_EPSILON",
    "LDBL_HAS_SUBNORM",
    "LDBL_MANT_DIG",
    "LDBL_MAX",
    "LDBL_MAX_10_EXP",
    "LDBL_MAX_EXP",
    "LDBL_MIN",
    "LDBL_MIN_10_EXP",
    "LDBL_MIN_EXP",
    "LDBL_TRUE_MIN",
    "LLONG_MAX",
    "LLONG_MIN",
    "LONG_MAX",
    "LONG_MIN",
    "MATH_ERREXCEPT",
    "math_errhandling",
    "MATH_ERRNO",
    "MB_CUR_MAX",
    "MB_LEN_MAX",
    "NAN",
    "NULL",
    "ONCE_FLAG_INIT",
    "PRId16",
    "PRId32",
    "PRId64",
    "PRId8",
    "PRIdFAST16",
    "PRIdFAST32",
    "PRIdFAST64",
    "PRIdFAST8",
    "PRIdLEAST16",
    "PRIdLEAST32",
    "PRIdLEAST64",
    "PRIdLEAST8",
    "PRIdMAX",
    "PRIdPTR",
    "PRIi16",
    "PRIi32",
    "PRIi64",
    "PRIi8",
    "PRIiFAST16",
    "PRIiFAST32",
    "PRIiFAST64",
    "PRIiFAST8",
    "PRIiLEAST16",
    "PRIiLEAST32",
    "PRIiLEAST64",
    "PRIiLEAST8",
    "PRIiMAX",
    "PRIiPTR",
    "PRIo16",
    "PRIo32",
    "PRIo64",
    "PRIo8",
    "PRIoFAST16",
    "PRIoFAST32",
    "PRIoFAST64",
    "PRIoFAST8",
    "PRIoLEAST16",
    "PRIoLEAST32",
    "PRIoLEAST64",
    "PRIoLEAST8",
    "PRIoMAX",
    "PRIoPTR",
    "PRIu16",
    "PRIu32",
    "PRIu64",
    "PRIu8",
    "PRIuFAST16",
    "PRIuFAST32",
    "PRIuFAST64",
    "PRIuFAST8",
    "PRIuLEAST16",
    "PRIuLEAST32",
    "PRIuLEAST64",
    "PRIuLEAST8",
    "PRIuMAX",
    "PRIuPTR",
    "PRIx16",
    "PRIX16",
    "PRIx32",
    "PRIX32",
    "PRIx64",
    "PRIX64",
    "PRIx8",
    "PRIX8",
    "PRIxFAST16",
    "PRIXFAST16",
    "PRIxFAST32",
    "PRIXFAST32",
    "PRIxFAST64",
    "PRIXFAST64",
    "PRIxFAST8",
    "PRIXFAST8",
    "PRIxLEAST16",
    "PRIXLEAST16",
    "PRIxLEAST32",
    "PRIXLEAST32",
    "PRIxLEAST64",
    "PRIXLEAST64",
    "PRIxLEAST8",
    "PRIXLEAST8",
    "PRIxMAX",
    "PRIXMAX",
    "PRIxPTR",
    "PRIXPTR",
    "PTRDIFF_MAX",
    "PTRDIFF_MIN",
    "RAND_MAX",
    "SCHAR_MAX",
    "SCHAR_MIN",
    "SCNd16",
    "SCNd32",
    "SCNd64",
    "SCNd8",
    "SCNdFAST16",
    "SCNdFAST32",
    "SCNdFAST64",
    "SCNdFAST8",
    "SCNdLEAST16",
    "SCNdLEAST32",
    "SCNdLEAST64",
    "SCNdLEAST8",
    "SCNdMAX",
    "SCNdPTR",
    "SCNi16",
    "SCNi32",
    "SCNi64",
    "SCNi8",
    "SCNiFAST16",
    "SCNiFAST32",
    "SCNiFAST64",
    "SCNiFAST8",
    "SCNiLEAST16",
    "SCNiLEAST32",
    "SCNiLEAST64",
    "SCNiLEAST8",
    "SCNiMAX",
    "SCNiPTR",
    "SCNo16",
    "SCNo32",
    "SCNo64",
    "SCNo8",
    "SCNoFAST16",
    "SCNoFAST32",
    "SCNoFAST64",
    "SCNoFAST8",
    "SCNoLEAST16",
    "SCNoLEAST32",
    "SCNoLEAST64",
    "SCNoLEAST8",
    "SCNoMAX",
    "SCNoPTR",
    "SCNu16",
    "SCNu32",
    "SCNu64",
    "SCNu8",
    "SCNuFAST16",
    "SCNuFAST32",
    "SCNuFAST64",
    "SCNuFAST8",
    "SCNuLEAST16",
    "SCNuLEAST32",
    "SCNuLEAST64",
    "SCNuLEAST8",
    "SCNuMAX",
    "SCNuPTR",
    "SCNx16",
    "SCNx32",
    "SCNx64",
    "SCNx8",
    "SCNxFAST16",
    "SCNxFAST32",
    "SCNxFAST64",
    "SCNxFAST8",
    "SCNxLEAST16",
    "SCNxLEAST32",
    "SCNxLEAST64",
    "SCNxLEAST8",
    "SCNxMAX",
    "SCNxPTR",
    "SEEK_CUR",
    "SEEK_END",
    "SEEK_SET",
    "SHRT_MAX",
    "SHRT_MIN",
    "SIG_ATOMIC_MAX",
    "SIG_ATOMIC_MIN",
    "SIG_DFL",
    "SIG_ERR",
    "SIG_IGN",
    "SIGABRT",
    "SIGFPE",
    "SIGILL",
    "SIGINT",
    "SIGSEGV",
    "SIGTERM",
    "SIZE_MAX",
    "stderr",
    "stdin",
    "stdout",
    "TIME_UTC",
    "TMP_MAX",
    "UCHAR_MAX",
    "UINT_FAST16_MAX",
    "UINT_FAST32_MAX",
    "UINT_FAST64_MAX",
    "UINT_FAST8_MAX",
    "UINT_LEAST16_MAX",
    "UINT_LEAST32_MAX",
    "UINT_LEAST64_MAX",
    "UINT_LEAST8_MAX",
    "UINT_MAX",
    "UINT16_MAX",
    "UINT32_MAX",
    "UINT64_MAX",
    "UINT8_MAX",
    "UINTMAX_MAX",
    "UINTPTR_MAX",
    "ULLONG_MAX",
    "ULONG_MAX",
    "USHRT_MAX",
    "WCHAR_MAX",
    "WCHAR_MIN",
    "WEOF",
    "WINT_MAX",
    "WINT_MIN",
}


def _transform_invalid_identifier(invalid_identifier: str) -> str:
    """Applies a transformation to an invalid C++ identifier to make it valid.

    Currently, this simply appends a single underscore. This addresses the
    vast majority of realistic cases, but there are some caveats; see the
    `fix_cc_identifier` function documentation for details.

    Args:
        invalid_identifier: The identifier that must not appear verbatim in
            the generated C++ code.

    Returns:
        The input with one trailing underscore appended.
    """
    return f"{invalid_identifier}_"


def fix_cc_identifier(proto_identifier: str) -> str:
    """Returns an adjusted form of the identifier for use in generated C++ code.

    If the given identifier is already valid for use in the generated C++ code,
    it will be returned as-is. If the identifier is a C++ keyword or a
    preprocessor macro from the standard library, the returned identifier will
    be modified slightly in order to avoid compiler errors.

    Currently, this simply appends an underscore if necessary. This handles the
    vast majority of realistic cases, though it doesn't attempt to fix
    identifiers that the C++ spec reserves for the compiler's use.

    For reference, C++ reserves two categories of identifiers for the compiler:
      - Any identifier that contains the substring "__" anywhere in it.
      - Any identifier with an underscore for the first character and a capital
        letter for the second character.

    Note that the mapping is not injective: a reserved word and the same word
    with a trailing underscore (e.g. "class" and "class_") both come back as
    the underscore-suffixed form.

    Args:
        proto_identifier: The identifier as written in the proto definition.
            It is compared against PW_PROTO_CODEGEN_RESERVED_WORDS exactly
            (case-sensitively).

    Returns:
        `proto_identifier` unchanged when it is not reserved; otherwise the
        result of `_transform_invalid_identifier(proto_identifier)`.
    """
    if proto_identifier in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return _transform_invalid_identifier(proto_identifier)
    return proto_identifier


def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
    """Returns an adjusted form of the enum-value name for use in generated C++.

    Generates an UPPER_SNAKE_CASE variant of the given enum-value name (by
    upper-casing it with `str.upper()`) and then checks that variant for
    collisions with C++ keywords and standard-library macros. Returns a
    potentially modified version of the input in order to fix collisions if
    any are found.

    Only the collision check uses the upper-cased form; the returned name keeps
    the caller's original casing, with the fix applied to it.

    Note that, although the code generation also creates enum-value aliases in
    kHungarianNotationPascalCase, symbols of that form never conflict with
    keywords or standard-library macros in C++20. Therefore, only the
    UPPER_SNAKE_CASE versions need to be checked for conflicts.

    See `fix_cc_identifier` for further details.

    Args:
        proto_enum_entry: The enum-value name as written in the proto
            definition.

    Returns:
        `proto_enum_entry` unchanged when its upper-cased form is not in
        PW_PROTO_CODEGEN_RESERVED_WORDS; otherwise the result of
        `_transform_invalid_identifier(proto_enum_entry)`.
    """
    upper_snake_case = proto_enum_entry.upper()
    if upper_snake_case in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return _transform_invalid_identifier(proto_enum_entry)
    return proto_enum_entry