pw_tokenizer: Support tokenizing __func__

- In C++, take an array reference for PW_TOKENIZE_STRING, so that
  literals or arrays may be tokenized, but not const char*.
- Use std::to_array to copy the original string to the tokenized section
  so that character arrays may be used.
- Add tests for tokenizing __func__ and __PRETTY_FUNCTION__ in C++.

Change-Id: I7a9e997d862a2eccad464e9113ce0cf5fc96697b
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/16962
Reviewed-by: Keir Mierle <keir@google.com>
Reviewed-by: Paul Mathieu <paulmathieu@google.com>
Commit-Queue: Wyatt Hepler <hepler@google.com>
diff --git a/pw_tokenizer/CMakeLists.txt b/pw_tokenizer/CMakeLists.txt
index 00fbe75..34044c4 100644
--- a/pw_tokenizer/CMakeLists.txt
+++ b/pw_tokenizer/CMakeLists.txt
@@ -17,6 +17,7 @@
     encode_args.cc
     tokenize.cc
   PUBLIC_DEPS
+    pw_polyfill.overrides
     pw_preprocessor
     pw_span
   PRIVATE_DEPS
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 72a8040..f9f8536 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -294,6 +294,30 @@
 additional tokens, but it may not be desirable to fill a token database with
 duplicate log lines.
 
+Tokenizing function names
+-------------------------
+The string literal tokenization functions support tokenizing string literals or
+constexpr character arrays (``constexpr const char[]``). In GCC and Clang, the
+special ``__func__`` variable and ``__PRETTY_FUNCTION__`` extension are declared
+as ``static constexpr char[]`` in C++ instead of the standard ``static const
+char[]``. This means that ``__func__`` and ``__PRETTY_FUNCTION__`` can be
+tokenized while compiling C++ with GCC or Clang.
+
+.. code-block:: cpp
+
+  // Tokenize the special function name variables.
+  constexpr uint32_t function = PW_TOKENIZE_STRING(__func__);
+  constexpr uint32_t pretty_function = PW_TOKENIZE_STRING(__PRETTY_FUNCTION__);
+
+  // Tokenize the function name variables to a handler function.
+  PW_TOKENIZE_TO_GLOBAL_HANDLER(__func__);
+  PW_TOKENIZE_TO_GLOBAL_HANDLER(__PRETTY_FUNCTION__);
+
+Note that ``__func__`` and ``__PRETTY_FUNCTION__`` are not string literals.
+They are defined as static character arrays, so they cannot be implicitly
+concatenated with string literals. For example, ``printf(__func__ ": %d",
+123);`` will not compile.
+
 Tokenization in Python
 ----------------------
 The Python ``pw_tokenizer.encode`` module has limited support for encoding
diff --git a/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h b/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h
index b30223a..27434bd 100644
--- a/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h
+++ b/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h
@@ -33,9 +33,8 @@
 #include "pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h"
 
 #define PW_TOKENIZER_STRING_TOKEN(format)                \
-  pw::tokenizer::PwTokenizer65599FixedLengthHash(        \
-      std::string_view((format), sizeof(format "") - 1), \
-      PW_TOKENIZER_CFG_HASH_LENGTH)
+  ::pw::tokenizer::PwTokenizer65599FixedLengthHashArray( \
+      format, PW_TOKENIZER_CFG_HASH_LENGTH)
 
 #else  // In C or older C++ code, use the hashing macro.
 
diff --git a/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h b/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
index 2b6039e..2d2983e 100644
--- a/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
+++ b/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
@@ -61,4 +61,14 @@
   return hash;
 }
 
+// Take the string as an array to support either literals or character arrays,
+// but not const char*.
+template <size_t length>
+constexpr uint32_t PwTokenizer65599FixedLengthHashArray(
+    const char (&string)[length], size_t hash_length) {
+  static_assert(length > 0);
+  return PwTokenizer65599FixedLengthHash(std::string_view(string, length - 1),
+                                         hash_length);
+}
+
 }  // namespace pw::tokenizer
diff --git a/pw_tokenizer/public/pw_tokenizer/tokenize.h b/pw_tokenizer/public/pw_tokenizer/tokenize.h
index 98eb303..c988fe5 100644
--- a/pw_tokenizer/public/pw_tokenizer/tokenize.h
+++ b/pw_tokenizer/public/pw_tokenizer/tokenize.h
@@ -13,10 +13,20 @@
 // the License.
 #pragma once
 
+#ifdef __cplusplus
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+#else
+
 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 
+#endif  // __cplusplus
+
 #include "pw_preprocessor/compiler.h"
 #include "pw_preprocessor/concat.h"
 #include "pw_preprocessor/macro_arg_count.h"
@@ -32,9 +42,12 @@
 // If no domain is specified, this default is used.
 #define PW_TOKENIZER_DEFAULT_DOMAIN "default"
 
-// Tokenizes a string literal and converts it to a pw_TokenizerStringToken. This
-// expression can be assigned to a local or global variable, but cannot be used
-// in another expression. For example:
+// Tokenizes a string and converts it to a pw_TokenizerStringToken. In C++, the
+// string may be a literal or a constexpr char array. In C, the argument must be
+// a string literal.
+//
+// This expression can be assigned to a local or global variable, but cannot be
+// used in another expression. For example:
 //
 //   constexpr uint32_t global = PW_TOKENIZE_STRING("Wow!");  // This works.
 //
@@ -48,16 +61,10 @@
   PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
 
 // Same as PW_TOKENIZE_STRING, but tokenizes to the specified domain.
-#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal)                     \
-  /* assign to a variable */ PW_TOKENIZER_STRING_TOKEN(string_literal);       \
-                                                                              \
-  /* Declare the format string as an array in the special tokenized string */ \
-  /* section, which should be excluded from the final binary. Use __LINE__ */ \
-  /* to create unique names for the section and variable, which avoids     */ \
-  /* compiler warnings.                                                    */ \
-  static _PW_TOKENIZER_CONST char PW_CONCAT(                                  \
-      _pw_tokenizer_string_literal_DO_NOT_USE_,                               \
-      __COUNTER__)[] _PW_TOKENIZER_SECTION(domain) = string_literal
+#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal)               \
+  /* assign to a variable */ PW_TOKENIZER_STRING_TOKEN(string_literal); \
+                                                                        \
+  _PW_TOKENIZER_RECORD_ORIGINAL_STRING(domain, string_literal)
 
 // Encodes a tokenized string and arguments to the provided buffer. The size of
 // the buffer is passed via a pointer to a size_t. After encoding is complete,
@@ -253,3 +260,25 @@
 #define _PW_TOKENIZER_SECTION(domain) \
   PW_KEEP_IN_SECTION(".pw_tokenized." domain "." PW_STRINGIFY(__LINE__))
 #endif  // __APPLE__
+
+// Declare the format string as an array in the special tokenized string
+// section, which should be excluded from the final binary. __COUNTER__ gives
+// each array a unique name (the section name comes from _PW_TOKENIZER_SECTION),
+// which avoids compiler warnings.
+#ifdef __cplusplus
+
+// In C++, use std::to_array to support tokenizing string literals or constexpr
+// char arrays.
+#define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(domain, string)   \
+  static constexpr std::array<char, sizeof(string)> PW_CONCAT( \
+      _pw_tokenizer_string_literal_DO_NOT_USE_, __COUNTER__)   \
+      _PW_TOKENIZER_SECTION(domain) = std::to_array<const char>(string)
+
+#else  // In C, only string literals may be tokenized.
+
+#define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(domain, string_literal)         \
+  static const char PW_CONCAT(_pw_tokenizer_string_literal_DO_NOT_USE_,      \
+                              __COUNTER__)[] _PW_TOKENIZER_SECTION(domain) = \
+      string_literal
+
+#endif  // __cplusplus
diff --git a/pw_tokenizer/tokenize_test.cc b/pw_tokenizer/tokenize_test.cc
index bc552bc..cd96681 100644
--- a/pw_tokenizer/tokenize_test.cc
+++ b/pw_tokenizer/tokenize_test.cc
@@ -32,7 +32,7 @@
 // configuration.
 template <size_t kSize>
 constexpr uint32_t TestHash(const char (&string)[kSize]) {
-  constexpr unsigned kTestHashLength = 48;
+  constexpr unsigned kTestHashLength = 64;
   static_assert(kTestHashLength <= PW_TOKENIZER_CFG_HASH_LENGTH);
   static_assert(kSize <= kTestHashLength + 1);
   return PwTokenizer65599FixedLengthHash(std::string_view(string, kSize - 1),
@@ -51,22 +51,60 @@
       kData...};
 }
 
-TEST(TokenizeStringLiteral, EmptyString_IsZero) {
+TEST(TokenizeString, EmptyString_IsZero) {
   constexpr pw_TokenizerStringToken token = PW_TOKENIZE_STRING("");
   EXPECT_EQ(0u, token);
 }
 
-TEST(TokenizeStringLiteral, String_MatchesHash) {
+TEST(TokenizeString, String_MatchesHash) {
   constexpr uint32_t token = PW_TOKENIZE_STRING("[:-)");
   EXPECT_EQ(TestHash("[:-)"), token);
 }
 
 constexpr uint32_t kGlobalToken = PW_TOKENIZE_STRING(">:-[]");
 
-TEST(TokenizeStringLiteral, GlobalVariable_MatchesHash) {
+TEST(TokenizeString, GlobalVariable_MatchesHash) {
   EXPECT_EQ(TestHash(">:-[]"), kGlobalToken);
 }
 
+struct TokenizedWithinClass {
+  static constexpr uint32_t kThisToken = PW_TOKENIZE_STRING("???");
+};
+
+static_assert(TestHash("???") == TokenizedWithinClass::kThisToken);
+
+TEST(TokenizeString, ClassMember_MatchesHash) {
+  EXPECT_EQ(TestHash("???"), TokenizedWithinClass().kThisToken);
+}
+
+// Use a function with a shorter name to test tokenizing __func__ and
+// __PRETTY_FUNCTION__.
+//
+// WARNING: This function might cause errors for compilers other than GCC and
+// Clang. It relies on two GCC/Clang extensions:
+//
+//   1 - The __PRETTY_FUNCTION__ C++ function name variable.
+//   2 - __func__ as a static constexpr array instead of static const. See
+//       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66639 for background.
+//
+void TestName() {
+  constexpr uint32_t function_hash = PW_TOKENIZE_STRING(__func__);
+  EXPECT_EQ(pw::tokenizer::TestHash(__func__), function_hash);
+
+  // Check the non-standard __PRETTY_FUNCTION__ name.
+  constexpr uint32_t pretty_function = PW_TOKENIZE_STRING(__PRETTY_FUNCTION__);
+  EXPECT_EQ(pw::tokenizer::TestHash(__PRETTY_FUNCTION__), pretty_function);
+}
+
+TEST(TokenizeString, FunctionName) { TestName(); }
+
+TEST(TokenizeString, Array) {
+  constexpr char array[] = "won-won-won-wonderful";
+
+  const uint32_t array_hash = PW_TOKENIZE_STRING(array);
+  EXPECT_EQ(TestHash(array), array_hash);
+}
+
 // Verify that we can tokenize multiple strings from one source line.
 #define THREE_FOR_ONE(first, second, third)         \
   [[maybe_unused]] constexpr uint32_t token_1 =     \
@@ -76,7 +114,7 @@
   [[maybe_unused]] constexpr uint32_t token_3 =     \
       PW_TOKENIZE_STRING_DOMAIN("ignored", third);
 
-TEST(TokenizeStringLiteral, MultipleTokenizationsInOneMacroExpansion) {
+TEST(TokenizeString, MultipleTokenizationsInOneMacroExpansion) {
   // This verifies that we can safely tokenize multiple times in a single macro
   // expansion. This can be useful when for example a name and description are
   // both tokenized after being passed into a macro.
@@ -240,6 +278,16 @@
   EXPECT_EQ(std::memcmp(empty.data(), buffer_, empty.size()), 0);
 }
 
+TEST_F(TokenizeToBuffer, Array) {
+  static constexpr char array[] = "1234";
+  size_t message_size = 4;
+  PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, array);
+
+  constexpr std::array<uint8_t, 4> result = ExpectedData<>("1234");
+  ASSERT_EQ(result.size(), message_size);
+  EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
+}
+
 TEST_F(TokenizeToBuffer, NullptrString_EncodesNull) {
   char* string = nullptr;
   size_t message_size = 9;