pw_string: Match std::string semantics for literal / array overloads

Previously, the constructor, assign, and append overloads for string
literals or arrays used the length of the array as the length of the
string, and dropped the final character only if it was a null
terminator.

This avoided some strlen() calls and made assigning to an InlineString
more like working with an ""sv string literal. However, since the
character array is not guaranteed to be null terminated, this approach
had some drawbacks:

- A string of length N could not be assigned to an InlineString<N>, to
  account for non-terminated character arrays.
- The capacity template argument would be deduced to a value one larger
  than the length of the string.
- Behavior was inconsistent with std::string, since nulls could be
  included in assignments.

This commit updates pw::InlineString's string literal or array overloads
to use the same semantics as std::string. Unlike std::string,
pw::InlineString checks the array size to prevent out-of-bounds reads.

Change-Id: Ic0e61fd1c45847f5a0b6affa414ed0e624b09807
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/111711
Pigweed-Auto-Submit: Wyatt Hepler <hepler@google.com>
Reviewed-by: Erik Gilling <konkers@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_string/docs.rst b/pw_string/docs.rst
index fb1e7e2..41b2564 100644
--- a/pw_string/docs.rst
+++ b/pw_string/docs.rst
@@ -48,9 +48,9 @@
 - **Fixed capacity** -- Operations that add characters to the string beyond its
   capacity are an error. These trigger a ``PW_ASSERT`` at runtime. When
   detectable, these situations trigger a ``static_assert`` at compile time.
-- **Character array support** -- :cpp:type:`pw::InlineString` provides overloads
-  specific to character arrays. These allow for compile-time capacity checks and
-  class template argument deduction.
+- **Minimal overhead** -- :cpp:type:`pw::InlineString` operations never
+  allocate. Reading the contents of the string is a direct memory access within
+  the string object, without pointer indirection.
 - **Constexpr support** -- :cpp:type:`pw::InlineString` works in ``constexpr``
   contexts, which is not supported by ``std::string`` until C++20.
 
@@ -59,10 +59,10 @@
 :cpp:type:`pw::InlineString` / :cpp:class:`pw::InlineBasicString` follows the
 ``std::string`` / ``std::basic_string<T>`` API, with a few variations:
 
-- Assigning and constructing from character literals or arrays uses the size of
-  the literal or array, rather than treating it as a null-terminated string.
-  This allows for compile-time capacity checks and class template argument
-  deduction.
+- :cpp:type:`pw::InlineString` provides overloads specific to character arrays.
+  These perform compile-time capacity checks and are used for class template
+  argument deduction. Like ``std::string``, character arrays are treated as
+  null-terminated strings.
 - :cpp:type:`pw::InlineString` allows implicit conversions from
   ``std::string_view``. Specifying the capacity parameter is cumbersome, so
   implicit conversions are helpful. Also, implicitly creating a
@@ -164,8 +164,8 @@
 
 .. code-block:: c++
 
-   // Deduces a capacity of 6 characters to match the 6-character string literal
-   // (counting the null terminator).
+   // Deduces a capacity of 5 characters to match the 5-character string literal
+   // (not counting the null terminator).
    pw::InlineBasicString inline_string = "12345";
 
    // In C++20, CTAD may be used with the pw::InlineString alias.
diff --git a/pw_string/public/pw_string/internal/string_common_functions.inc b/pw_string/public/pw_string/internal/string_common_functions.inc
index a72ace9..34b0ce6 100644
--- a/pw_string/public/pw_string/internal/string_common_functions.inc
+++ b/pw_string/public/pw_string/internal/string_common_functions.inc
@@ -72,10 +72,10 @@
 template <size_t kCharArraySize>
 constexpr InlineBasicString& assign(const T (&array)[kCharArraySize]) {
   static_assert(
-      kCharArraySize < string_impl::kGeneric && kCharArraySize <= kCapacity,
+      string_impl::NullTerminatedArrayFitsInString(kCharArraySize, kCapacity),
       _PW_STRING_CAPACITY_TOO_SMALL_FOR_ARRAY);
   return static_cast<InlineBasicString&>(
-      Copy(data(), array, string_impl::ArrayStringLength(array)));
+      Copy(data(), array, string_impl::ArrayStringLength(array, max_size())));
 }
 
 template <typename InputIterator,
@@ -224,9 +224,9 @@
 template <size_t kCharArraySize>
 constexpr InlineBasicString& append(const T (&array)[kCharArraySize]) {
   static_assert(
-      kCharArraySize < string_impl::kGeneric && kCharArraySize <= kCapacity,
+      string_impl::NullTerminatedArrayFitsInString(kCharArraySize, kCapacity),
       _PW_STRING_CAPACITY_TOO_SMALL_FOR_ARRAY);
-  return append(array, string_impl::ArrayStringLength(array));
+  return append(array, string_impl::ArrayStringLength(array, max_size()));
 }
 
 template <typename U, typename = string_impl::EnableIfNonArrayCharPointer<T, U>>
diff --git a/pw_string/public/pw_string/internal/string_impl.h b/pw_string/public/pw_string/internal/string_impl.h
index b90bd9e..5c2eb0d 100644
--- a/pw_string/public/pw_string/internal/string_impl.h
+++ b/pw_string/public/pw_string/internal/string_impl.h
@@ -96,34 +96,55 @@
 
 #endif  // __cpp_lib_constexpr_string
 
+// Used in static_asserts to check that a C array fits in an InlineString.
+constexpr bool NullTerminatedArrayFitsInString(
+    size_t null_terminated_array_size, size_type capacity) {
+  return null_terminated_array_size > 0u &&
+         null_terminated_array_size - 1 <= capacity &&
+         null_terminated_array_size - 1 < kGeneric;
+}
+
 // Constexpr utility functions for pw::InlineString. These are NOT intended for
 // general use. These mostly map directly to general purpose standard library
 // utilities that are not constexpr until C++20.
 
-// Calculates the length of a C string up to the capacity. Returns capacity +
-// 1 if the string is longer than the capacity. Use this instead of
-// std::char_traits<T>::length, which is unbounded.
+// Calculates the length of a C string up to the capacity. Returns capacity + 1
+// if the string is longer than the capacity. This replaces
+// std::char_traits<T>::length, which is unbounded. The string must contain at
+// least one character.
 template <typename T>
 constexpr size_type BoundedStringLength(const T* string, size_type capacity) {
-  size_type index = 0;
-  for (; !char_traits<T>::eq(string[index], T()); ++index) {
-    if (index > capacity) {
-      break;  // Return if size is too large. This may trigger an assert later.
+  size_type length = 0;
+  for (; length <= capacity; ++length) {
+    if (char_traits<T>::eq(string[length], T())) {
+      break;
     }
   }
-  return index;
+  return length;  // length is capacity + 1 if T() was not found.
 }
 
-// InlineString literals and character arrays are the same type, but string
-// literals are always null terminated. This returns the size of a character
-// array, ignoring the final character if it is a null terminator.
+// As with std::string, InlineString treats literals and character arrays as
+// null-terminated strings. ArrayStringLength checks that the array size fits
+// within size_type and asserts if no null terminator was found in the array.
+template <typename T>
+constexpr size_type ArrayStringLength(const T* array,
+                                      size_type max_string_length,
+                                      size_type capacity) {
+  const size_type max_length = std::min(max_string_length, capacity);
+  const size_type length = BoundedStringLength(array, max_length);
+  PW_ASSERT(length <= max_string_length);  // The array is not null terminated
+  return length;
+}
+
 template <typename T, size_t kCharArraySize>
-constexpr size_type ArrayStringLength(const T (&array)[kCharArraySize]) {
-  static_assert(kCharArraySize < kGeneric,
+constexpr size_type ArrayStringLength(const T (&array)[kCharArraySize],
+                                      size_type capacity) {
+  static_assert(kCharArraySize > 0u, "C arrays cannot have a length of 0");
+  static_assert(kCharArraySize - 1 < kGeneric,
                 "The size of this literal or character array is too large "
                 "for pw::InlineString<>::size_type");
-  return kCharArraySize -
-         (char_traits<T>::eq(array[kCharArraySize - 1], T()) ? 1 : 0);
+  return ArrayStringLength(
+      array, static_cast<size_type>(kCharArraySize - 1), capacity);
 }
 
 // Constexpr version of std::copy that returns the number of copied characters.
diff --git a/pw_string/public/pw_string/string.h b/pw_string/public/pw_string/string.h
index e17c7b5..ca41dc1 100644
--- a/pw_string/public/pw_string/string.h
+++ b/pw_string/public/pw_string/string.h
@@ -26,8 +26,8 @@
 #define _PW_STRING_CAPACITY_TOO_SMALL_FOR_ARRAY                               \
   "The pw::InlineString's capacity is too small to hold the assigned string " \
   "literal or character array. When assigning a literal or array to a "       \
-  "pw::InlineString, the pw::InlineString must be large enough for the "      \
-  "entire string and an additional null terminator."
+  "pw::InlineString, the pw::InlineString's capacity must be large enough "   \
+  "for the entire string, not counting the null terminator."
 
 #define _PW_STRING_CAPACITY_TOO_SMALL_FOR_STRING                               \
   "When assigning one pw::InlineString with known capacity to another, the "   \
@@ -37,7 +37,7 @@
 namespace pw {
 
 // pw::InlineBasicString<T, kCapacity> is a fixed-capacity version of
-// std::basic_string.  It implements mostly the same API as std::basic_string,
+// std::basic_string. It implements mostly the same API as std::basic_string,
 // but the capacity of the string is fixed at construction and cannot grow.
 // Attempting to increase the size beyond the capacity triggers an assert.
 //
@@ -103,9 +103,9 @@
   constexpr InlineBasicString(const T (&array)[kCharArraySize])
       : InlineBasicString() {
     static_assert(
-        kCharArraySize < string_impl::kGeneric && kCharArraySize <= kCapacity,
+        string_impl::NullTerminatedArrayFitsInString(kCharArraySize, kCapacity),
         _PW_STRING_CAPACITY_TOO_SMALL_FOR_ARRAY);
-    Copy(data(), array, string_impl::ArrayStringLength(array));
+    Copy(data(), array, string_impl::ArrayStringLength(array, max_size()));
   }
 
   template <typename InputIterator,
@@ -415,18 +415,18 @@
 
 // In C++17, the capacity of the string may be deduced from a string literal or
 // array. For example, the following deduces a character type of char and a
-// capacity of 5 (which includes the null terminator):
+// capacity of 4 (which does not include the null terminator).
 //
 //   InlineBasicString my_string = "1234";
 //
 // In C++20, the template parameters for the pw::InlineString alias may be
 // deduced similarly:
 //
-//   InlineString my_string = "abc";  // deduces capacity of 4.
+//   InlineString my_string = "abc";  // deduces capacity of 3.
 //
 template <typename T, size_t kCharArraySize>
 InlineBasicString(const T (&)[kCharArraySize])
-    -> InlineBasicString<T, kCharArraySize>;
+    -> InlineBasicString<T, kCharArraySize - 1>;
 
 #endif  // __cpp_deduction_guides
 
diff --git a/pw_string/string_test.cc b/pw_string/string_test.cc
index f29a5f1..1a20f00 100644
--- a/pw_string/string_test.cc
+++ b/pw_string/string_test.cc
@@ -90,20 +90,20 @@
 #ifdef __cpp_deduction_guides
 
 TEST(InlineString, DeduceBasicString_Char) {
-  InlineBasicString string_10("123456789");
+  InlineBasicString string_10("1234567890");
   static_assert(std::is_same_v<decltype(string_10), InlineString<10>>);
 
-  InlineBasicString string_4 = "abc";
-  static_assert(std::is_same_v<decltype(string_4), InlineString<4>>);
+  InlineBasicString string_3 = "abc";
+  static_assert(std::is_same_v<decltype(string_3), InlineString<3>>);
 
   string_10.resize(6);
   EXPECT_STREQ(
-      string_10.append(string_4).append(InlineBasicString("?")).c_str(),
+      string_10.append(string_3).append(InlineBasicString("?")).c_str(),
       "123456abc?");
 }
 
 TEST(InlineString, DeduceBasicString_Int) {
-  constexpr long kLongArray[3] = {0, 1, 2};
+  constexpr long kLongArray[4] = {0, 1, 2, 0};
   InlineBasicString string_3 = kLongArray;
   static_assert(std::is_same_v<decltype(string_3), InlineBasicString<long, 3>>);
 
@@ -216,11 +216,13 @@
 constexpr const char* kPointer0 = "";
 constexpr const char* kPointer10 = "9876543210";
 
-constexpr const char kArray1[1] = {'?'};
 constexpr const char kArrayNull1[1] = {'\0'};
+constexpr const char kArray5[5] = {'1', '2', '3', '4', '\0'};
 
-constexpr const char kArray5[5] = {'\0', '1', '2', '3', '4'};
-constexpr const char kArrayNull5[5] = {'\0', '1', '2', '3', '\0'};
+// Invalid, non-terminated arrays used in negative compilation tests.
+[[maybe_unused]] constexpr const char kArrayNonNull1[1] = {'?'};
+[[maybe_unused]] constexpr const char kArrayNonNull5[5] = {
+    '1', '2', '3', '4', '5'};
 
 constexpr EvenNumberIterator<char> kEvenNumbers0(0);
 constexpr EvenNumberIterator<char> kEvenNumbers2(2);
@@ -373,6 +375,8 @@
 }
 
 TEST(InlineString, Construct_Array) {
+  TEST_STRING(InlineString<0>(""), , "");
+
   TEST_STRING(InlineString<1>(""), , "");
   TEST_STRING(InlineString<10>(""), , "");
 
@@ -380,35 +384,36 @@
   TEST_STRING(InlineString<10>("A"), , "A");
   TEST_STRING(InlineString<10>("123456789"), , "123456789");
 
-  TEST_STRING(InlineString<2>("\0"), , "\0");
-  TEST_STRING(InlineString<10>("\0"), , "\0");
-  TEST_STRING(InlineString<10>("12\000456789"), , "12\000456789");
+  TEST_STRING(InlineString<2>("\0"), , "");
+  TEST_STRING(InlineString<10>(""), , "");
+  TEST_STRING(InlineString<10>("12\000456789"), , "12");
 
-  TEST_STRING(InlineString<1>(kArray1), , "?");
   TEST_STRING(InlineString<1>(kArrayNull1), , "");
+  TEST_STRING(InlineString<5>(kArray5), , "1234");
+  TEST_STRING(InlineString<10>(kArray5), , "1234");
 
-  TEST_STRING(InlineString<5>(kArray5), , "\0001234");
-  TEST_STRING(InlineString<5>(kArrayNull5), , "\000123");
-
-  TEST_STRING(InlineString<10>(kArray5), , "\0001234");
-  TEST_STRING(InlineString<10>(kArrayNull5), , "\000123");
-
-#if PW_NC_TEST(Construct_Array_EmptyLiteralRequiresCapacityOfAtLeast1)
+#if PW_NC_TEST(Construct_Array_NullTerminationIsRequiredFillsCapacity)
+  PW_NC_EXPECT("PW_ASSERT\(.*The array is not null terminated");
+  [[maybe_unused]] constexpr InlineString<1> bad_string(kArrayNonNull1);
+#elif PW_NC_TEST(Construct_Array_NullTerminationIsRequiredExtraCapacity)
+  PW_NC_EXPECT("PW_ASSERT\(.*The array is not null terminated");
+  [[maybe_unused]] constexpr InlineString<10> bad_string(kArrayNonNull5);
+#elif PW_NC_TEST(Construct_Array_NonTerminatedArrayDoesNotFit)
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
-  [[maybe_unused]] constexpr InlineString<0> bad_string("");
-#elif PW_NC_TEST(Construct_Array_SingleCharLiteralRequiresCapacityOfAtLeast2)
+  [[maybe_unused]] constexpr InlineString<3> bad_string(kArrayNonNull5);
+#elif PW_NC_TEST(Construct_Array_SingleCharLiteralRequiresCapacityOfAtLeast1)
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
-  [[maybe_unused]] constexpr InlineString<1> bad_string("A");
-#elif PW_NC_TEST(Construct_Array_5CharLiteralRequiresCapacityOfAtLeast6)
+  [[maybe_unused]] constexpr InlineString<0> bad_string("A");
+#elif PW_NC_TEST(Construct_Array_5CharLiteralRequiresCapacityOfAtLeast5)
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
-  [[maybe_unused]] constexpr InlineString<5> bad_string("ACDEF");
+  [[maybe_unused]] constexpr InlineString<4> bad_string("ACDEF");
 #elif PW_NC_TEST(Construct_Array_TooManyNulls)
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
-  [[maybe_unused]] constexpr InlineString<4> bad_string(kArrayNull5);
+  [[maybe_unused]] constexpr InlineString<3> bad_string(kArray5);
 #endif  // PW_NC_TEST
 }
 
@@ -602,7 +607,7 @@
   TEST_STRING(InlineString<0>(), fixed_str = InlineString<0>(), "");
   TEST_STRING(InlineString<10>("something"),
               fixed_str = InlineString<9>("el\0se"),
-              "el\0se");
+              "el");
   TEST_STRING(InlineString<10>("0_o"), fixed_str = InlineString<10>(), "");
 
 #if PW_NC_TEST(AssignOperator_Copy_DoesNotFit)
@@ -630,7 +635,7 @@
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
   [[maybe_unused]] constexpr auto fail = [] {
-    InlineString<5> str("abc");
+    InlineString<4> str("abc");
     return str = "12345";
   }();
 #elif PW_NC_TEST(AssignOperator_Array_NotSupportedByGeneric)
@@ -899,25 +904,21 @@
 }
 
 TEST(InlineString, Assign_Array) {
+  TEST_STRING(InlineString<0>(), str.assign(""), "");
   TEST_STRING(InlineString<1>(), str.assign(""), "");
   TEST_STRING(InlineString<10>("a"), str.assign(""), "");
 
-  TEST_STRING(InlineString<2>(), str.assign("A"), "A");
+  TEST_STRING(InlineString<1>(), str.assign("A"), "A");
   TEST_STRING(InlineString<10>(), str.assign("A"), "A");
   TEST_STRING(InlineString<10>(), str.assign("123456789"), "123456789");
 
-  TEST_STRING(InlineString<2>(), str.assign("\0"), "\0");
-  TEST_STRING(InlineString<10>(), str.assign("\0"), "\0");
-  TEST_STRING(InlineString<10>(), str.assign("12\000456789"), "12\000456789");
+  TEST_STRING(InlineString<1>(), str.assign("\0"), "");
+  TEST_STRING(InlineString<10>(), str.assign("\0"), "");
+  TEST_STRING(InlineString<10>(), str.assign("12\000456789"), "12");
 
-  TEST_STRING(InlineString<1>(""), str.assign(kArray1), "?");
   TEST_STRING(InlineString<1>(""), str.assign(kArrayNull1), "");
-
-  TEST_STRING(InlineString<5>("abcd"), str.assign(kArray5), "\0001234");
-  TEST_STRING(InlineString<5>(), str.assign(kArrayNull5), "\000123");
-
-  TEST_STRING(InlineString<10>("abcde"), str.assign(kArray5), "\0001234");
-  TEST_STRING(InlineString<10>(), str.assign(kArrayNull5), "\000123");
+  TEST_STRING(InlineString<5>(), str.assign(kArray5), "1234");
+  TEST_STRING(InlineString<10>(), str.assign(kArray5), "1234");
 
   TEST_RUNTIME_STRING(InlineString<1>(), Generic(str).assign("?"), "?");
   TEST_RUNTIME_STRING(
@@ -928,18 +929,30 @@
   Generic(too_small).assign("123456");
 #endif  // 0
 
-#if PW_NC_TEST(Assign_Array_EmptyLiteralRequiresCapacityOfAtLeast1)
+#if PW_NC_TEST(Assign_Array_NullTerminationIsRequiredFillsCapacity)
+  PW_NC_EXPECT("PW_ASSERT\(.*The array is not null terminated");
+  [[maybe_unused]] constexpr auto fail = [] {
+    InlineString<1> bad_string;
+    return bad_string.assign(kArrayNonNull1);
+  }();
+#elif PW_NC_TEST(Assign_Array_NullTerminationIsRequiredExtraCapacity)
+  PW_NC_EXPECT("PW_ASSERT\(.*The array is not null terminated");
+  [[maybe_unused]] constexpr auto fail = [] {
+    InlineString<10> bad_string;
+    return bad_string.assign(kArrayNonNull5);
+  }();
+#elif PW_NC_TEST(Assign_Array_NonTerminatedArrayDoesNotFit)
+  PW_NC_EXPECT(
+      "InlineString's capacity is too small to hold the assigned string");
+  [[maybe_unused]] constexpr auto fail = [] {
+    InlineString<3> bad_string;
+    return bad_string.assign(kArrayNonNull5);
+  }();
+#elif PW_NC_TEST(Assign_Array_SingleCharLiteralRequiresCapacityOfAtLeast1)
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
   [[maybe_unused]] constexpr auto fail = [] {
     InlineString<0> str;
-    return str.assign("");
-  }();
-#elif PW_NC_TEST(Assign_Array_SingleCharLiteralRequiresCapacityOfAtLeast2)
-  PW_NC_EXPECT(
-      "InlineString's capacity is too small to hold the assigned string");
-  [[maybe_unused]] constexpr auto fail = [] {
-    InlineString<1> str;
     return str.assign("?");
   }();
 #endif  // PW_NC_TEST
@@ -1334,7 +1347,7 @@
   PW_NC_EXPECT(
       "InlineString's capacity is too small to hold the assigned string");
   [[maybe_unused]] constexpr auto fail = [] {
-    InlineString<3> str;
+    InlineString<2> str;
     return str.append("123");
   }();
 #endif  // PW_NC_TEST
@@ -1476,7 +1489,7 @@
       "InlineString's capacity is too small to hold the assigned string");
   [[maybe_unused]] constexpr auto fail = [] {
     InlineString<3> str;
-    return str += "123";
+    return str += "1234";
   }();
 #endif  // PW_NC_TEST
 }
@@ -1670,7 +1683,7 @@
                 "not equal");
   static_assert(InlineString<10>("") != InlineString<5>("abc"), "not equal");
   static_assert(InlineString<1>({'\0'}) != InlineString<10>(""), "not equal");
-  static_assert(!(InlineString<1>({'\0'}) != InlineString<10>("\0")),
+  static_assert(!(InlineString<1>({'\0'}) != InlineString<10>("\0"sv)),
                 "not equal");
 
   static_assert(InlineString<10>() < InlineString<10>("a"), "less");
@@ -1685,9 +1698,9 @@
                 "less equal");
   static_assert(InlineString<1>({'\0'}) <= InlineString<10>("\1\0"),
                 "less equal");
-  static_assert(InlineString<2>({'\1', '\0'}) <= InlineString<10>("\1\0"),
+  static_assert(InlineString<2>({'\1', '\0'}) <= InlineString<10>("\1\0"sv),
                 "less equal");
-  static_assert(!(InlineString<2>({'\2', '\0'}) <= InlineString<10>("\1\0")),
+  static_assert(!(InlineString<2>({'\2', '\0'}) <= InlineString<10>("\1\0"sv)),
                 "less equal");
 
   static_assert(InlineString<10>("?") > InlineString<10>(""), "greater");
@@ -1833,7 +1846,7 @@
 
 TEST(BasicStrings, VariousOperations) {
 #define BASIC_STRINGS_VARIOUS_OPERATIONS(type, capacity) \
-  static constexpr type kOne[1] = {1};                   \
+  static constexpr type kOne[2] = {1, 0};                \
   constexpr auto string = [] {                           \
     InlineBasicString<type, capacity> str({0});          \
     str.append(kOne);                                    \