Speed up StrAppend by up to 4x.

PiperOrigin-RevId: 552802740
Change-Id: I662da1b03bfffb7939b44ea3850566d3c209c6cc
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index 2e49c31..5ec0299 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -14,17 +14,9 @@
 
 #include "absl/strings/str_cat.h"
 
-#include <assert.h>
-
-#include <algorithm>
-#include <cstddef>
 #include <cstdint>
-#include <cstring>
 #include <string>
 
-#include "absl/strings/ascii.h"
-#include "absl/strings/internal/resize_uninitialized.h"
-#include "absl/strings/numbers.h"
 #include "absl/strings/string_view.h"
 
 namespace absl {
@@ -37,170 +29,26 @@
 //    of a mix of raw C strings, string_views, strings, and integer values.
 // ----------------------------------------------------------------------
 
-// Append is merely a version of memcpy that returns the address of the byte
-// after the area just overwritten.
-static char* Append(char* out, const AlphaNum& x) {
-  // memcpy is allowed to overwrite arbitrary memory, so doing this after the
-  // call would force an extra fetch of x.size().
-  char* after = out + x.size();
-  if (x.size() != 0) {
-    memcpy(out, x.data(), x.size());
-  }
-  return after;
-}
-
-std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
-  std::string result;
-  absl::strings_internal::STLStringResizeUninitialized(&result,
-                                                       a.size() + b.size());
-  char* const begin = &result[0];
-  char* out = begin;
-  out = Append(out, a);
-  out = Append(out, b);
-  assert(out == begin + result.size());
-  return result;
-}
-
-std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) {
-  std::string result;
-  strings_internal::STLStringResizeUninitialized(
-      &result, a.size() + b.size() + c.size());
-  char* const begin = &result[0];
-  char* out = begin;
-  out = Append(out, a);
-  out = Append(out, b);
-  out = Append(out, c);
-  assert(out == begin + result.size());
-  return result;
-}
-
-std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
-                   const AlphaNum& d) {
-  std::string result;
-  strings_internal::STLStringResizeUninitialized(
-      &result, a.size() + b.size() + c.size() + d.size());
-  char* const begin = &result[0];
-  char* out = begin;
-  out = Append(out, a);
-  out = Append(out, b);
-  out = Append(out, c);
-  out = Append(out, d);
-  assert(out == begin + result.size());
-  return result;
-}
-
 namespace strings_internal {
 
-// Do not call directly - these are not part of the public API.
-std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
-  std::string result;
-  size_t total_size = 0;
-  for (absl::string_view piece : pieces) total_size += piece.size();
-  strings_internal::STLStringResizeUninitialized(&result, total_size);
-
-  char* const begin = &result[0];
-  char* out = begin;
-  for (absl::string_view piece : pieces) {
-    const size_t this_size = piece.size();
-    if (this_size != 0) {
-      memcpy(out, piece.data(), this_size);
-      out += this_size;
-    }
-  }
-  assert(out == begin + result.size());
-  return result;
+bool HaveOverlap(const std::string& x, absl::string_view y) {
+  if (y.empty()) return false;
+  // TODO(b/290623057): Re-evaluate the check below: it detects when buffers
+  // overlap (which is good) but it also introduces undefined behaviour when
+  // buffers don't overlap (substracting pointers that do not belong to the same
+  // array is UB [expr.add]). In other words, if compiler assumes that a program
+  // never has UB, then it can replace `assert(HaveOverlap(x, y))` with
+  // `assert(false)`.
+  return (uintptr_t(y.data() - x.data()) <= uintptr_t(x.size()));
 }
 
-// It's possible to call StrAppend with an absl::string_view that is itself a
-// fragment of the string we're appending to.  However the results of this are
-// random. Therefore, check for this in debug mode.  Use unsigned math so we
-// only have to do one comparison. Note, there's an exception case: appending an
-// empty string is always allowed.
-#define ASSERT_NO_OVERLAP(dest, src) \
-  assert(((src).size() == 0) ||      \
-         (uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size())))
-
-void AppendPieces(std::string* dest,
-                  std::initializer_list<absl::string_view> pieces) {
-  size_t old_size = dest->size();
-  size_t total_size = old_size;
-  for (absl::string_view piece : pieces) {
-    ASSERT_NO_OVERLAP(*dest, piece);
-    total_size += piece.size();
-  }
-  strings_internal::STLStringResizeUninitializedAmortized(dest, total_size);
-
-  char* const begin = &(*dest)[0];
-  char* out = begin + old_size;
-  for (absl::string_view piece : pieces) {
-    const size_t this_size = piece.size();
-    if (this_size != 0) {
-      memcpy(out, piece.data(), this_size);
-      out += this_size;
-    }
-  }
-  assert(out == begin + dest->size());
+#if defined(__GNUC__) && !defined(__clang__)
+char* AppendAlphaNum(char* dst, const AlphaNum& a) {
+  return std::copy_n(a.data(), a.size(), dst);
 }
+#endif
 
 }  // namespace strings_internal
 
-void StrAppend(std::string* dest, const AlphaNum& a) {
-  ASSERT_NO_OVERLAP(*dest, a);
-  std::string::size_type old_size = dest->size();
-  strings_internal::STLStringResizeUninitializedAmortized(dest,
-                                                          old_size + a.size());
-  char* const begin = &(*dest)[0];
-  char* out = begin + old_size;
-  out = Append(out, a);
-  assert(out == begin + dest->size());
-}
-
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
-  ASSERT_NO_OVERLAP(*dest, a);
-  ASSERT_NO_OVERLAP(*dest, b);
-  std::string::size_type old_size = dest->size();
-  strings_internal::STLStringResizeUninitializedAmortized(
-      dest, old_size + a.size() + b.size());
-  char* const begin = &(*dest)[0];
-  char* out = begin + old_size;
-  out = Append(out, a);
-  out = Append(out, b);
-  assert(out == begin + dest->size());
-}
-
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
-               const AlphaNum& c) {
-  ASSERT_NO_OVERLAP(*dest, a);
-  ASSERT_NO_OVERLAP(*dest, b);
-  ASSERT_NO_OVERLAP(*dest, c);
-  std::string::size_type old_size = dest->size();
-  strings_internal::STLStringResizeUninitializedAmortized(
-      dest, old_size + a.size() + b.size() + c.size());
-  char* const begin = &(*dest)[0];
-  char* out = begin + old_size;
-  out = Append(out, a);
-  out = Append(out, b);
-  out = Append(out, c);
-  assert(out == begin + dest->size());
-}
-
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
-               const AlphaNum& c, const AlphaNum& d) {
-  ASSERT_NO_OVERLAP(*dest, a);
-  ASSERT_NO_OVERLAP(*dest, b);
-  ASSERT_NO_OVERLAP(*dest, c);
-  ASSERT_NO_OVERLAP(*dest, d);
-  std::string::size_type old_size = dest->size();
-  strings_internal::STLStringResizeUninitializedAmortized(
-      dest, old_size + a.size() + b.size() + c.size() + d.size());
-  char* const begin = &(*dest)[0];
-  char* out = begin + old_size;
-  out = Append(out, a);
-  out = Append(out, b);
-  out = Append(out, c);
-  out = Append(out, d);
-  assert(out == begin + dest->size());
-}
-
 ABSL_NAMESPACE_END
 }  // namespace absl
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
index d5f71ff..f5f2c01 100644
--- a/absl/strings/str_cat.h
+++ b/absl/strings/str_cat.h
@@ -91,7 +91,9 @@
 #include <array>
 #include <cstdint>
 #include <cstring>
+#include <functional>
 #include <string>
+#include <tuple>
 #include <type_traits>
 #include <utility>
 #include <vector>
@@ -99,6 +101,7 @@
 #include "absl/base/attributes.h"
 #include "absl/base/port.h"
 #include "absl/strings/internal/has_absl_stringify.h"
+#include "absl/strings/internal/resize_uninitialized.h"
 #include "absl/strings/internal/stringify_sink.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/string_view.h"
@@ -409,6 +412,93 @@
   char digits_[numbers_internal::kFastToBufferSize];
 };
 
+namespace strings_internal {
+
+// Inlining this function improves performance. However, on GCC this triggers
+// [-Werror=stringop-overflow=], and in that case we split declaration and
+// definition as a workaround.
+#if defined(__GNUC__) && !defined(__clang__)
+char* AppendAlphaNum(char* dst, const AlphaNum& a);
+#else
+inline char* AppendAlphaNum(char* dst, const AlphaNum& a) {
+  return std::copy_n(a.data(), a.size(), dst);
+}
+#endif
+
+// It's possible to call StrAppend with an absl::string_view that is itself a
+// fragment of the string we're appending to.  However the results of this are
+// UB. Therefore, check for this in debug mode.  Use unsigned math so we
+// only have to do one comparison. Note, there's an exception case: appending an
+// empty string is always allowed.
+bool HaveOverlap(const std::string& x, absl::string_view y);
+
+// C++14 compatible alternative to left
+// https://en.cppreference.com/w/cpp/language/fold expression.
+// TODO(b/290784225): Remove `FoldLeft` once Abseil drops C++14 support.
+template <typename F, typename Acc>
+auto FoldLeft(const F&, Acc&& acc) {
+  return std::forward<Acc>(acc);
+}
+template <typename F, typename Acc, typename First, typename... Rest>
+auto FoldLeft(const F& f, Acc&& acc, First&& first, Rest&&... rest) {
+  return FoldLeft(f, f(std::forward<Acc>(acc), std::forward<First>(first)),
+                  std::forward<Rest>(rest)...);
+}
+
+template <std::size_t... Indices, typename Tuple>
+inline void StrAppendTemplate(std::index_sequence<Indices...>,
+                              std::string* dest, Tuple args) {
+  // Note on `Is` and `Tuple`:
+  //
+  //   The goal is to pass N arguments to `StrAppend` and retain `N` as a
+  //   compile-time constant. Thus, we cannot use dynamically sized collections
+  //   such as `absl::Span` or `std::initialized_list`: `std::tuple` comes to
+  //   the rescue.
+  //   `Tuple` is a fixed-sized collection of N elements of `const AlphaNum&`.
+  //   `Indices` is deduced from `std::make_index_sequence<N>` and is used as a
+  //   helper to writing Fold expressions on `Tuple`.
+  assert(FoldLeft(std::logical_and<>{},
+                  !HaveOverlap(*dest, std::get<Indices>(args).Piece())...));
+  size_t old_size = dest->size();
+  size_t new_size =
+      FoldLeft(std::plus<>{}, old_size, std::get<Indices>(args).size()...);
+  strings_internal::STLStringResizeUninitializedAmortized(dest, new_size);
+  FoldLeft(AppendAlphaNum, &(*dest)[old_size], std::get<Indices>(args)...);
+}
+
+template <std::size_t... Indices, typename Tuple>
+inline std::string StrCatTemplate(std::index_sequence<Indices...>, Tuple args) {
+  // See implementation comments in `StrAppendTemplate`.
+  std::string dest;
+  size_t size = FoldLeft(std::plus<size_t>{}, size_t{0},
+                         std::get<Indices>(args).size()...);
+  strings_internal::STLStringResizeUninitializedAmortized(&dest, size);
+  FoldLeft(AppendAlphaNum, &dest[0], std::get<Indices>(args)...);
+  return dest;
+}
+
+// Template functions `StrAppendImpl` and `StrCatImpl` are not exported directly
+// due to historical reasons: there are many existing dependencies that expect
+// 0-4 argument overloads to be non-template functions. Hence implementations
+// are wrapped to keep the compatibility (0-4 arguments only, 5+ arguments are
+// templates).
+
+template <typename... AV>
+inline void StrAppendImpl(std::string* dest, const AV&... args) {
+  strings_internal::StrAppendTemplate(
+      std::make_index_sequence<sizeof...(AV)>{}, dest,
+      std::forward_as_tuple(static_cast<const AlphaNum&>(args)...));
+}
+
+template <typename... AV>
+inline std::string StrCatImpl(const AV&... args) {
+  return strings_internal::StrCatTemplate(
+      std::make_index_sequence<sizeof...(AV)>{},
+      std::forward_as_tuple(static_cast<const AlphaNum&>(args)...));
+}
+
+}  // namespace strings_internal
+
 // -----------------------------------------------------------------------------
 // StrCat()
 // -----------------------------------------------------------------------------
@@ -436,36 +526,32 @@
 // quadratic time operation with O(n) dynamic allocations.
 //
 // See `StrAppend()` below for more information.
-
-namespace strings_internal {
-
-// Do not call directly - this is not part of the public API.
-std::string CatPieces(std::initializer_list<absl::string_view> pieces);
-void AppendPieces(std::string* dest,
-                  std::initializer_list<absl::string_view> pieces);
-
-}  // namespace strings_internal
-
 ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
-
 ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) {
   return std::string(a.data(), a.size());
 }
-
-ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
-ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
-                                        const AlphaNum& c);
-ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
-                                        const AlphaNum& c, const AlphaNum& d);
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a,
+                                               const AlphaNum& b) {
+  return strings_internal::StrCatImpl(a, b);
+}
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a,
+                                               const AlphaNum& b,
+                                               const AlphaNum& c) {
+  return strings_internal::StrCatImpl(a, b, c);
+}
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a,
+                                               const AlphaNum& b,
+                                               const AlphaNum& c,
+                                               const AlphaNum& d) {
+  return strings_internal::StrCatImpl(a, b, c, d);
+}
 
 // Support 5 or more arguments
 template <typename... AV>
 ABSL_MUST_USE_RESULT inline std::string StrCat(
     const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d,
     const AlphaNum& e, const AV&... args) {
-  return strings_internal::CatPieces(
-      {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
-       static_cast<const AlphaNum&>(args).Piece()...});
+  return strings_internal::StrCatImpl(a, b, c, d, e, args...);
 }
 
 // -----------------------------------------------------------------------------
@@ -494,23 +580,27 @@
 //   std::string s = "foobar";
 //   absl::string_view p = s;
 //   StrAppend(&s, p);
-
 inline void StrAppend(std::string*) {}
-void StrAppend(std::string* dest, const AlphaNum& a);
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
-               const AlphaNum& c);
-void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
-               const AlphaNum& c, const AlphaNum& d);
-
+inline void StrAppend(std::string* dest, const AlphaNum& a) {
+  strings_internal::StrAppendImpl(dest, a);
+}
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
+  strings_internal::StrAppendImpl(dest, a, b);
+}
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+                      const AlphaNum& c) {
+  strings_internal::StrAppendImpl(dest, a, b, c);
+}
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+                      const AlphaNum& c, const AlphaNum& d) {
+  strings_internal::StrAppendImpl(dest, a, b, c, d);
+}
 // Support 5 or more arguments
 template <typename... AV>
 inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
                       const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
                       const AV&... args) {
-  strings_internal::AppendPieces(
-      dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
-             static_cast<const AlphaNum&>(args).Piece()...});
+  strings_internal::StrAppendImpl(dest, a, b, c, d, e, args...);
 }
 
 // Helper function for the future StrCat default floating-point format, %.6g