No public description

PiperOrigin-RevId: 803641338
diff --git a/centipede/BUILD b/centipede/BUILD
index 4acaeac..5ffe2fa 100644
--- a/centipede/BUILD
+++ b/centipede/BUILD
@@ -18,6 +18,7 @@
 load("@rules_proto//proto:defs.bzl", "proto_library")
 load("@rules_cc//cc:cc_binary.bzl", "cc_binary")
 load("@rules_cc//cc:cc_library.bzl", "cc_library")
+load("@rules_cc//cc:cc_static_library.bzl", "cc_static_library")
 load("@rules_cc//cc:cc_test.bzl", "cc_test")
 load(":instrument.bzl", "cc_uninstrumented_binary")
 
@@ -1018,11 +1019,8 @@
     "sancov_runtime.h",
     "sancov_state.cc",
     "sancov_state.h",
-    "@com_google_fuzztest//common:defs.h",
 ]
 
-RUNNER_SOURCES_WITH_MAIN = RUNNER_SOURCES_NO_MAIN + ["runner_main.cc"]
-
 # Disable sancov and sanitizer instrumentation.
 #
 # Need to allow MSAN to avoid false positives when interacting with dependencies.
@@ -1066,6 +1064,7 @@
     ":runner_request",
     ":runner_result",
     ":shared_memory_blob_sequence",
+    "@com_google_fuzztest//common:defs",
     "@abseil-cpp//absl/base:core_headers",
     "@abseil-cpp//absl/base:nullability",
     "@abseil-cpp//absl/numeric:bits",
@@ -1115,18 +1114,29 @@
     deps = RUNNER_DEPS,
 )
 
-# A full self-contained library archive that external clients should link to
-# their fuzz targets to make them compatible with the Centipede fuzzing engine
-# (the `:centipede` target in this BUILD).
+# Centipede runner with a main function (runner_main.cc) for LLVM fuzzers.
 cc_library(
     name = "centipede_runner",
-    srcs = RUNNER_SOURCES_WITH_MAIN,
+    srcs = ["runner_main.cc"],
     hdrs = ["runner_interface.h"],
     copts = RUNNER_COPTS,
     linkopts = RUNNER_LINKOPTS,
     linkstatic = True,  # Must be linked statically even when dynamic_mode=on.
     visibility = ["//visibility:public"],
-    deps = RUNNER_DEPS,
+    deps = [
+        ":centipede_runner_no_main",
+        ":mutation_input",
+        "@abseil-cpp//absl/base:nullability",
+        "@com_google_fuzztest//common:defs",
+    ],
+)
+
+# A self-contained static archive (`:centipede_runner` plus its transitive
+# deps) that external clients should link to their fuzz targets to make them
+# compatible with the Centipede fuzzing engine (the `:centipede` target here).
+cc_static_library(
+    name = "centipede_runner_static",
+    deps = [":centipede_runner"],
 )
 
 # Utilities for seed corpus generation.
diff --git a/centipede/sancov_callbacks.cc b/centipede/sancov_callbacks.cc
index b3dedb2..bc86f1a 100644
--- a/centipede/sancov_callbacks.cc
+++ b/centipede/sancov_callbacks.cc
@@ -83,11 +83,10 @@
           pc_offset, addr_offset, sancov_state->main_object.size));
 }
 
-// NOTE: Enforce inlining so that `__builtin_return_address` works.
-ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) {
+ENFORCE_INLINE static void TraceCmpFeatures(uint64_t Arg1, uint64_t Arg2,
+                                            uintptr_t pc) {
   if (!sancov_state->flags.use_cmp_features) return;
-  auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  auto pc_offset = caller_pc - sancov_state->main_object.start_address;
+  auto pc_offset = pc - sancov_state->main_object.start_address;
   uintptr_t hash =
       fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();
   if (Arg1 == Arg2) {
@@ -103,6 +102,28 @@
   }
 }
 
+ENFORCE_INLINE static void TraceCmp1(uint8_t a, uint8_t b, uintptr_t pc) {
+  TraceCmpFeatures(a, b, pc);  // 1-byte cmp: features only, no capture.
+}
+
+ENFORCE_INLINE static void TraceCmp2(uint16_t a, uint16_t b, uintptr_t pc) {
+  TraceCmpFeatures(a, b, pc);  // No-op unless `use_cmp_features` is set.
+  if (a != b && sancov_state->flags.use_auto_dictionary)
+    tls.cmp_trace2.Capture(a, b);  // Unequal pair -> auto-dictionary trace.
+}
+
+ENFORCE_INLINE static void TraceCmp4(uint32_t a, uint32_t b, uintptr_t pc) {
+  TraceCmpFeatures(a, b, pc);  // No-op unless `use_cmp_features` is set.
+  if (a != b && sancov_state->flags.use_auto_dictionary)
+    tls.cmp_trace4.Capture(a, b);  // Unequal pair -> auto-dictionary trace.
+}
+
+ENFORCE_INLINE static void TraceCmp8(uint64_t a, uint64_t b, uintptr_t pc) {
+  TraceCmpFeatures(a, b, pc);  // No-op unless `use_cmp_features` is set.
+  if (a != b && sancov_state->flags.use_auto_dictionary)
+    tls.cmp_trace8.Capture(a, b);  // Unequal pair -> auto-dictionary trace.
+}
+
 //------------------------------------------------------------------------------
 // Implementations of the external sanitizer coverage hooks.
 //------------------------------------------------------------------------------
@@ -117,58 +138,79 @@
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
+  TraceCmp1(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && sancov_state->flags.use_auto_dictionary)
-    tls.cmp_trace2.Capture(Arg1, Arg2);
+  TraceCmp2(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && sancov_state->flags.use_auto_dictionary)
-    tls.cmp_trace4.Capture(Arg1, Arg2);
+  TraceCmp4(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && sancov_state->flags.use_auto_dictionary)
-    tls.cmp_trace8.Capture(Arg1, Arg2);
+  TraceCmp8(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
+  TraceCmp1(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && sancov_state->flags.use_auto_dictionary)
-    tls.cmp_trace2.Capture(Arg1, Arg2);
+  TraceCmp2(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && sancov_state->flags.use_auto_dictionary)
-    tls.cmp_trace4.Capture(Arg1, Arg2);
+  TraceCmp4(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && sancov_state->flags.use_auto_dictionary)
-    tls.cmp_trace8.Capture(Arg1, Arg2);
+  TraceCmp8(Arg1, Arg2,  // pc: return address of this hook (the cmp site).
+            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-// TODO(kcc): [impl] handle switch.
 NO_SANITIZE
-void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {}
+void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t* Cases) {
+  if (ABSL_PREDICT_FALSE(!tls.traced)) return;
+  // Layout of `Cases`: [0] = number of case constants, [1] = size of `Val` in
+  // bits, [2..] = the case constants themselves.
+  const uint64_t num_cases = Cases[0];
+  const uint64_t val_bits = Cases[1];
+  const uint64_t* case_values = Cases + 2;
+  // Each case is traced as a comparison against `Val` at its own pseudo-PC
+  // (`pc_base + i`), since the PC is an input of the feature hash.
+  const auto pc_base = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  // NOTE(review): widths other than 8/16/32/64 are assumed possible (odd-width
+  // integers after optimization). They are rounded up to the next supported
+  // width instead of being silently ignored. `val_bits` is loop-invariant, so
+  // the dispatch below takes the same branch on every iteration.
+  for (uint64_t i = 0; i < num_cases; ++i) {
+    const uintptr_t pc = pc_base + i;
+    if (val_bits <= 8) {
+      TraceCmp1(Val, case_values[i], pc);
+    } else if (val_bits <= 16) {
+      TraceCmp2(Val, case_values[i], pc);
+    } else if (val_bits <= 32) {
+      TraceCmp4(Val, case_values[i], pc);
+    } else {
+      TraceCmp8(Val, case_values[i], pc);
+    }
+  }
+}
 
 // This function is called at startup when
 // -fsanitize-coverage=inline-8bit-counters is used.
diff --git a/centipede/sancov_interceptors.cc b/centipede/sancov_interceptors.cc
index 60a7090..e186251 100644
--- a/centipede/sancov_interceptors.cc
+++ b/centipede/sancov_interceptors.cc
@@ -122,6 +122,9 @@
 DECLARE_CENTIPEDE_ORIG_FUNC(int, strcmp, (const char *s1, const char *s2));
 DECLARE_CENTIPEDE_ORIG_FUNC(int, strncmp,
                             (const char *s1, const char *s2, size_t n));
+DECLARE_CENTIPEDE_ORIG_FUNC(int, strcasecmp, (const char* s1, const char* s2));
+DECLARE_CENTIPEDE_ORIG_FUNC(int, strncasecmp,
+                            (const char* s1, const char* s2, size_t n));
 DECLARE_CENTIPEDE_ORIG_FUNC(int, pthread_create,
                             (pthread_t * thread, const pthread_attr_t *attr,
                              void *(*start_routine)(void *), void *arg));
@@ -139,6 +142,28 @@
   return 0;
 }
 
+// Fallback for case-insensitive comparison of the first `n` bytes of `s1` and
+// `s2`, for use when the real strcasecmp()/strncasecmp() is unavailable.
+// Only ASCII 'A'-'Z' is folded to lowercase; no locale is consulted.
+// Bytes are compared as unsigned values, so the sign of the result is correct
+// for bytes >= 0x80 even where plain `char` is signed. Returns 0 if all `n`
+// bytes match, otherwise the difference of the first mismatching folded bytes.
+static NO_SANITIZE int memcasecmp_fallback(const void* s1, const void* s2,
+                                           size_t n) {
+  const auto* p1 = static_cast<const uint8_t*>(s1);
+  const auto* p2 = static_cast<const uint8_t*>(s2);
+  // Folding is done inline, so no lazily initialized lookup table is needed.
+  for (size_t i = 0; i < n; ++i) {
+    // Work in int so the final subtraction yields a correctly signed result.
+    int c1 = p1[i];
+    int c2 = p2[i];
+    if ('A' <= c1 && c1 <= 'Z') c1 += 'a' - 'A';
+    if ('A' <= c2 && c2 <= 'Z') c2 += 'a' - 'A';
+    if (c1 != c2) return c1 - c2;
+  }
+  return 0;
+}
+
 // memcmp interceptor.
 // Calls the real memcmp() and possibly modifies state.cmp_feature_set.
 extern "C" NO_SANITIZE int memcmp(const void *s1, const void *s2, size_t n) {
@@ -201,6 +226,53 @@
   return NormalizeCmpResult(result);
 }
 
+// strcasecmp interceptor.
+// Delegates to the real strcasecmp() if available, else memcasecmp_fallback();
+// while tracing it also reports the compared bytes via tls.TraceMemCmp().
+extern "C" NO_SANITIZE int strcasecmp(const char* s1, const char* s2) {
+  // Length of the shorter string, i.e. the bytes before the first '\0' in
+  // either input. Computed unconditionally because `TraceMemCmp()` needs it.
+  size_t len = 0;
+  while (s1[len] != '\0' && s2[len] != '\0') ++len;
+  int result;
+  if (strcasecmp_orig) {
+    result = strcasecmp_orig(s1, s2);
+  } else {
+    // `len + 1` includes the terminator, so that e.g. "foo" < "foobar".
+    result = memcasecmp_fallback(s1, s2, len + 1);
+  }
+  if (ABSL_PREDICT_FALSE(!tls.traced)) return result;
+  // Trace only `len` bytes to keep the trailing '\0' out of the dictionary.
+  tls.TraceMemCmp(reinterpret_cast<uintptr_t>(__builtin_return_address(0)),
+                  reinterpret_cast<const uint8_t*>(s1),
+                  reinterpret_cast<const uint8_t*>(s2), len, result == 0);
+  return NormalizeCmpResult(result);
+}
+
+// strncasecmp interceptor.
+// Delegates to the real strncasecmp() if available, else memcasecmp_fallback();
+// while tracing it also reports the compared bytes via tls.TraceMemCmp().
+extern "C" NO_SANITIZE int strncasecmp(const char* s1, const char* s2,
+                                       size_t n) {
+  // Number of leading bytes (at most `n`) before the first '\0' in either
+  // string. Computed unconditionally because `TraceMemCmp()` needs it.
+  size_t len = 0;
+  while (len < n && s1[len] != '\0' && s2[len] != '\0') ++len;
+  // If a terminator was hit before `n` bytes, the comparison must include it,
+  // hence `len + 1`; otherwise all `n` bytes are compared.
+  const size_t cmp_len = (len < n) ? len + 1 : n;
+  // The same clamped length is handed to whichever implementation is used.
+  const int result = strncasecmp_orig
+                         ? strncasecmp_orig(s1, s2, cmp_len)
+                         : memcasecmp_fallback(s1, s2, cmp_len);
+  if (ABSL_PREDICT_FALSE(!tls.traced)) return result;
+  // Trace only `len` bytes to keep the trailing '\0' out of the dictionary.
+  tls.TraceMemCmp(reinterpret_cast<uintptr_t>(__builtin_return_address(0)),
+                  reinterpret_cast<const uint8_t*>(s1),
+                  reinterpret_cast<const uint8_t*>(s2), len, result == 0);
+  return NormalizeCmpResult(result);
+}
+
 // pthread_create interceptor.
 // Calls real pthread_create, but wraps the start_routine() in MyThreadStart.
 extern "C" int pthread_create(