Refactor coverage state out of `GlobalRunnerState` so that it can be shared with the Rust FuzzTest framework for emitting features.
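
For orientation: after this change, each SanitizerCoverage hook in
runner_sancov.cc fans out to two layers: a shared layer (sancov_shared.cc,
which emits features via shared_coverage_state and can be reused by the Rust
framework) and a Centipede-specific framework layer (sancov_cpp.cc). Below is
a minimal sketch of the dispatch pattern, assuming the declarations live in
the new sancov_interface.h (its contents are not shown in this excerpt; the
signatures are inferred from the call sites in runner_sancov.cc):

    #include <cstdint>

    extern "C" {
    // Shared layer (sancov_shared.cc): coverage-feature emission that both
    // Centipede and the Rust FuzzTest framework can reuse.
    void __shared_sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2);
    // Framework layer (sancov_cpp.cc): Centipede-specific work, e.g.
    // capturing auto-dictionary cmp traces in TLS.
    void __framework_sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2);

    // The real hook simply dispatches to both layers.
    void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
      __shared_sanitizer_cov_trace_cmp2(Arg1, Arg2);
      __framework_sanitizer_cov_trace_cmp2(Arg1, Arg2);
    }
    }  // extern "C"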

PiperOrigin-RevId: 783336614
diff --git a/centipede/BUILD b/centipede/BUILD
index cbc0df6..18699b4 100644
--- a/centipede/BUILD
+++ b/centipede/BUILD
@@ -1024,6 +1024,11 @@
 #  e.g. feature.cc. These files are compiled by the engine and the runner
 #  separately, with different compiler flags.
 RUNNER_SOURCES_NO_MAIN = [
+    "sancov_cpp.cc",
+    "shared_coverage_state.cc",
+    "shared_coverage_state.h",
+    "sancov_interface.h",
+    "sancov_shared.cc",
     "byte_array_mutator.cc",
     "byte_array_mutator.h",
     "callstack.h",
@@ -1202,6 +1207,37 @@
     ],
 )
 
+cc_library(
+    name = "shared_coverage",
+    srcs = [
+        "runner_dl_info.cc",
+        "runner_sancov.cc",
+        "runner_utils.cc",
+        "sancov_shared.cc",
+        "shared_coverage_state.cc",
+        "@com_google_fuzztest//common:defs.h",
+    ],
+    hdrs = [
+        "runner_dl_info.h",
+        "runner_interface.h",
+        "runner_utils.h",
+        "sancov_interface.h",
+        "shared_coverage_state.h",
+    ],
+    deps = [
+        ":callstack",
+        ":feature",
+        ":int_utils",
+        ":mutation_input",
+        ":pc_info",
+        ":runner_cmp_trace",
+        "@abseil-cpp//absl/base:core_headers",
+        "@abseil-cpp//absl/base:nullability",
+        "@abseil-cpp//absl/numeric:bits",
+        "@abseil-cpp//absl/types:span",
+    ],
+)
+
 # Flags for :seed_corpus_maker.
 cc_library(
     name = "seed_corpus_maker_flags",
diff --git a/centipede/runner.cc b/centipede/runner.cc
index d998350..33052f1 100644
--- a/centipede/runner.cc
+++ b/centipede/runner.cc
@@ -56,6 +56,7 @@
 #include "./centipede/runner_request.h"
 #include "./centipede/runner_result.h"
 #include "./centipede/runner_utils.h"
+#include "./centipede/shared_coverage_state.h"
 #include "./centipede/shared_memory_blob_sequence.h"
 #include "./common/defs.h"
 
@@ -94,24 +95,20 @@
 }  // namespace
 
 GlobalRunnerState state __attribute__((init_priority(200)));
-// We use __thread instead of thread_local so that the compiler warns if
-// the initializer for `tls` is not a constant expression.
-// `tls` thus must not have a CTOR.
-// This avoids calls to __tls_init() in hot functions that use `tls`.
-__thread ThreadLocalRunnerState tls;
 
 void ThreadLocalRunnerState::TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1,
                                          const uint8_t *s2, size_t n,
                                          bool is_equal) {
-  if (state.run_time_flags.use_cmp_features) {
-    const uintptr_t pc_offset = caller_pc - state.main_object.start_address;
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
+    const uintptr_t pc_offset =
+        caller_pc - shared_coverage_state.main_object.start_address;
     const uintptr_t hash =
         fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();
     const size_t lcp = LengthOfCommonPrefix(s1, s2, n);
     // lcp is a 6-bit number.
     state.cmp_feature_set.set((hash << 6) | lcp);
   }
-  if (!is_equal && state.run_time_flags.use_auto_dictionary) {
+  if (!is_equal && shared_coverage_state.run_time_flags.use_auto_dictionary) {
     cmp_traceN.Capture(n, s1, s2);
   }
 }
@@ -126,8 +123,8 @@
     fprintf(stderr,
             "Disabling stack limit check due to missing stack region info.\n");
   }
-  tls.call_stack.Reset(state.run_time_flags.callstack_level);
-  tls.path_ring_buffer.Reset(state.run_time_flags.path_level);
+  tls.call_stack.Reset(shared_coverage_state.run_time_flags.callstack_level);
+  tls.path_ring_buffer.Reset(shared_coverage_state.run_time_flags.path_level);
   LockGuard lock(state.tls_list_mu);
   // Add myself to state.tls_list.
   auto *old_list = state.tls_list;
@@ -202,23 +199,25 @@
           /*what=*/"Per-input timeout",
           /*units=*/"sec",
           /*value=*/curr_time - input_start_time,
-          /*limit=*/state.run_time_flags.timeout_per_input,
-          /*ignore_report=*/state.run_time_flags.ignore_timeout_reports != 0,
+          /*limit=*/shared_coverage_state.run_time_flags.timeout_per_input,
+          /*ignore_report=*/
+          shared_coverage_state.run_time_flags.ignore_timeout_reports != 0,
           /*failure=*/kExecutionFailurePerInputTimeout.data(),
       }},
       {Resource{
           /*what=*/"Per-batch timeout",
           /*units=*/"sec",
           /*value=*/curr_time - batch_start_time,
-          /*limit=*/state.run_time_flags.timeout_per_batch,
-          /*ignore_report=*/state.run_time_flags.ignore_timeout_reports != 0,
+          /*limit=*/shared_coverage_state.run_time_flags.timeout_per_batch,
+          /*ignore_report=*/
+          shared_coverage_state.run_time_flags.ignore_timeout_reports != 0,
           /*failure=*/kExecutionFailurePerBatchTimeout.data(),
       }},
       {Resource{
           /*what=*/"RSS limit",
           /*units=*/"MB",
           /*value=*/GetPeakRSSMb(),
-          /*limit=*/state.run_time_flags.rss_limit_mb,
+          /*limit=*/shared_coverage_state.run_time_flags.rss_limit_mb,
           /*ignore_report=*/false,
           /*failure=*/kExecutionFailureRssLimitExceeded.data(),
       }},
@@ -278,7 +277,8 @@
 
 __attribute__((noinline)) void CheckStackLimit(uintptr_t sp) {
   static std::atomic_flag stack_limit_exceeded = ATOMIC_FLAG_INIT;
-  const size_t stack_limit = state.run_time_flags.stack_limit_kb.load() << 10;
+  const size_t stack_limit =
+      shared_coverage_state.run_time_flags.stack_limit_kb.load() << 10;
   // Check for the stack limit only if sp is inside the stack region.
   if (stack_limit > 0 && tls.stack_region_low &&
       tls.top_frame_sp - sp > stack_limit) {
@@ -311,10 +311,10 @@
           "Starting watchdog thread: timeout_per_input: %" PRIu64
           " sec; timeout_per_batch: %" PRIu64 " sec; rss_limit_mb: %" PRIu64
           " MB; stack_limit_kb: %" PRIu64 " KB\n",
-          state.run_time_flags.timeout_per_input.load(),
-          state.run_time_flags.timeout_per_batch,
-          state.run_time_flags.rss_limit_mb.load(),
-          state.run_time_flags.stack_limit_kb.load());
+          shared_coverage_state.run_time_flags.timeout_per_input.load(),
+          shared_coverage_state.run_time_flags.timeout_per_batch,
+          shared_coverage_state.run_time_flags.rss_limit_mb.load(),
+          shared_coverage_state.run_time_flags.stack_limit_kb.load());
   pthread_t watchdog_thread;
   pthread_create(&watchdog_thread, nullptr, WatchdogThread, nullptr);
   pthread_detach(watchdog_thread);
@@ -389,10 +389,12 @@
 static void
 PrepareCoverage(bool full_clear) {
   state.CleanUpDetachedTls();
-  if (state.run_time_flags.path_level != 0) {
+  if (shared_coverage_state.run_time_flags.path_level != 0) {
     state.ForEachTls([](ThreadLocalRunnerState &tls) {
-      tls.path_ring_buffer.Reset(state.run_time_flags.path_level);
-      tls.call_stack.Reset(state.run_time_flags.callstack_level);
+      tls.path_ring_buffer.Reset(
+          shared_coverage_state.run_time_flags.path_level);
+      tls.call_stack.Reset(
+          shared_coverage_state.run_time_flags.callstack_level);
       tls.lowest_sp = tls.top_frame_sp;
     });
   }
@@ -403,8 +405,9 @@
     }
   }
   if (!full_clear) return;
+  PrepareSharedCoverage(full_clear);
   state.ForEachTls([](ThreadLocalRunnerState &tls) {
-    if (state.run_time_flags.use_auto_dictionary) {
+    if (shared_coverage_state.run_time_flags.use_auto_dictionary) {
       tls.cmp_trace2.Clear();
       tls.cmp_trace4.Clear();
       tls.cmp_trace8.Clear();
@@ -414,18 +417,14 @@
   state.pc_counter_set.ForEachNonZeroByte(
       [](size_t idx, uint8_t value) {}, 0,
       state.actual_pc_counter_set_size_aligned);
-  if (state.run_time_flags.use_dataflow_features)
+  if (shared_coverage_state.run_time_flags.use_dataflow_features)
     state.data_flow_feature_set.ForEachNonZeroBit([](size_t idx) {});
-  if (state.run_time_flags.use_cmp_features) {
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
     state.cmp_feature_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {});
   }
-  if (state.run_time_flags.path_level != 0)
+  if (shared_coverage_state.run_time_flags.path_level != 0)
     state.path_feature_set.ForEachNonZeroBit([](size_t idx) {});
-  if (state.run_time_flags.callstack_level != 0)
+  if (shared_coverage_state.run_time_flags.callstack_level != 0)
     state.callstack_set.ForEachNonZeroBit([](size_t idx) {});
   for (auto *p = state.user_defined_begin; p != state.user_defined_end; ++p) {
     *p = 0;
@@ -433,23 +432,14 @@
   state.sancov_objects.ClearInlineCounters();
 }
 
-static void MaybeAddFeature(feature_t feature) {
-  if (!state.run_time_flags.skip_seen_features) {
-    state.g_features.push_back(feature);
-  } else if (!state.seen_features.get(feature)) {
-    state.g_features.push_back(feature);
-    state.seen_features.set(feature);
-  }
-}
-
 // Adds a kPCs and/or k8bitCounters feature to `g_features` based on arguments.
 // `idx` is a pc_index.
 // `counter_value` (non-zero) is a counter value associated with that PC.
 static void AddPcIndxedAndCounterToFeatures(size_t idx, uint8_t counter_value) {
-  if (state.run_time_flags.use_pc_features) {
+  if (shared_coverage_state.run_time_flags.use_pc_features) {
     MaybeAddFeature(feature_domains::kPCs.ConvertToMe(idx));
   }
-  if (state.run_time_flags.use_counter_features) {
+  if (shared_coverage_state.run_time_flags.use_counter_features) {
     MaybeAddFeature(feature_domains::k8bitCounters.ConvertToMe(
         Convert8bitCounterToNumber(idx, counter_value)));
   }
@@ -465,10 +455,12 @@
 __attribute__((noinline))  // so that we see it in profile.
 static void
 PostProcessCoverage(int target_return_value) {
-  state.g_features.clear();
+  shared_coverage_state.g_features.clear();
 
   if (target_return_value == -1) return;
 
+  PostProcessSharedCoverage();
+
   // Convert counters to features.
   state.pc_counter_set.ForEachNonZeroByte(
       [](size_t idx, uint8_t value) {
@@ -477,34 +469,22 @@
       0, state.actual_pc_counter_set_size_aligned);
 
   // Convert data flow bit set to features.
-  if (state.run_time_flags.use_dataflow_features) {
+  if (shared_coverage_state.run_time_flags.use_dataflow_features) {
     state.data_flow_feature_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kDataFlow.ConvertToMe(idx));
     });
   }
 
   // Convert cmp bit set to features.
-  if (state.run_time_flags.use_cmp_features) {
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
     // TODO(kcc): remove cmp_feature_set.
     state.cmp_feature_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kCMP.ConvertToMe(idx));
     });
-    state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPEq.ConvertToMe(idx));
-    });
-    state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPModDiff.ConvertToMe(idx));
-    });
-    state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPHamming.ConvertToMe(idx));
-    });
-    state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPDiffLog.ConvertToMe(idx));
-    });
   }
 
   // Convert path bit set to features.
-  if (state.run_time_flags.path_level != 0) {
+  if (shared_coverage_state.run_time_flags.path_level != 0) {
     state.path_feature_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kBoundedPath.ConvertToMe(idx));
     });
@@ -512,7 +492,7 @@
 
   // Iterate all threads and get features from TLS data.
   state.ForEachTls([](ThreadLocalRunnerState &tls) {
-    if (state.run_time_flags.callstack_level != 0) {
+    if (shared_coverage_state.run_time_flags.callstack_level != 0) {
       RunnerCheck(tls.top_frame_sp >= tls.lowest_sp,
                   "bad values of tls.top_frame_sp and tls.lowest_sp");
       size_t sp_diff = tls.top_frame_sp - tls.lowest_sp;
@@ -520,7 +500,7 @@
     }
   });
 
-  if (state.run_time_flags.callstack_level != 0) {
+  if (shared_coverage_state.run_time_flags.callstack_level != 0) {
     state.callstack_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kCallStack.ConvertToMe(idx));
     });
@@ -546,8 +526,8 @@
 
   // Iterates all non-zero inline 8-bit counters, if they are present.
   // Calls AddPcIndxedAndCounterToFeatures on non-zero counters and zeroes them.
-  if (state.run_time_flags.use_pc_features ||
-      state.run_time_flags.use_counter_features) {
+  if (shared_coverage_state.run_time_flags.use_pc_features ||
+      shared_coverage_state.run_time_flags.use_counter_features) {
     state.sancov_objects.ForEachNonZeroInlineCounter(
         [](size_t idx, uint8_t counter_value) {
           AddPcIndxedAndCounterToFeatures(idx, counter_value);
@@ -668,8 +648,8 @@
            input_path);
   FILE *features_file = fopen(features_file_path, "w");
   PrintErrorAndExitIf(features_file == nullptr, "can't open coverage file");
-  WriteFeaturesToFile(features_file, state.g_features.data(),
-                      state.g_features.size());
+  WriteFeaturesToFile(features_file, shared_coverage_state.g_features.data(),
+                      shared_coverage_state.g_features.size());
   fclose(features_file);
 }
 
@@ -699,9 +679,9 @@
 // Returns the byte size of `g_features`.
 static size_t CopyFeatures(uint8_t *data, size_t capacity) {
   const size_t features_len_in_bytes =
-      state.g_features.size() * sizeof(feature_t);
+      shared_coverage_state.g_features.size() * sizeof(feature_t);
   if (features_len_in_bytes > capacity) return 0;
-  memcpy(data, state.g_features.data(), features_len_in_bytes);
+  memcpy(data, shared_coverage_state.g_features.data(), features_len_in_bytes);
   return features_len_in_bytes;
 }
 
@@ -729,14 +709,15 @@
   }
 
   // Copy features to shared memory.
-  if (!BatchResult::WriteOneFeatureVec(
-          state.g_features.data(), state.g_features.size(), outputs_blobseq)) {
+  if (!BatchResult::WriteOneFeatureVec(shared_coverage_state.g_features.data(),
+                                       shared_coverage_state.g_features.size(),
+                                       outputs_blobseq)) {
     return false;
   }
 
   ExecutionMetadata metadata;
   // Copy the CMP traces to shared memory.
-  if (state.run_time_flags.use_auto_dictionary) {
+  if (shared_coverage_state.run_time_flags.use_auto_dictionary) {
     bool append_failed = false;
     state.ForEachTls([&metadata, &append_failed](ThreadLocalRunnerState &tls) {
       if (!AppendCmpEntries(tls.cmp_trace2, metadata)) append_failed = true;
@@ -794,7 +775,8 @@
 // Dumps the pc table to `output_path`.
 // Requires that state.main_object is already computed.
 static void DumpPcTable(const char *absl_nonnull output_path) {
-  PrintErrorAndExitIf(!state.main_object.IsSet(), "main_object is not set");
+  PrintErrorAndExitIf(!shared_coverage_state.main_object.IsSet(),
+                      "main_object is not set");
   FILE *output_file = fopen(output_path, "w");
   PrintErrorAndExitIf(output_file == nullptr, "can't open output file");
   std::vector<PCInfo> pcs = state.sancov_objects.CreatePCTable();
@@ -810,7 +792,8 @@
 // Dumps the control-flow table to `output_path`.
 // Requires that state.main_object is already computed.
 static void DumpCfTable(const char *absl_nonnull output_path) {
-  PrintErrorAndExitIf(!state.main_object.IsSet(), "main_object is not set");
+  PrintErrorAndExitIf(!shared_coverage_state.main_object.IsSet(),
+                      "main_object is not set");
   FILE *output_file = fopen(output_path, "w");
   PrintErrorAndExitIf(output_file == nullptr, "can't open output file");
   std::vector<uintptr_t> data = state.sancov_objects.CreateCfTable();
@@ -949,7 +932,7 @@
   if (custom_mutator_cb_ == nullptr) return false;
   unsigned int seed = GetRandomSeed();
   const size_t num_inputs = inputs.size();
-  const size_t max_mutant_size = state.run_time_flags.max_len;
+  const size_t max_mutant_size = shared_coverage_state.run_time_flags.max_len;
   constexpr size_t kAverageMutationAttempts = 2;
   ByteArray mutant(max_mutant_size);
   for (size_t attempt = 0, num_outputs = 0;
@@ -962,7 +945,8 @@
     std::copy(input_data.cbegin(), input_data.cbegin() + size, mutant.begin());
     size_t new_size = 0;
     if ((custom_crossover_cb_ != nullptr) &&
-        rand_r(&seed) % 100 < state.run_time_flags.crossover_level) {
+        rand_r(&seed) % 100 <
+            shared_coverage_state.run_time_flags.crossover_level) {
       // Perform crossover `crossover_level`% of the time.
       const auto &other_data = inputs[rand_r(&seed) % num_inputs].data;
       new_size = custom_crossover_cb_(
@@ -1008,7 +992,7 @@
   // No-op under ASAN/TSAN/MSAN - those may still rely on rss_limit_mb.
   if (vm_size_in_bytes < one_tb) {
     size_t address_space_limit_mb =
-        state.HasIntFlag(":address_space_limit_mb=", 0);
+        shared_coverage_state.HasIntFlag(":address_space_limit_mb=", 0);
     if (address_space_limit_mb > 0) {
       size_t limit_in_bytes = address_space_limit_mb << 20;
       struct rlimit rlimit_as = {limit_in_bytes, limit_in_bytes};
@@ -1023,7 +1007,8 @@
 }
 
 static void MaybePopulateReversePcTable() {
-  const char *pcs_file_path = state.GetStringFlag(":pcs_file_path=");
+  const char *pcs_file_path =
+      shared_coverage_state.GetStringFlag(":pcs_file_path=");
   if (!pcs_file_path) return;
   const auto pc_table = ReadBytesFromFilePath<PCInfo>(pcs_file_path);
   state.reverse_pc_table.SetFromPCs(pc_table);
@@ -1064,8 +1049,9 @@
   SetLimits();
 
   // Compute main_object.
-  main_object = GetDlInfo(state.GetStringFlag(":dl_path_suffix="));
-  if (!main_object.IsSet()) {
+  shared_coverage_state.main_object =
+      GetDlInfo(shared_coverage_state.GetStringFlag(":dl_path_suffix="));
+  if (!shared_coverage_state.main_object.IsSet()) {
     fprintf(
         stderr,
         "Failed to compute main_object. This may happen"
@@ -1073,13 +1059,16 @@
   }
 
   // Dump the binary info tables.
-  if (state.HasFlag(":dump_binary_info:")) {
-    RunnerCheck(state.arg1 && state.arg2 && state.arg3,
+  if (shared_coverage_state.HasFlag(":dump_binary_info:")) {
+    RunnerCheck(shared_coverage_state.arg1 && shared_coverage_state.arg2 &&
+                    shared_coverage_state.arg3,
                 "dump_binary_info requires 3 arguments");
-    if (!state.arg1 || !state.arg2 || !state.arg3) _exit(EXIT_FAILURE);
-    DumpPcTable(state.arg1);
-    DumpCfTable(state.arg2);
-    DumpDsoTable(state.arg3);
+    if (!shared_coverage_state.arg1 || !shared_coverage_state.arg2 ||
+        !shared_coverage_state.arg3)
+      _exit(EXIT_FAILURE);
+    DumpPcTable(shared_coverage_state.arg1);
+    DumpCfTable(shared_coverage_state.arg2);
+    DumpDsoTable(shared_coverage_state.arg3);
     _exit(EXIT_SUCCESS);
   }
 
@@ -1100,10 +1089,11 @@
   // The process is winding down, but CentipedeRunnerMain did not run.
   // This means, the binary is standalone with its own main(), and we need to
   // report the coverage now.
-  if (!state.centipede_runner_main_executed && state.HasFlag(":shmem:")) {
+  if (!state.centipede_runner_main_executed &&
+      shared_coverage_state.HasFlag(":shmem:")) {
     int exit_status = EXIT_SUCCESS;  // TODO(kcc): do we know our exit status?
     PostProcessCoverage(exit_status);
-    SharedMemoryBlobSequence outputs_blobseq(state.arg2);
+    SharedMemoryBlobSequence outputs_blobseq(shared_coverage_state.arg2);
     StartSendingOutputsToEngine(outputs_blobseq);
     FinishSendingOutputsToEngine(outputs_blobseq);
   }
@@ -1129,25 +1119,27 @@
   state.centipede_runner_main_executed = true;
 
   fprintf(stderr, "Centipede fuzz target runner; argv[0]: %s flags: %s\n",
-          argv[0], state.centipede_runner_flags);
+          argv[0], shared_coverage_state.centipede_runner_flags);
 
-  if (state.HasFlag(":dump_configuration:")) {
-    DumpSerializedTargetConfigToFile(callbacks,
-                                     /*output_file_path=*/state.arg1);
+  if (shared_coverage_state.HasFlag(":dump_configuration:")) {
+    DumpSerializedTargetConfigToFile(
+        callbacks,
+        /*output_file_path=*/shared_coverage_state.arg1);
     return EXIT_SUCCESS;
   }
 
-  if (state.HasFlag(":dump_seed_inputs:")) {
+  if (shared_coverage_state.HasFlag(":dump_seed_inputs:")) {
     // Seed request.
-    DumpSeedsToDir(callbacks, /*output_dir=*/state.arg1);
+    DumpSeedsToDir(callbacks, /*output_dir=*/shared_coverage_state.arg1);
     return EXIT_SUCCESS;
   }
 
   // Inputs / outputs from shmem.
-  if (state.HasFlag(":shmem:")) {
-    if (!state.arg1 || !state.arg2) return EXIT_FAILURE;
-    SharedMemoryBlobSequence inputs_blobseq(state.arg1);
-    SharedMemoryBlobSequence outputs_blobseq(state.arg2);
+  if (shared_coverage_state.HasFlag(":shmem:")) {
+    if (!shared_coverage_state.arg1 || !shared_coverage_state.arg2)
+      return EXIT_FAILURE;
+    SharedMemoryBlobSequence inputs_blobseq(shared_coverage_state.arg1);
+    SharedMemoryBlobSequence outputs_blobseq(shared_coverage_state.arg2);
     // Read the first blob. It indicates what further actions to take.
     auto request_type_blob = inputs_blobseq.Read();
     if (IsMutationRequest(request_type_blob)) {
@@ -1155,10 +1147,10 @@
       // We still pay for executing the coverage callbacks, but those will
       // return immediately.
       // TODO(kcc): do this more consistently, for all coverage types.
-      state.run_time_flags.use_cmp_features = false;
-      state.run_time_flags.use_pc_features = false;
-      state.run_time_flags.use_dataflow_features = false;
-      state.run_time_flags.use_counter_features = false;
+      shared_coverage_state.run_time_flags.use_cmp_features = false;
+      shared_coverage_state.run_time_flags.use_pc_features = false;
+      shared_coverage_state.run_time_flags.use_dataflow_features = false;
+      shared_coverage_state.run_time_flags.use_counter_features = false;
       // Mutation request.
       inputs_blobseq.Reset();
       state.byte_array_mutator =
@@ -1198,13 +1190,15 @@
 extern "C" void CentipedeSetRssLimit(size_t rss_limit_mb) {
   fprintf(stderr, "CentipedeSetRssLimit: changing rss_limit_mb to %zu\n",
           rss_limit_mb);
-  fuzztest::internal::state.run_time_flags.rss_limit_mb = rss_limit_mb;
+  fuzztest::internal::shared_coverage_state.run_time_flags.rss_limit_mb =
+      rss_limit_mb;
 }
 
 extern "C" void CentipedeSetStackLimit(size_t stack_limit_kb) {
   fprintf(stderr, "CentipedeSetStackLimit: changing stack_limit_kb to %zu\n",
           stack_limit_kb);
-  fuzztest::internal::state.run_time_flags.stack_limit_kb = stack_limit_kb;
+  fuzztest::internal::shared_coverage_state.run_time_flags.stack_limit_kb =
+      stack_limit_kb;
 }
 
 extern "C" void CentipedeSetTimeoutPerInput(uint64_t timeout_per_input) {
@@ -1212,17 +1206,10 @@
           "CentipedeSetTimeoutPerInput: changing timeout_per_input to %" PRIu64
           "\n",
           timeout_per_input);
-  fuzztest::internal::state.run_time_flags.timeout_per_input =
+  fuzztest::internal::shared_coverage_state.run_time_flags.timeout_per_input =
       timeout_per_input;
 }
 
-extern "C" __attribute__((weak)) const char *absl_nullable
-CentipedeGetRunnerFlags() {
-  if (const char *runner_flags_env = getenv("CENTIPEDE_RUNNER_FLAGS"))
-    return strdup(runner_flags_env);
-  return nullptr;
-}
-
 static std::atomic<bool> in_execution_batch = false;
 
 extern "C" void CentipedeBeginExecutionBatch() {
@@ -1289,11 +1276,11 @@
 }
 
 extern "C" void CentipedeSetFailureDescription(const char *description) {
-  using fuzztest::internal::state;
-  if (state.failure_description_path == nullptr) return;
+  using fuzztest::internal::shared_coverage_state;
+  if (shared_coverage_state.failure_description_path == nullptr) return;
   // Make sure that the write is atomic and only happens once.
   [[maybe_unused]] static int write_once = [=] {
-    FILE *f = fopen(state.failure_description_path, "w");
+    FILE *f = fopen(shared_coverage_state.failure_description_path, "w");
     if (f == nullptr) {
       perror("FAILURE: fopen()");
       return 0;
diff --git a/centipede/runner.h b/centipede/runner.h
index 8e4ff8e..d2e6c7f 100644
--- a/centipede/runner.h
+++ b/centipede/runner.h
@@ -19,28 +19,21 @@
 #include <string.h>
 #include <time.h>
 
-#include <algorithm>
 #include <atomic>
 #include <cstddef>
 #include <cstdint>
 #include <cstdlib>
 
 #include "absl/base/const_init.h"
-#include "absl/base/nullability.h"
-#include "absl/numeric/bits.h"
 #include "./centipede/byte_array_mutator.h"
-#include "./centipede/callstack.h"
 #include "./centipede/concurrent_bitset.h"
 #include "./centipede/concurrent_byteset.h"
 #include "./centipede/feature.h"
-#include "./centipede/hashed_ring_buffer.h"
 #include "./centipede/knobs.h"
 #include "./centipede/reverse_pc_table.h"
-#include "./centipede/runner_cmp_trace.h"
-#include "./centipede/runner_dl_info.h"
-#include "./centipede/runner_interface.h"
 #include "./centipede/runner_result.h"
 #include "./centipede/runner_sancov_object.h"
+#include "./centipede/shared_coverage_state.h"
 
 namespace fuzztest::internal {
 
@@ -54,79 +47,6 @@
   pthread_mutex_t &mu_;
 };
 
-// Flags derived from CENTIPEDE_RUNNER_FLAGS.
-// Flags used in instrumentation callbacks are bit-packed for efficiency.
-struct RunTimeFlags {
-  uint64_t path_level : 8;
-  uint64_t use_pc_features : 1;
-  uint64_t use_dataflow_features : 1;
-  uint64_t use_cmp_features : 1;
-  uint64_t callstack_level : 8;
-  uint64_t use_counter_features : 1;
-  uint64_t use_auto_dictionary : 1;
-  std::atomic<uint64_t> timeout_per_input;
-  uint64_t timeout_per_batch;
-  std::atomic<uint64_t> stack_limit_kb;
-  std::atomic<uint64_t> rss_limit_mb;
-  uint64_t crossover_level;
-  uint64_t skip_seen_features : 1;
-  uint64_t ignore_timeout_reports : 1;
-  uint64_t max_len;
-};
-
-// One such object is created in runner's TLS.
-// There is no CTOR, since we don't want to use the brittle and lazy TLS CTORs.
-// All data members are zero-initialized during thread creation.
-struct ThreadLocalRunnerState {
-  // Traces the memory comparison of `n` bytes at `s1` and `s2` called at
-  // `caller_pc` with `is_equal` indicating whether the two memory regions have
-  // equal contents. May add cmp features and auto-dictionary entries if
-  // enabled.
-  void TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1, const uint8_t *s2,
-                   size_t n, bool is_equal);
-
-  // Intrusive doubly-linked list of TLS objects.
-  // Guarded by state.tls_list_mu.
-  ThreadLocalRunnerState *next, *prev;
-
-  // The pthread_create() interceptor calls OnThreadStart() before the thread
-  // callback. The main thread also calls OnThreadStart(). OnThreadStop() will
-  // be called when thread termination is detected internally - see runner.cc.
-  void OnThreadStart();
-  void OnThreadStop();
-
-  // Whether OnThreadStart() is called on this thread. This is used as a proxy
-  // of the readiness of the lower-level runtime.
-  bool started;
-
-  // Paths are thread-local, so we maintain the current bounded path here.
-  // We allow paths of up to 100, controlled at run-time via the "path_level".
-  static constexpr uint64_t kBoundedPathLength = 100;
-  HashedRingBuffer<kBoundedPathLength> path_ring_buffer;
-
-  // Value of SP in the top call frame of the thread, computed in OnThreadStart.
-  uintptr_t top_frame_sp;
-  // The lower bound of the stack region of this thread. 0 means unknown.
-  uintptr_t stack_region_low;
-  // Lowest observed value of SP.
-  uintptr_t lowest_sp;
-
-  // The (imprecise) call stack is updated by the PC callback.
-  CallStack<> call_stack;
-
-  // Cmp traces capture the arguments of CMP instructions, memcmp, etc.
-  // We have dedicated traces for 2-, 4-, and 8-byte comparison, and
-  // a catch-all `cmp_traceN` trace for memcmp, etc.
-  CmpTrace<2, 64> cmp_trace2;
-  CmpTrace<4, 64> cmp_trace4;
-  CmpTrace<8, 64> cmp_trace8;
-  CmpTrace<0, 64> cmp_traceN;
-
-  // Set this to true if the thread needs to be ignored in ForEachTLS.
-  // It should be always false if the state is in the global detached_tls_list.
-  bool ignore;
-};
-
 // One global object of this type is created by the runner at start up.
 // All data members will be initialized to zero, unless they have initializers.
 // Accesses to the subobjects should be fast, so we are trying to avoid
@@ -144,79 +64,6 @@
   GlobalRunnerState();
   ~GlobalRunnerState();
 
-  // Runner reads flags from CentipedeGetRunnerFlags(). We don't use flags
-  // passed via argv so that argv flags can be passed directly to
-  // LLVMFuzzerInitialize, w/o filtering. The flags are separated with
-  // ':' on both sides, i.e. like this: ":flag1:flag2:flag3=value3".
-  // We do it this way to make the flag parsing code extremely simple. The
-  // interface is private between Centipede and the runner and may change.
-  //
-  // Note that this field reflects the initial runner flags. But some
-  // flags can change later (if wrapped with std::atomic).
-  const char *centipede_runner_flags = CentipedeGetRunnerFlags();
-  const char *arg1 = GetStringFlag(":arg1=");
-  const char *arg2 = GetStringFlag(":arg2=");
-  const char *arg3 = GetStringFlag(":arg3=");
-  // The path to a file where the runner may write the description of failure.
-  const char *failure_description_path =
-      GetStringFlag(":failure_description_path=");
-
-  // Flags.
-  RunTimeFlags run_time_flags = {
-      /*path_level=*/std::min(ThreadLocalRunnerState::kBoundedPathLength,
-                              HasIntFlag(":path_level=", 0)),
-      /*use_pc_features=*/HasFlag(":use_pc_features:"),
-      /*use_dataflow_features=*/HasFlag(":use_dataflow_features:"),
-      /*use_cmp_features=*/HasFlag(":use_cmp_features:"),
-      /*callstack_level=*/HasIntFlag(":callstack_level=", 0),
-      /*use_counter_features=*/HasFlag(":use_counter_features:"),
-      /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"),
-      /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0),
-      /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0),
-      /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0),
-      /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0),
-      /*crossover_level=*/HasIntFlag(":crossover_level=", 50),
-      /*skip_seen_features=*/HasFlag(":skip_seen_features:"),
-      /*ignore_timeout_reports=*/HasFlag(":ignore_timeout_reports:"),
-      /*max_len=*/HasIntFlag(":max_len=", 4000),
-  };
-
-  // Returns true iff `flag` is present.
-  // Typical usage: pass ":some_flag:", i.e. the flag name surrounded with ':'.
-  // TODO(ussuri): Refactor `char *` into a `string_view`.
-  bool HasFlag(const char *absl_nonnull flag) const {
-    if (!centipede_runner_flags) return false;
-    return strstr(centipede_runner_flags, flag) != nullptr;
-  }
-
-  // If a flag=value pair is present, returns value,
-  // otherwise returns `default_value`.
-  // Typical usage: pass ":some_flag=".
-  // TODO(ussuri): Refactor `char *` into a `string_view`.
-  uint64_t HasIntFlag(const char *absl_nonnull flag,
-                      uint64_t default_value) const {
-    if (!centipede_runner_flags) return default_value;
-    const char *beg = strstr(centipede_runner_flags, flag);
-    if (!beg) return default_value;
-    return atoll(beg + strlen(flag));  // NOLINT: can't use strto64, etc.
-  }
-
-  // If a :flag=value: pair is present returns value, otherwise returns nullptr.
-  // The result is obtained by calling strndup, so make sure to save
-  // it in `this` to avoid a leak.
-  // Typical usage: pass ":some_flag=".
-  // TODO(ussuri): Refactor `char *` into a `string_view`.
-  const char *absl_nullable GetStringFlag(const char *absl_nonnull flag) const {
-    if (!centipede_runner_flags) return nullptr;
-    // Extract "value" from ":flag=value:" inside centipede_runner_flags.
-    const char *beg = strstr(centipede_runner_flags, flag);
-    if (!beg) return nullptr;
-    const char *value_beg = beg + strlen(flag);
-    const char *end = strstr(value_beg, ":");
-    if (!end) return nullptr;
-    return strndup(value_beg, end - value_beg);
-  }
-
   pthread_mutex_t execution_result_override_mu = PTHREAD_MUTEX_INITIALIZER;
   // If not nullptr, it points to a batch result with either zero or one
   // execution. When an execution result present, it will be passed as the
@@ -247,20 +94,6 @@
   // Reclaims all TLSs in detached_tls_list and cleans up the list.
   void CleanUpDetachedTls();
 
-  // Computed by DlInfo().
-  // Usually, the main object is the executable binary containing main()
-  // and most of the executable code (we assume that the target is
-  // built in mostly-static mode, i.e. -dynamic_mode=off).
-  // When the `dl_path_suffix` runner flag is provided, the main_object refers
-  // to the dynamic library (DSO) pointed to by this flag.
-  //
-  // Note: this runner currently does not support more than one instrumented
-  // DSO in the process, i.e. you either instrument the main binary, or one DSO.
-  // Supporting more than one DSO will require major changes,
-  // major added complexity, and potentially cause slowdown.
-  // There is currently no motivation for such a change.
-  DlInfo main_object;
-
   // State for SanitizerCoverage.
   // See https://clang.llvm.org/docs/SanitizerCoverage.html.
   SanCovObjectArray sancov_objects;
@@ -272,14 +105,8 @@
   // Tracing CMP instructions, capture events from these domains:
   // kCMPEq, kCMPModDiff, kCMPHamming, kCMPModDiffLog, kCMPMsbEq.
   // See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow.
-  // An arbitrarily large size.
-  static constexpr size_t kCmpFeatureSetSize = 1 << 18;
   // TODO(kcc): remove cmp_feature_set.
   ConcurrentBitSet<kCmpFeatureSetSize> cmp_feature_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_eq_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_moddiff_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_hamming_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_difflog_set{absl::kConstInit};
 
   // We think that call stack produces rich signal, so we give a few bits to it.
   static constexpr size_t kCallStackFeatureSetSize = 1 << 24;
@@ -346,20 +173,9 @@
 
   // The Watchdog thread sets this to true.
   std::atomic<bool> watchdog_thread_started;
-
-  // An arbitrarily large size.
-  static const size_t kMaxFeatures = 1 << 20;
-  // FeatureArray used to accumulate features from all sources.
-  FeatureArray<kMaxFeatures> g_features;
-
-  // Features that were seen before.
-  static constexpr size_t kSeenFeatureSetSize =
-      absl::bit_ceil(feature_domains::kLastDomain.end());
-  ConcurrentBitSet<kSeenFeatureSetSize> seen_features{absl::kConstInit};
 };
 
 extern GlobalRunnerState state;
-extern __thread ThreadLocalRunnerState tls;
 
 // Check for stack limit for the stack pointer `sp` in the current thread.
 void CheckStackLimit(uintptr_t sp);
diff --git a/centipede/runner_interceptors.cc b/centipede/runner_interceptors.cc
index 886c450..c705148 100644
--- a/centipede/runner_interceptors.cc
+++ b/centipede/runner_interceptors.cc
@@ -22,7 +22,7 @@
 
 #include "absl/base/nullability.h"
 #include "absl/base/optimization.h"
-#include "./centipede/runner.h"
+#include "./centipede/shared_coverage_state.h"
 
 using fuzztest::internal::tls;
 
diff --git a/centipede/runner_interface.h b/centipede/runner_interface.h
index f12691d..0360f34 100644
--- a/centipede/runner_interface.h
+++ b/centipede/runner_interface.h
@@ -22,7 +22,6 @@
 #include <functional>
 #include <memory>
 #include <string>
-#include <string_view>
 #include <vector>
 
 #include "absl/base/nullability.h"
diff --git a/centipede/runner_sancov.cc b/centipede/runner_sancov.cc
index 22435f4..4c4657f 100644
--- a/centipede/runner_sancov.cc
+++ b/centipede/runner_sancov.cc
@@ -17,17 +17,11 @@
 
 #include <pthread.h>
 
-#include <cstddef>
 #include <cstdint>
-#include <cstdio>
 
 #include "absl/base/nullability.h"
-#include "./centipede/feature.h"
-#include "./centipede/int_utils.h"
 #include "./centipede/pc_info.h"
-#include "./centipede/reverse_pc_table.h"
-#include "./centipede/runner.h"
-#include "./centipede/runner_dl_info.h"
+#include "./centipede/sancov_interface.h"
 
 namespace fuzztest::internal {
 void RunnerSancov() {}  // to be referenced in runner.cc
@@ -35,8 +29,6 @@
 
 using fuzztest::internal::PCGuard;
 using fuzztest::internal::PCInfo;
-using fuzztest::internal::state;
-using fuzztest::internal::tls;
 
 // Tracing data flow.
 // The instrumentation is provided by
@@ -54,102 +46,64 @@
 // * This creates plenty of features, easily 10x compared to control flow,
 //   and bloats the corpus. But this is also what we want to achieve here.
 
-// NOTE: In addition to `always_inline`, also use `inline`, because some
-// compilers require both to actually enforce inlining, e.g. GCC:
-// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html.
-#define ENFORCE_INLINE __attribute__((always_inline)) inline
-
-// Use this attribute for functions that must not be instrumented even if
-// the runner is built with sanitizers (asan, etc).
-#define NO_SANITIZE __attribute__((no_sanitize("all")))
-
-// NOTE: Enforce inlining so that `__builtin_return_address` works.
-ENFORCE_INLINE static void TraceLoad(void *addr) {
-  if (!state.run_time_flags.use_dataflow_features) return;
-  auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  auto load_addr = reinterpret_cast<uintptr_t>(addr);
-  auto pc_offset = caller_pc - state.main_object.start_address;
-  if (pc_offset >= state.main_object.size) return;  // PC outside main obj.
-  auto addr_offset = load_addr - state.main_object.start_address;
-  if (addr_offset >= state.main_object.size) return;  // Not a global address.
-  state.data_flow_feature_set.set(fuzztest::internal::ConvertPcPairToNumber(
-      pc_offset, addr_offset, state.main_object.size));
-}
-
-// NOTE: Enforce inlining so that `__builtin_return_address` works.
-ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) {
-  if (!state.run_time_flags.use_cmp_features) return;
-  auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  auto pc_offset = caller_pc - state.main_object.start_address;
-  uintptr_t hash =
-      fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();
-  if (Arg1 == Arg2) {
-    state.cmp_eq_set.set(hash);
-  } else {
-    hash <<= 6;  // ABTo* generate 6-bit numbers.
-    state.cmp_moddiff_set.set(hash |
-                              fuzztest::internal::ABToCmpModDiff(Arg1, Arg2));
-    state.cmp_hamming_set.set(hash |
-                              fuzztest::internal::ABToCmpHamming(Arg1, Arg2));
-    state.cmp_difflog_set.set(hash |
-                              fuzztest::internal::ABToCmpDiffLog(Arg1, Arg2));
-  }
-}
-
 //------------------------------------------------------------------------------
 // Implementations of the external sanitizer coverage hooks.
 //------------------------------------------------------------------------------
 
 extern "C" {
-NO_SANITIZE void __sanitizer_cov_load1(uint8_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load2(uint16_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load4(uint32_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load8(uint64_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load16(__uint128_t *addr) { TraceLoad(addr); }
+NO_SANITIZE void __sanitizer_cov_load1(uint8_t *addr) {
+  __framework_sanitizer_cov_load1(addr);
+}
+NO_SANITIZE void __sanitizer_cov_load2(uint16_t *addr) {
+  __framework_sanitizer_cov_load2(addr);
+}
+NO_SANITIZE void __sanitizer_cov_load4(uint32_t *addr) {
+  __framework_sanitizer_cov_load4(addr);
+}
+NO_SANITIZE void __sanitizer_cov_load8(uint64_t *addr) {
+  __framework_sanitizer_cov_load8(addr);
+}
+NO_SANITIZE void __sanitizer_cov_load16(__uint128_t *addr) {
+  __framework_sanitizer_cov_load16(addr);
+}
 
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
-  TraceCmp(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_const_cmp1(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
-    tls.cmp_trace2.Capture(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_const_cmp2(Arg1, Arg2);
+  __framework_sanitizer_cov_trace_const_cmp2(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
-    tls.cmp_trace4.Capture(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_const_cmp4(Arg1, Arg2);
+  __framework_sanitizer_cov_trace_const_cmp4(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
-    tls.cmp_trace8.Capture(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_const_cmp8(Arg1, Arg2);
+  __framework_sanitizer_cov_trace_const_cmp8(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
-  TraceCmp(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_cmp1(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
-    tls.cmp_trace2.Capture(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_cmp2(Arg1, Arg2);
+  __framework_sanitizer_cov_trace_cmp2(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
-    tls.cmp_trace4.Capture(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_cmp4(Arg1, Arg2);
+  __framework_sanitizer_cov_trace_cmp4(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
-  TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
-    tls.cmp_trace8.Capture(Arg1, Arg2);
+  __shared_sanitizer_cov_trace_cmp8(Arg1, Arg2);
+  __framework_sanitizer_cov_trace_cmp8(Arg1, Arg2);
 }
 // TODO(kcc): [impl] handle switch.
 NO_SANITIZE
@@ -159,7 +113,7 @@
 // -fsanitize-coverage=inline-8bit-counters is used.
 // See https://clang.llvm.org/docs/SanitizerCoverage.html#inline-8bit-counters
 void __sanitizer_cov_8bit_counters_init(uint8_t *beg, uint8_t *end) {
-  state.sancov_objects.Inline8BitCountersInit(beg, end);
+  __framework_sanitizer_cov_8bit_counters_init(beg, end);
 }
 
 // https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table
@@ -169,147 +123,25 @@
 // We currently do not support more than one sancov-instrumented DSO.
 void __sanitizer_cov_pcs_init(const PCInfo *absl_nonnull beg,
                               const PCInfo *end) {
-  state.sancov_objects.PCInfoInit(beg, end);
+  __framework_sanitizer_cov_pcs_init(beg, end);
 }
 
 // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow
 // This function is called at the DSO init time.
 void __sanitizer_cov_cfs_init(const uintptr_t *beg, const uintptr_t *end) {
-  state.sancov_objects.CFSInit(beg, end);
-}
-
-// Updates the state of the paths, `path_level > 0`.
-// Marked noinline so that not to create spills/fills on the fast path
-// of __sanitizer_cov_trace_pc_guard.
-__attribute__((noinline)) static void HandlePath(uintptr_t normalized_pc) {
-  uintptr_t hash = tls.path_ring_buffer.push(normalized_pc);
-  state.path_feature_set.set(hash);
-}
-
-// Handles one observed PC.
-// `normalized_pc` is an integer representation of PC that is stable between
-// the executions.
-// `is_function_entry` is true if the PC is known to be a function entry.
-// With __sanitizer_cov_trace_pc_guard this is an index of PC in the PC table.
-// With __sanitizer_cov_trace_pc this is PC itself, normalized by subtracting
-// the DSO's dynamic start address.
-static ENFORCE_INLINE void HandleOnePc(PCGuard pc_guard) {
-  if (!state.run_time_flags.use_pc_features) return;
-  state.pc_counter_set.SaturatedIncrement(pc_guard.pc_index);
-
-  if (pc_guard.is_function_entry) {
-    uintptr_t sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
-    // It should be rare for the stack depth to exceed the previous record.
-    if (__builtin_expect(
-            sp < tls.lowest_sp &&
-                // And ignore the stack pointer when it is not in the known
-                // region (e.g. for signal handling with an alternative stack).
-                (tls.stack_region_low == 0 || sp >= tls.stack_region_low),
-            0)) {
-      tls.lowest_sp = sp;
-      fuzztest::internal::CheckStackLimit(sp);
-    }
-    if (state.run_time_flags.callstack_level != 0) {
-      tls.call_stack.OnFunctionEntry(pc_guard.pc_index, sp);
-      state.callstack_set.set(tls.call_stack.Hash());
-    }
-  }
-
-  // path features.
-  if (state.run_time_flags.path_level != 0) HandlePath(pc_guard.pc_index);
-}
-
-// Caller PC is the PC of the call instruction.
-// Return address is the PC where the callee will return upon completion.
-// On x86_64, CallerPC == ReturnAddress - 5
-// On AArch64, CallerPC == ReturnAddress - 4
-static uintptr_t ReturnAddressToCallerPc(uintptr_t return_address) {
-#ifdef __x86_64__
-  return return_address - 5;
-#elif defined(__aarch64__)
-  return return_address - 4;
-#else
-#error "unsupported architecture"
-#endif
-}
-
-// Sets `actual_pc_counter_set_size_aligned` to `size`, properly aligned up.
-static void UpdatePcCounterSetSizeAligned(size_t size) {
-  constexpr size_t kAlignment = state.pc_counter_set.kSizeMultiple;
-  constexpr size_t kMask = kAlignment - 1;
-  state.actual_pc_counter_set_size_aligned = (size + kMask) & ~kMask;
-}
-
-// MainObjectLazyInit() and helpers allow us to initialize state.main_object
-// lazily and thread-safely on the first call to __sanitizer_cov_trace_pc().
-//
-// TODO(kcc): consider removing :dl_path_suffix= since with lazy init
-// we can auto-detect the instrumented DSO.
-//
-// TODO(kcc): this lazy init is brittle.
-// It assumes that __sanitizer_cov_trace_pc is the only code that touches
-// state.main_object concurrently. I.e. we can not blindly reuse this lazy init
-// for other instrumentation callbacks that use state.main_object.
-// This code is also considered *temporary* because
-// a) __sanitizer_cov_trace_pc is obsolete and we hope to not need it in future.
-// b) a better option might be to do a non-lazy init by intercepting dlopen.
-//
-// We do not call MainObjectLazyInit() in
-// __sanitizer_cov_trace_pc_guard() because
-// a) there is not use case for that currently and
-// b) it will slowdown the hot function.
-static pthread_once_t main_object_lazy_init_once = PTHREAD_ONCE_INIT;
-static void MainObjectLazyInitOnceCallback() {
-  state.main_object =
-      fuzztest::internal::GetDlInfo(state.GetStringFlag(":dl_path_suffix="));
-  fprintf(stderr, "MainObjectLazyInitOnceCallback %zx\n",
-          state.main_object.start_address);
-  UpdatePcCounterSetSizeAligned(state.reverse_pc_table.NumPcs());
-}
-
-__attribute__((noinline)) static void MainObjectLazyInit() {
-  pthread_once(&main_object_lazy_init_once, MainObjectLazyInitOnceCallback);
-}
-
-// TODO(kcc): [impl] add proper testing for this callback.
-// TODO(kcc): make sure the pc_table in the engine understands the raw PCs.
-// TODO(kcc): this implementation is temporary. In order for symbolization to
-// work we will need to translate the PC into a PCIndex or make pc_table sparse.
-// See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs.
-// This instrumentation is redundant if other instrumentation
-// (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports
-// this variant.
-void __sanitizer_cov_trace_pc() {
-  uintptr_t pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  if (!state.main_object.start_address ||
-      !state.actual_pc_counter_set_size_aligned) {
-    // Don't track coverage at all before the PC table is initialized.
-    if (state.reverse_pc_table.NumPcs() == 0) return;
-    MainObjectLazyInit();
-  }
-  pc -= state.main_object.start_address;
-  pc = ReturnAddressToCallerPc(pc);
-  const auto pc_guard = state.reverse_pc_table.GetPCGuard(pc);
-  // TODO(kcc): compute is_function_entry for this case.
-  if (pc_guard.IsValid()) HandleOnePc(pc_guard);
+  __framework_sanitizer_cov_cfs_init(beg, end);
 }
 
 // This function is called at the DSO init time.
 void __sanitizer_cov_trace_pc_guard_init(PCGuard *absl_nonnull start,
                                          PCGuard *stop) {
-  state.sancov_objects.PCGuardInit(start, stop);
-  UpdatePcCounterSetSizeAligned(state.sancov_objects.NumInstrumentedPCs());
+  __framework_sanitizer_cov_trace_pc_guard_init(start, stop);
 }
 
 // This function is called on every instrumented edge.
 NO_SANITIZE
 void __sanitizer_cov_trace_pc_guard(PCGuard *absl_nonnull guard) {
-  // This function may be called very early during the DSO initialization,
-  // before the values of `*guard` are initialized to non-zero.
-  // But it will immidiately return bacause state.run_time_flags.use_pc_features
-  // is false. Once state.run_time_flags.use_pc_features becomes true, it is
-  // already ok to call this function.
-  HandleOnePc(*guard);
+  __framework_sanitizer_cov_trace_pc_guard(guard);
 }
 
 }  // extern "C"
diff --git a/centipede/sancov_cpp.cc b/centipede/sancov_cpp.cc
new file mode 100644
index 0000000..919a57a
--- /dev/null
+++ b/centipede/sancov_cpp.cc
@@ -0,0 +1,232 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <cstdio>
+
+#include "absl/base/nullability.h"
+#include "./centipede/feature.h"
+#include "./centipede/pc_info.h"
+#include "./centipede/runner.h"
+#include "./centipede/runner_dl_info.h"
+#include "./centipede/sancov_interface.h"
+#include "./centipede/shared_coverage_state.h"
+
+using fuzztest::internal::PCGuard;
+using fuzztest::internal::PCInfo;
+using fuzztest::internal::shared_coverage_state;
+using fuzztest::internal::state;
+using fuzztest::internal::tls;
+
+// NOTE: Enforce inlining so that `__builtin_return_address` works.
+ENFORCE_INLINE static void TraceLoad(void *addr) {
+  if (!shared_coverage_state.run_time_flags.use_dataflow_features) return;
+  auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  auto load_addr = reinterpret_cast<uintptr_t>(addr);
+  auto pc_offset = caller_pc - shared_coverage_state.main_object.start_address;
+  if (pc_offset >= shared_coverage_state.main_object.size)
+    return;  // PC outside main obj.
+  auto addr_offset =
+      load_addr - shared_coverage_state.main_object.start_address;
+  if (addr_offset >= shared_coverage_state.main_object.size)
+    return;  // Not a global address.
+  state.data_flow_feature_set.set(fuzztest::internal::ConvertPcPairToNumber(
+      pc_offset, addr_offset, shared_coverage_state.main_object.size));
+}
+
+void __framework_sanitizer_cov_load1(uint8_t *addr) { TraceLoad(addr); }
+
+void __framework_sanitizer_cov_load2(uint16_t *addr) { TraceLoad(addr); }
+
+void __framework_sanitizer_cov_load4(uint32_t *addr) { TraceLoad(addr); }
+
+void __framework_sanitizer_cov_load8(uint64_t *addr) { TraceLoad(addr); }
+
+void __framework_sanitizer_cov_load16(__uint128_t *addr) { TraceLoad(addr); }
+
+void __framework_sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
+    tls.cmp_trace2.Capture(Arg1, Arg2);
+}
+
+void __framework_sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
+    tls.cmp_trace4.Capture(Arg1, Arg2);
+}
+
+void __framework_sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
+    tls.cmp_trace8.Capture(Arg1, Arg2);
+}
+
+void __framework_sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
+    tls.cmp_trace2.Capture(Arg1, Arg2);
+}
+
+void __framework_sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
+    tls.cmp_trace4.Capture(Arg1, Arg2);
+}
+
+void __framework_sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
+    tls.cmp_trace8.Capture(Arg1, Arg2);
+}
+
+void __framework_sanitizer_cov_8bit_counters_init(uint8_t *beg, uint8_t *end) {
+  state.sancov_objects.Inline8BitCountersInit(beg, end);
+}
+
+void __framework_sanitizer_cov_pcs_init(const PCInfo *absl_nonnull beg,
+                                        const PCInfo *end) {
+  state.sancov_objects.PCInfoInit(beg, end);
+}
+
+void __framework_sanitizer_cov_cfs_init(const uintptr_t *beg,
+                                        const uintptr_t *end) {
+  state.sancov_objects.CFSInit(beg, end);
+}
+
+// Updates the path state; called only when `path_level > 0`.
+// Marked noinline so as not to create spills/fills on the fast path
+// of __sanitizer_cov_trace_pc_guard.
+__attribute__((noinline)) static void HandlePath(uintptr_t normalized_pc) {
+  uintptr_t hash = tls.path_ring_buffer.push(normalized_pc);
+  state.path_feature_set.set(hash);
+}
+
+// Handles one observed PC.
+// `normalized_pc` is an integer representation of PC that is stable between
+// the executions.
+// `is_function_entry` is true if the PC is known to be a function entry.
+// With __sanitizer_cov_trace_pc_guard this is an index of PC in the PC table.
+// With __sanitizer_cov_trace_pc this is PC itself, normalized by subtracting
+// the DSO's dynamic start address.
+static ENFORCE_INLINE void HandleOnePc(PCGuard pc_guard) {
+  if (!shared_coverage_state.run_time_flags.use_pc_features) return;
+  state.pc_counter_set.SaturatedIncrement(pc_guard.pc_index);
+
+  if (pc_guard.is_function_entry) {
+    uintptr_t sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
+    // It should be rare for the stack depth to exceed the previous record.
+    if (__builtin_expect(
+            sp < tls.lowest_sp &&
+                // And ignore the stack pointer when it is not in the known
+                // region (e.g. for signal handling with an alternative stack).
+                (tls.stack_region_low == 0 || sp >= tls.stack_region_low),
+            0)) {
+      tls.lowest_sp = sp;
+      fuzztest::internal::CheckStackLimit(sp);
+    }
+    if (shared_coverage_state.run_time_flags.callstack_level != 0) {
+      tls.call_stack.OnFunctionEntry(pc_guard.pc_index, sp);
+      state.callstack_set.set(tls.call_stack.Hash());
+    }
+  }
+
+  // path features.
+  if (shared_coverage_state.run_time_flags.path_level != 0)
+    HandlePath(pc_guard.pc_index);
+}
+
+// Sets `actual_pc_counter_set_size_aligned` to `size`, properly aligned up.
+static void UpdatePcCounterSetSizeAligned(size_t size) {
+  constexpr size_t kAlignment = state.pc_counter_set.kSizeMultiple;
+  constexpr size_t kMask = kAlignment - 1;
+  state.actual_pc_counter_set_size_aligned = (size + kMask) & ~kMask;
+}
+
+void __framework_sanitizer_cov_trace_pc_guard_init(PCGuard *absl_nonnull start,
+                                                   PCGuard *stop) {
+  state.sancov_objects.PCGuardInit(start, stop);
+  UpdatePcCounterSetSizeAligned(state.sancov_objects.NumInstrumentedPCs());
+}
+
+void __framework_sanitizer_cov_trace_pc_guard(PCGuard *absl_nonnull guard) {
+  // This function may be called very early during the DSO initialization,
+  // before the values of `*guard` are initialized to non-zero.
+  // But it will immediately return because
+  // shared_coverage_state.run_time_flags.use_pc_features is false. Once
+  // use_pc_features becomes true, it is safe to call this function.
+  HandleOnePc(*guard);
+}
+
+// Caller PC is the PC of the call instruction.
+// Return address is the PC to which the callee returns upon completion.
+// On x86_64 a direct call is 5 bytes, so CallerPC == ReturnAddress - 5.
+// On AArch64 every instruction is 4 bytes, so CallerPC == ReturnAddress - 4.
+static uintptr_t ReturnAddressToCallerPc(uintptr_t return_address) {
+#ifdef __x86_64__
+  return return_address - 5;
+#elif defined(__aarch64__)
+  return return_address - 4;
+#else
+#error "unsupported architecture"
+#endif
+}
+
+// MainObjectLazyInit() and helpers allow us to initialize
+// shared_coverage_state.main_object lazily and thread-safely on the first
+// call to __sanitizer_cov_trace_pc().
+//
+// TODO(kcc): consider removing :dl_path_suffix= since with lazy init
+// we can auto-detect the instrumented DSO.
+//
+// TODO(kcc): this lazy init is brittle. It assumes that
+// __sanitizer_cov_trace_pc is the only code that touches
+// shared_coverage_state.main_object concurrently, i.e. we cannot blindly
+// reuse this lazy init for other callbacks that use main_object.
+// This code is also considered *temporary* because
+// a) __sanitizer_cov_trace_pc is obsolete and we hope not to need it in the
+//    future, and
+// b) a better option might be to do a non-lazy init by intercepting dlopen.
+//
+// We do not call MainObjectLazyInit() in __sanitizer_cov_trace_pc_guard():
+// there is no use case currently, and it would slow down that hot function.
+static pthread_once_t main_object_lazy_init_once = PTHREAD_ONCE_INIT;
+static void MainObjectLazyInitOnceCallback() {
+  shared_coverage_state.main_object = fuzztest::internal::GetDlInfo(
+      shared_coverage_state.GetStringFlag(":dl_path_suffix="));
+  fprintf(stderr, "MainObjectLazyInitOnceCallback %zx\n",
+          shared_coverage_state.main_object.start_address);
+  UpdatePcCounterSetSizeAligned(state.reverse_pc_table.NumPcs());
+}
+
+__attribute__((noinline)) static void MainObjectLazyInit() {
+  pthread_once(&main_object_lazy_init_once, MainObjectLazyInitOnceCallback);
+}
+
+// TODO(kcc): [impl] add proper testing for this callback.
+// TODO(kcc): make sure the pc_table in the engine understands the raw PCs.
+// TODO(kcc): this implementation is temporary. In order for symbolization to
+// work we will need to translate the PC into a PCIndex or make pc_table sparse.
+// See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs.
+// This instrumentation is redundant if other instrumentation
+// (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports
+// this variant.
+extern "C" void __sanitizer_cov_trace_pc() {
+  uintptr_t pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  if (!shared_coverage_state.main_object.start_address ||
+      !state.actual_pc_counter_set_size_aligned) {
+    // Don't track coverage at all before the PC table is initialized.
+    if (state.reverse_pc_table.NumPcs() == 0) return;
+    MainObjectLazyInit();
+  }
+  pc -= shared_coverage_state.main_object.start_address;
+  pc = ReturnAddressToCallerPc(pc);
+  const auto pc_guard = state.reverse_pc_table.GetPCGuard(pc);
+  // TODO(kcc): compute is_function_entry for this case.
+  if (pc_guard.IsValid()) HandleOnePc(pc_guard);
+}
diff --git a/centipede/sancov_interface.h b/centipede/sancov_interface.h
new file mode 100644
index 0000000..c98ac9f
--- /dev/null
+++ b/centipede/sancov_interface.h
@@ -0,0 +1,93 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FUZZTEST_CENTIPEDE_SANCOV_INTERFACE_H_
+#define FUZZTEST_CENTIPEDE_SANCOV_INTERFACE_H_
+
+#include <cstdint>
+
+#include "absl/base/nullability.h"
+#include "./centipede/pc_info.h"
+
+// NOTE: In addition to `always_inline`, also use `inline`, because some
+// compilers require both to actually enforce inlining, e.g. GCC:
+// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html.
+#define ENFORCE_INLINE __attribute__((always_inline)) inline
+
+// Use this attribute for functions that must not be instrumented even if
+// the runner is built with sanitizers (asan, etc).
+#define NO_SANITIZE __attribute__((no_sanitize("all")))
+
+NO_SANITIZE void __shared_sanitizer_cov_trace_const_cmp1(uint8_t Arg1,
+                                                         uint8_t Arg2);
+NO_SANITIZE void __shared_sanitizer_cov_trace_const_cmp2(uint16_t Arg1,
+                                                         uint16_t Arg2);
+NO_SANITIZE void __shared_sanitizer_cov_trace_const_cmp4(uint32_t Arg1,
+                                                         uint32_t Arg2);
+NO_SANITIZE void __shared_sanitizer_cov_trace_const_cmp8(uint64_t Arg1,
+                                                         uint64_t Arg2);
+
+NO_SANITIZE void __shared_sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2);
+NO_SANITIZE void __shared_sanitizer_cov_trace_cmp2(uint16_t Arg1,
+                                                   uint16_t Arg2);
+NO_SANITIZE void __shared_sanitizer_cov_trace_cmp4(uint32_t Arg1,
+                                                   uint32_t Arg2);
+NO_SANITIZE void __shared_sanitizer_cov_trace_cmp8(uint64_t Arg1,
+                                                   uint64_t Arg2);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NO_SANITIZE void __framework_sanitizer_cov_load1(uint8_t *addr);
+NO_SANITIZE void __framework_sanitizer_cov_load2(uint16_t *addr);
+NO_SANITIZE void __framework_sanitizer_cov_load4(uint32_t *addr);
+NO_SANITIZE void __framework_sanitizer_cov_load8(uint64_t *addr);
+NO_SANITIZE void __framework_sanitizer_cov_load16(__uint128_t *addr);
+
+NO_SANITIZE void __framework_sanitizer_cov_trace_const_cmp2(uint16_t Arg1,
+                                                            uint16_t Arg2);
+NO_SANITIZE void __framework_sanitizer_cov_trace_const_cmp4(uint32_t Arg1,
+                                                            uint32_t Arg2);
+NO_SANITIZE void __framework_sanitizer_cov_trace_const_cmp8(uint64_t Arg1,
+                                                            uint64_t Arg2);
+
+NO_SANITIZE void __framework_sanitizer_cov_trace_cmp2(uint16_t Arg1,
+                                                      uint16_t Arg2);
+NO_SANITIZE void __framework_sanitizer_cov_trace_cmp4(uint32_t Arg1,
+                                                      uint32_t Arg2);
+NO_SANITIZE void __framework_sanitizer_cov_trace_cmp8(uint64_t Arg1,
+                                                      uint64_t Arg2);
+
+void __framework_sanitizer_cov_8bit_counters_init(uint8_t *beg, uint8_t *end);
+
+void __framework_sanitizer_cov_pcs_init(
+    const fuzztest::internal::PCInfo *absl_nonnull beg,
+    const fuzztest::internal::PCInfo *end);
+
+void __framework_sanitizer_cov_cfs_init(const uintptr_t *beg,
+                                        const uintptr_t *end);
+
+void __framework_sanitizer_cov_trace_pc_guard_init(
+    fuzztest::internal::PCGuard *absl_nonnull start,
+    fuzztest::internal::PCGuard *stop);
+
+NO_SANITIZE void __framework_sanitizer_cov_trace_pc_guard(
+    fuzztest::internal::PCGuard *absl_nonnull guard);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // FUZZTEST_CENTIPEDE_SANCOV_INTERFACE_H_
diff --git a/centipede/sancov_shared.cc b/centipede/sancov_shared.cc
new file mode 100644
index 0000000..c861c33
--- /dev/null
+++ b/centipede/sancov_shared.cc
@@ -0,0 +1,82 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+
+#include "./centipede/feature.h"
+#include "./centipede/int_utils.h"
+#include "./centipede/sancov_interface.h"
+#include "./centipede/shared_coverage_state.h"
+
+using fuzztest::internal::shared_coverage_state;
+using fuzztest::internal::tls;
+
+// NOTE: Enforce inlining so that `__builtin_return_address` works.
+ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) {
+  if (!shared_coverage_state.run_time_flags.use_cmp_features) return;
+  auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  auto pc_offset = caller_pc - shared_coverage_state.main_object.start_address;
+  uintptr_t hash =
+      fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();
+  if (Arg1 == Arg2) {
+    shared_coverage_state.cmp_eq_set.set(hash);
+  } else {
+    hash <<= 6;  // ABTo* generate 6-bit numbers.
+    shared_coverage_state.cmp_moddiff_set.set(
+        hash | fuzztest::internal::ABToCmpModDiff(Arg1, Arg2));
+    shared_coverage_state.cmp_hamming_set.set(
+        hash | fuzztest::internal::ABToCmpHamming(Arg1, Arg2));
+    shared_coverage_state.cmp_difflog_set.set(
+        hash | fuzztest::internal::ABToCmpDiffLog(Arg1, Arg2));
+  }
+}
+
+void __shared_sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
+
+void __shared_sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
+  TraceCmp(Arg1, Arg2);
+}
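+
+// Usage sketch (hypothetical): a framework linking in this library can
+// forward its own sancov callbacks to these shared implementations, e.g.:
+//
+//   extern "C" void __sanitizer_cov_trace_cmp4(uint32_t a, uint32_t b) {
+//     __shared_sanitizer_cov_trace_cmp4(a, b);
+//   }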
diff --git a/centipede/shared_coverage_state.cc b/centipede/shared_coverage_state.cc
new file mode 100644
index 0000000..9ae5b72
--- /dev/null
+++ b/centipede/shared_coverage_state.cc
@@ -0,0 +1,83 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/shared_coverage_state.h"
+
+#include <cstdlib>
+#include <cstring>
+
+#include "absl/base/nullability.h"
+#include "./centipede/feature.h"
+#include "./centipede/runner_interface.h"
+
+namespace fuzztest::internal {
+
+SharedCoverageState shared_coverage_state __attribute__((init_priority(150)));
+
+// We use __thread instead of thread_local so that the compiler warns if
+// the initializer for `tls` is not a constant expression.
+// `tls` thus must not have a CTOR.
+// This avoids calls to __tls_init() in hot functions that use `tls`.
+__thread ThreadLocalRunnerState tls;
+
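+// Adds `feature` to the shared feature array `g_features`. With
+// :skip_seen_features:, features already seen in previous runs are dropped
+// and newly seen ones are remembered.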
+void MaybeAddFeature(feature_t feature) {
+  if (!shared_coverage_state.run_time_flags.skip_seen_features) {
+    shared_coverage_state.g_features.push_back(feature);
+  } else if (!shared_coverage_state.seen_features.get(feature)) {
+    shared_coverage_state.g_features.push_back(feature);
+    shared_coverage_state.seen_features.set(feature);
+  }
+}
+
+void PrepareSharedCoverage(bool full_clear) {
+  if (!full_clear) return;
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
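+    // NOTE: ForEachNonZeroBit clears each visited bit, so iterating with an
+    // empty callback resets these sets without emitting features.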
+    shared_coverage_state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {});
+    shared_coverage_state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {});
+    shared_coverage_state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {});
+    shared_coverage_state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {});
+  }
+}
+
+void PostProcessSharedCoverage() {
+  // Convert cmp bit set to features.
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
+    shared_coverage_state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {
+      MaybeAddFeature(feature_domains::kCMPEq.ConvertToMe(idx));
+    });
+    shared_coverage_state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {
+      MaybeAddFeature(feature_domains::kCMPModDiff.ConvertToMe(idx));
+    });
+    shared_coverage_state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {
+      MaybeAddFeature(feature_domains::kCMPHamming.ConvertToMe(idx));
+    });
+    shared_coverage_state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {
+      MaybeAddFeature(feature_domains::kCMPDiffLog.ConvertToMe(idx));
+    });
+  }
+}
+
+extern "C" __attribute__((weak)) const char *absl_nullable
+CentipedeGetRunnerFlags() {
+  if (const char *runner_flags_env = getenv("CENTIPEDE_RUNNER_FLAGS"))
+    return strdup(runner_flags_env);
+  return nullptr;
+}
+
+}  // namespace fuzztest::internal
diff --git a/centipede/shared_coverage_state.h b/centipede/shared_coverage_state.h
new file mode 100644
index 0000000..0394be7
--- /dev/null
+++ b/centipede/shared_coverage_state.h
@@ -0,0 +1,248 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FUZZTEST_CENTIPEDE_SHARED_COVERAGE_STATE_H_
+#define FUZZTEST_CENTIPEDE_SHARED_COVERAGE_STATE_H_
+
+#include <algorithm>
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+
+#include "absl/base/const_init.h"
+#include "absl/base/nullability.h"
+#include "absl/numeric/bits.h"
+#include "./centipede/callstack.h"
+#include "./centipede/concurrent_bitset.h"
+#include "./centipede/feature.h"
+#include "./centipede/hashed_ring_buffer.h"
+#include "./centipede/runner_cmp_trace.h"
+#include "./centipede/runner_dl_info.h"
+#include "./centipede/runner_interface.h"
+
+namespace fuzztest::internal {
+
+// An arbitrarily large size.
+static constexpr size_t kCmpFeatureSetSize = 1 << 18;
+
+// Flags derived from CENTIPEDE_RUNNER_FLAGS.
+// Flags used in instrumentation callbacks are bit-packed for efficiency.
+struct RunTimeFlags {
+  uint64_t path_level : 8;
+  uint64_t use_pc_features : 1;
+  uint64_t use_dataflow_features : 1;
+  uint64_t use_cmp_features : 1;
+  uint64_t callstack_level : 8;
+  uint64_t use_counter_features : 1;
+  uint64_t use_auto_dictionary : 1;
+  std::atomic<uint64_t> timeout_per_input;
+  uint64_t timeout_per_batch;
+  std::atomic<uint64_t> stack_limit_kb;
+  std::atomic<uint64_t> rss_limit_mb;
+  uint64_t crossover_level;
+  uint64_t skip_seen_features : 1;
+  uint64_t ignore_timeout_reports : 1;
+  uint64_t max_len;
+};
+
+// One such object is created in each runner thread's TLS.
+// There is no CTOR, since we don't want to use the brittle and lazy TLS CTORs.
+// All data members are zero-initialized during thread creation.
+struct ThreadLocalRunnerState {
+  // Traces the memory comparison of `n` bytes at `s1` and `s2` called at
+  // `caller_pc`, with `is_equal` indicating whether the two memory regions
+  // have equal contents. May add cmp features and auto-dictionary entries
+  // if enabled.
+  // Not called in the shared coverage library.
+  void TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1, const uint8_t *s2,
+                   size_t n, bool is_equal);
+
+  // Intrusive doubly-linked list of TLS objects.
+  // Guarded by state.tls_list_mu.
+  ThreadLocalRunnerState *next, *prev;
+
+  // The pthread_create() interceptor calls OnThreadStart() before the thread
+  // callback. The main thread also calls OnThreadStart(). OnThreadStop() will
+  // be called when thread termination is detected internally - see runner.cc.
+  void OnThreadStart();  // Not called in the shared coverage library.
+  void OnThreadStop();   // Not called in the shared coverage library.
+
+  // Whether OnThreadStart() has been called on this thread. This is used as
+  // a proxy for the readiness of the lower-level runtime.
+  bool started;
+
+  // Paths are thread-local, so we maintain the current bounded path here.
+  // We allow paths of length up to 100, controlled at run time via the
+  // :path_level= flag.
+  HashedRingBuffer<kBoundedPathLength> path_ring_buffer;
+
+  // Value of SP in the top call frame of the thread, computed in OnThreadStart.
+  uintptr_t top_frame_sp;
+  // The lower bound of the stack region of this thread. 0 means unknown.
+  uintptr_t stack_region_low;
+  // Lowest observed value of SP.
+  uintptr_t lowest_sp;
+
+  // The (imprecise) call stack is updated by the PC callback.
+  CallStack<> call_stack;
+
+  // Cmp traces capture the arguments of CMP instructions, memcmp, etc.
+  // We have dedicated traces for 2-, 4-, and 8-byte comparison, and
+  // a catch-all `cmp_traceN` trace for memcmp, etc.
+  CmpTrace<2, 64> cmp_trace2;
+  CmpTrace<4, 64> cmp_trace4;
+  CmpTrace<8, 64> cmp_trace8;
+  CmpTrace<0, 64> cmp_traceN;
+
+  // Set this to true if the thread needs to be ignored in ForEachTLS.
+  // It should always be false if the state is in the global detached_tls_list.
+  bool ignore;
+};
+
+struct CoverageFlags {
+  uint64_t use_cmp_features : 1;
+};
+
+struct SharedCoverageState {
+  // Runner reads flags from CentipedeGetRunnerFlags(). We don't use flags
+  // passed via argv so that argv flags can be passed directly to
+  // LLVMFuzzerInitialize, w/o filtering. The flags are separated with
+  // ':' on both sides, i.e. like this: ":flag1:flag2:flag3=value3".
+  // We do it this way to make the flag parsing code extremely simple. The
+  // interface is private between Centipede and the runner and may change.
+  //
+  // Note that this field reflects the initial runner flags. Some flags can
+  // change later (those wrapped in std::atomic).
+  const char *centipede_runner_flags = CentipedeGetRunnerFlags();
+  const char *arg1 = GetStringFlag(":arg1=");
+  const char *arg2 = GetStringFlag(":arg2=");
+  const char *arg3 = GetStringFlag(":arg3=");
+  // The path to a file where the runner may write the description of failure.
+  const char *failure_description_path =
+      GetStringFlag(":failure_description_path=");
+
+  // Flags.
+  RunTimeFlags run_time_flags = {
+      /*path_level=*/std::min(ThreadLocalRunnerState::kBoundedPathLength,
+                              HasIntFlag(":path_level=", 0)),
+      /*use_pc_features=*/HasFlag(":use_pc_features:"),
+      /*use_dataflow_features=*/HasFlag(":use_dataflow_features:"),
+      /*use_cmp_features=*/HasFlag(":use_cmp_features:"),
+      /*callstack_level=*/HasIntFlag(":callstack_level=", 0),
+      /*use_counter_features=*/HasFlag(":use_counter_features:"),
+      /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"),
+      /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0),
+      /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0),
+      /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0),
+      /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0),
+      /*crossover_level=*/HasIntFlag(":crossover_level=", 50),
+      /*skip_seen_features=*/HasFlag(":skip_seen_features:"),
+      /*ignore_timeout_reports=*/HasFlag(":ignore_timeout_reports:"),
+      /*max_len=*/HasIntFlag(":max_len=", 4000),
+  };
+
+  // Returns true iff `flag` is present.
+  // Typical usage: pass ":some_flag:", i.e. the flag name surrounded with ':'.
+  // TODO(ussuri): Refactor `char *` into a `string_view`.
+  bool HasFlag(const char *absl_nonnull flag) const {
+    if (!centipede_runner_flags) return false;
+    return strstr(centipede_runner_flags, flag) != nullptr;
+  }
+
+  // If a flag=value pair is present, returns value,
+  // otherwise returns `default_value`.
+  // Typical usage: pass ":some_flag=".
+  // TODO(ussuri): Refactor `char *` into a `string_view`.
+  uint64_t HasIntFlag(const char *absl_nonnull flag,
+                      uint64_t default_value) const {
+    if (!centipede_runner_flags) return default_value;
+    const char *beg = strstr(centipede_runner_flags, flag);
+    if (!beg) return default_value;
+    return atoll(beg + strlen(flag));  // NOLINT: can't use strto64, etc.
+  }
+
+  // If a :flag=value: pair is present, returns value; otherwise returns
+  // nullptr. The result is obtained by calling strndup, so make sure to
+  // save it in `this` to avoid a leak.
+  // Typical usage: pass ":some_flag=".
+  // TODO(ussuri): Refactor `char *` into a `string_view`.
+  const char *absl_nullable GetStringFlag(const char *absl_nonnull flag) const {
+    if (!centipede_runner_flags) return nullptr;
+    // Extract "value" from ":flag=value:" inside centipede_runner_flags.
+    const char *beg = strstr(centipede_runner_flags, flag);
+    if (!beg) return nullptr;
+    const char *value_beg = beg + strlen(flag);
+    const char *end = strstr(value_beg, ":");
+    if (!end) return nullptr;
+    return strndup(value_beg, end - value_beg);
+  }
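+
+  // Example (hypothetical flag string): given
+  //   CENTIPEDE_RUNNER_FLAGS=":use_cmp_features:max_len=100:arg1=foo:",
+  // HasFlag(":use_cmp_features:") is true, HasIntFlag(":max_len=", 4000)
+  // returns 100, and GetStringFlag(":arg1=") returns a copy of "foo".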
+
+  // Computed by GetDlInfo().
+  // Usually, the main object is the executable binary containing main()
+  // and most of the executable code (we assume that the target is
+  // built in mostly-static mode, i.e. -dynamic_mode=off).
+  // When the `dl_path_suffix` runner flag is provided, the main_object refers
+  // to the dynamic library (DSO) pointed to by this flag.
+  //
+  // Note: this runner currently does not support more than one instrumented
+  // DSO in the process, i.e. you either instrument the main binary, or one DSO.
+  // Supporting more than one DSO would require major changes, add
+  // significant complexity, and potentially cause slowdowns.
+  // There is currently no motivation for such a change.
+  DlInfo main_object;
+
+  // When tracing CMP instructions, we capture events from these domains:
+  // kCMPEq, kCMPModDiff, kCMPHamming, kCMPDiffLog.
+  // See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow.
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_eq_set{absl::kConstInit};
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_moddiff_set{absl::kConstInit};
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_hamming_set{absl::kConstInit};
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_difflog_set{absl::kConstInit};
+
+  // An arbitrarily large size.
+  static constexpr size_t kMaxFeatures = 1 << 20;
+  // FeatureArray used to accumulate features from all sources.
+  FeatureArray<kMaxFeatures> g_features;
+
+  // Features that were seen before.
+  static constexpr size_t kSeenFeatureSetSize =
+      absl::bit_ceil(feature_domains::kLastDomain.end());
+  ConcurrentBitSet<kSeenFeatureSetSize> seen_features{absl::kConstInit};
+};
+
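+// Typical lifecycle (sketch): the runner calls PrepareSharedCoverage() before
+// executing an input, the instrumentation callbacks populate the cmp sets
+// during execution, and PostProcessSharedCoverage() then converts those sets
+// into features appended to `g_features` via MaybeAddFeature().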
+__attribute__((noinline))  // So that it shows up in profiles.
+extern "C" void PrepareSharedCoverage(bool full_clear);
+
+__attribute__((noinline))  // So that it shows up in profiles.
+extern "C" void PostProcessSharedCoverage();
+
+void MaybeAddFeature(feature_t feature);
+
+extern SharedCoverageState shared_coverage_state;
+extern __thread ThreadLocalRunnerState tls;
+
+}  // namespace fuzztest::internal
+
+#endif  // FUZZTEST_CENTIPEDE_SHARED_COVERAGE_STATE_H_