Refactor coverage state out of `GlobalRunnerState` into `shared_coverage_state` so that it can be shared with the Rust FuzzTest framework for emitting features.

PiperOrigin-RevId: 783788660
diff --git a/centipede/BUILD b/centipede/BUILD
index cbc0df6..f9a7a3f 100644
--- a/centipede/BUILD
+++ b/centipede/BUILD
@@ -1024,6 +1024,8 @@
 #  e.g. feature.cc. These files are compiled by the engine and the runner
 #  separately, with different compiler flags.
 RUNNER_SOURCES_NO_MAIN = [
+    "shared_coverage_state.cc",
+    "shared_coverage_state.h",
     "byte_array_mutator.cc",
     "byte_array_mutator.h",
     "callstack.h",
@@ -1202,6 +1204,40 @@
     ],
 )
 
+cc_library(
+    name = "shared_coverage",
+    srcs = [
+        "runner_dl_info.cc",
+        "runner_sancov.cc",
+        "runner_sancov_object.cc",
+        "runner_utils.cc",
+        "shared_coverage_state.cc",
+        "@com_google_fuzztest//common:defs.h",
+    ],
+    hdrs = [
+        "runner_dl_info.h",
+        "runner_interface.h",
+        "runner_sancov_object.h",
+        "runner_utils.h",
+        "shared_coverage_state.h",
+    ],
+    deps = [
+        ":callstack",
+        ":feature",
+        ":foreach_nonzero",
+        ":int_utils",
+        ":mutation_input",
+        ":pc_info",
+        ":reverse_pc_table",
+        ":runner_cmp_trace",
+        ":runner_result",
+        "@abseil-cpp//absl/base:core_headers",
+        "@abseil-cpp//absl/base:nullability",
+        "@abseil-cpp//absl/numeric:bits",
+        "@abseil-cpp//absl/types:span",
+    ],
+)
+
 # Flags for :seed_corpus_maker.
 cc_library(
     name = "seed_corpus_maker_flags",
diff --git a/centipede/runner.cc b/centipede/runner.cc
index d998350..2c76296 100644
--- a/centipede/runner.cc
+++ b/centipede/runner.cc
@@ -56,6 +56,7 @@
 #include "./centipede/runner_request.h"
 #include "./centipede/runner_result.h"
 #include "./centipede/runner_utils.h"
+#include "./centipede/shared_coverage_state.h"
 #include "./centipede/shared_memory_blob_sequence.h"
 #include "./common/defs.h"
 
@@ -94,24 +95,20 @@
 }  // namespace
 
 GlobalRunnerState state __attribute__((init_priority(200)));
-// We use __thread instead of thread_local so that the compiler warns if
-// the initializer for `tls` is not a constant expression.
-// `tls` thus must not have a CTOR.
-// This avoids calls to __tls_init() in hot functions that use `tls`.
-__thread ThreadLocalRunnerState tls;
 
 void ThreadLocalRunnerState::TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1,
                                          const uint8_t *s2, size_t n,
                                          bool is_equal) {
-  if (state.run_time_flags.use_cmp_features) {
-    const uintptr_t pc_offset = caller_pc - state.main_object.start_address;
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
+    const uintptr_t pc_offset =
+        caller_pc - shared_coverage_state.main_object.start_address;
     const uintptr_t hash =
         fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();
     const size_t lcp = LengthOfCommonPrefix(s1, s2, n);
     // lcp is a 6-bit number.
     state.cmp_feature_set.set((hash << 6) | lcp);
   }
-  if (!is_equal && state.run_time_flags.use_auto_dictionary) {
+  if (!is_equal && shared_coverage_state.run_time_flags.use_auto_dictionary) {
     cmp_traceN.Capture(n, s1, s2);
   }
 }
@@ -126,8 +123,8 @@
     fprintf(stderr,
             "Disabling stack limit check due to missing stack region info.\n");
   }
-  tls.call_stack.Reset(state.run_time_flags.callstack_level);
-  tls.path_ring_buffer.Reset(state.run_time_flags.path_level);
+  tls.call_stack.Reset(shared_coverage_state.run_time_flags.callstack_level);
+  tls.path_ring_buffer.Reset(shared_coverage_state.run_time_flags.path_level);
   LockGuard lock(state.tls_list_mu);
   // Add myself to state.tls_list.
   auto *old_list = state.tls_list;
@@ -194,7 +191,7 @@
     bool ignore_report;
     const char *failure;
   };
-  const uint64_t input_start_time = state.input_start_time;
+  const uint64_t input_start_time = shared_coverage_state.input_start_time;
   const uint64_t batch_start_time = state.batch_start_time;
   if (input_start_time == 0 || batch_start_time == 0) return;
   const Resource resources[] = {
@@ -202,23 +199,25 @@
           /*what=*/"Per-input timeout",
           /*units=*/"sec",
           /*value=*/curr_time - input_start_time,
-          /*limit=*/state.run_time_flags.timeout_per_input,
-          /*ignore_report=*/state.run_time_flags.ignore_timeout_reports != 0,
+          /*limit=*/shared_coverage_state.run_time_flags.timeout_per_input,
+          /*ignore_report=*/
+          shared_coverage_state.run_time_flags.ignore_timeout_reports != 0,
           /*failure=*/kExecutionFailurePerInputTimeout.data(),
       }},
       {Resource{
           /*what=*/"Per-batch timeout",
           /*units=*/"sec",
           /*value=*/curr_time - batch_start_time,
-          /*limit=*/state.run_time_flags.timeout_per_batch,
-          /*ignore_report=*/state.run_time_flags.ignore_timeout_reports != 0,
+          /*limit=*/shared_coverage_state.run_time_flags.timeout_per_batch,
+          /*ignore_report=*/
+          shared_coverage_state.run_time_flags.ignore_timeout_reports != 0,
           /*failure=*/kExecutionFailurePerBatchTimeout.data(),
       }},
       {Resource{
           /*what=*/"RSS limit",
           /*units=*/"MB",
           /*value=*/GetPeakRSSMb(),
-          /*limit=*/state.run_time_flags.rss_limit_mb,
+          /*limit=*/shared_coverage_state.run_time_flags.rss_limit_mb,
           /*ignore_report=*/false,
           /*failure=*/kExecutionFailureRssLimitExceeded.data(),
       }},
@@ -270,32 +269,12 @@
     sleep(1);
 
     // No calls to ResetInputTimer() yet: input execution hasn't started.
-    if (state.input_start_time == 0) continue;
+    if (shared_coverage_state.input_start_time == 0) continue;
 
     CheckWatchdogLimits();
   }
 }
 
-__attribute__((noinline)) void CheckStackLimit(uintptr_t sp) {
-  static std::atomic_flag stack_limit_exceeded = ATOMIC_FLAG_INIT;
-  const size_t stack_limit = state.run_time_flags.stack_limit_kb.load() << 10;
-  // Check for the stack limit only if sp is inside the stack region.
-  if (stack_limit > 0 && tls.stack_region_low &&
-      tls.top_frame_sp - sp > stack_limit) {
-    const bool test_not_running = state.input_start_time == 0;
-    if (test_not_running) return;
-    if (stack_limit_exceeded.test_and_set()) return;
-    fprintf(stderr,
-            "========= Stack limit exceeded: %" PRIuPTR
-            " > %zu"
-            " (byte); aborting\n",
-            tls.top_frame_sp - sp, stack_limit);
-    CentipedeSetFailureDescription(
-        fuzztest::internal::kExecutionFailureStackLimitExceeded.data());
-    std::abort();
-  }
-}
-
 void GlobalRunnerState::CleanUpDetachedTls() {
   LockGuard lock(tls_list_mu);
   ThreadLocalRunnerState *it_next = nullptr;
@@ -311,10 +290,10 @@
           "Starting watchdog thread: timeout_per_input: %" PRIu64
           " sec; timeout_per_batch: %" PRIu64 " sec; rss_limit_mb: %" PRIu64
           " MB; stack_limit_kb: %" PRIu64 " KB\n",
-          state.run_time_flags.timeout_per_input.load(),
-          state.run_time_flags.timeout_per_batch,
-          state.run_time_flags.rss_limit_mb.load(),
-          state.run_time_flags.stack_limit_kb.load());
+          shared_coverage_state.run_time_flags.timeout_per_input.load(),
+          shared_coverage_state.run_time_flags.timeout_per_batch,
+          shared_coverage_state.run_time_flags.rss_limit_mb.load(),
+          shared_coverage_state.run_time_flags.stack_limit_kb.load());
   pthread_t watchdog_thread;
   pthread_create(&watchdog_thread, nullptr, WatchdogThread, nullptr);
   pthread_detach(watchdog_thread);
@@ -326,7 +305,7 @@
 
 void GlobalRunnerState::ResetTimers() {
   const auto curr_time = time(nullptr);
-  input_start_time = curr_time;
+  shared_coverage_state.input_start_time = curr_time;
   // batch_start_time is set only once -- just before the first input of the
   // batch is about to start running.
   if (batch_start_time == 0) {
@@ -389,10 +368,12 @@
 static void
 PrepareCoverage(bool full_clear) {
   state.CleanUpDetachedTls();
-  if (state.run_time_flags.path_level != 0) {
+  if (shared_coverage_state.run_time_flags.path_level != 0) {
     state.ForEachTls([](ThreadLocalRunnerState &tls) {
-      tls.path_ring_buffer.Reset(state.run_time_flags.path_level);
-      tls.call_stack.Reset(state.run_time_flags.callstack_level);
+      tls.path_ring_buffer.Reset(
+          shared_coverage_state.run_time_flags.path_level);
+      tls.call_stack.Reset(
+          shared_coverage_state.run_time_flags.callstack_level);
       tls.lowest_sp = tls.top_frame_sp;
     });
   }
@@ -403,53 +384,42 @@
     }
   }
   if (!full_clear) return;
+  PrepareSharedCoverage(full_clear);
   state.ForEachTls([](ThreadLocalRunnerState &tls) {
-    if (state.run_time_flags.use_auto_dictionary) {
+    if (shared_coverage_state.run_time_flags.use_auto_dictionary) {
       tls.cmp_trace2.Clear();
       tls.cmp_trace4.Clear();
       tls.cmp_trace8.Clear();
       tls.cmp_traceN.Clear();
     }
   });
-  state.pc_counter_set.ForEachNonZeroByte(
+  shared_coverage_state.pc_counter_set.ForEachNonZeroByte(
       [](size_t idx, uint8_t value) {}, 0,
-      state.actual_pc_counter_set_size_aligned);
-  if (state.run_time_flags.use_dataflow_features)
-    state.data_flow_feature_set.ForEachNonZeroBit([](size_t idx) {});
-  if (state.run_time_flags.use_cmp_features) {
+      shared_coverage_state.actual_pc_counter_set_size_aligned);
+  if (shared_coverage_state.run_time_flags.use_dataflow_features)
+    shared_coverage_state.data_flow_feature_set.ForEachNonZeroBit(
+        [](size_t idx) {});
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
     state.cmp_feature_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {});
-    state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {});
   }
-  if (state.run_time_flags.path_level != 0)
-    state.path_feature_set.ForEachNonZeroBit([](size_t idx) {});
-  if (state.run_time_flags.callstack_level != 0)
-    state.callstack_set.ForEachNonZeroBit([](size_t idx) {});
+  if (shared_coverage_state.run_time_flags.path_level != 0)
+    shared_coverage_state.path_feature_set.ForEachNonZeroBit([](size_t idx) {});
+  if (shared_coverage_state.run_time_flags.callstack_level != 0)
+    shared_coverage_state.callstack_set.ForEachNonZeroBit([](size_t idx) {});
   for (auto *p = state.user_defined_begin; p != state.user_defined_end; ++p) {
     *p = 0;
   }
-  state.sancov_objects.ClearInlineCounters();
-}
-
-static void MaybeAddFeature(feature_t feature) {
-  if (!state.run_time_flags.skip_seen_features) {
-    state.g_features.push_back(feature);
-  } else if (!state.seen_features.get(feature)) {
-    state.g_features.push_back(feature);
-    state.seen_features.set(feature);
-  }
+  shared_coverage_state.sancov_objects.ClearInlineCounters();
 }
 
 // Adds a kPCs and/or k8bitCounters feature to `g_features` based on arguments.
 // `idx` is a pc_index.
 // `counter_value` (non-zero) is a counter value associated with that PC.
 static void AddPcIndxedAndCounterToFeatures(size_t idx, uint8_t counter_value) {
-  if (state.run_time_flags.use_pc_features) {
+  if (shared_coverage_state.run_time_flags.use_pc_features) {
     MaybeAddFeature(feature_domains::kPCs.ConvertToMe(idx));
   }
-  if (state.run_time_flags.use_counter_features) {
+  if (shared_coverage_state.run_time_flags.use_counter_features) {
     MaybeAddFeature(feature_domains::k8bitCounters.ConvertToMe(
         Convert8bitCounterToNumber(idx, counter_value)));
   }
@@ -465,54 +435,45 @@
 __attribute__((noinline))  // so that we see it in profile.
 static void
 PostProcessCoverage(int target_return_value) {
-  state.g_features.clear();
+  shared_coverage_state.g_features.clear();
 
   if (target_return_value == -1) return;
 
+  PostProcessSharedCoverage();
+
   // Convert counters to features.
-  state.pc_counter_set.ForEachNonZeroByte(
+  shared_coverage_state.pc_counter_set.ForEachNonZeroByte(
       [](size_t idx, uint8_t value) {
         AddPcIndxedAndCounterToFeatures(idx, value);
       },
-      0, state.actual_pc_counter_set_size_aligned);
+      0, shared_coverage_state.actual_pc_counter_set_size_aligned);
 
   // Convert data flow bit set to features.
-  if (state.run_time_flags.use_dataflow_features) {
-    state.data_flow_feature_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kDataFlow.ConvertToMe(idx));
-    });
+  if (shared_coverage_state.run_time_flags.use_dataflow_features) {
+    shared_coverage_state.data_flow_feature_set.ForEachNonZeroBit(
+        [](size_t idx) {
+          MaybeAddFeature(feature_domains::kDataFlow.ConvertToMe(idx));
+        });
   }
 
   // Convert cmp bit set to features.
-  if (state.run_time_flags.use_cmp_features) {
+  if (shared_coverage_state.run_time_flags.use_cmp_features) {
     // TODO(kcc): remove cmp_feature_set.
     state.cmp_feature_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kCMP.ConvertToMe(idx));
     });
-    state.cmp_eq_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPEq.ConvertToMe(idx));
-    });
-    state.cmp_moddiff_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPModDiff.ConvertToMe(idx));
-    });
-    state.cmp_hamming_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPHamming.ConvertToMe(idx));
-    });
-    state.cmp_difflog_set.ForEachNonZeroBit([](size_t idx) {
-      MaybeAddFeature(feature_domains::kCMPDiffLog.ConvertToMe(idx));
-    });
   }
 
   // Convert path bit set to features.
-  if (state.run_time_flags.path_level != 0) {
-    state.path_feature_set.ForEachNonZeroBit([](size_t idx) {
+  if (shared_coverage_state.run_time_flags.path_level != 0) {
+    shared_coverage_state.path_feature_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kBoundedPath.ConvertToMe(idx));
     });
   }
 
   // Iterate all threads and get features from TLS data.
   state.ForEachTls([](ThreadLocalRunnerState &tls) {
-    if (state.run_time_flags.callstack_level != 0) {
+    if (shared_coverage_state.run_time_flags.callstack_level != 0) {
       RunnerCheck(tls.top_frame_sp >= tls.lowest_sp,
                   "bad values of tls.top_frame_sp and tls.lowest_sp");
       size_t sp_diff = tls.top_frame_sp - tls.lowest_sp;
@@ -520,8 +481,8 @@
     }
   });
 
-  if (state.run_time_flags.callstack_level != 0) {
-    state.callstack_set.ForEachNonZeroBit([](size_t idx) {
+  if (shared_coverage_state.run_time_flags.callstack_level != 0) {
+    shared_coverage_state.callstack_set.ForEachNonZeroBit([](size_t idx) {
       MaybeAddFeature(feature_domains::kCallStack.ConvertToMe(idx));
     });
   }
@@ -546,9 +507,9 @@
 
   // Iterates all non-zero inline 8-bit counters, if they are present.
   // Calls AddPcIndxedAndCounterToFeatures on non-zero counters and zeroes them.
-  if (state.run_time_flags.use_pc_features ||
-      state.run_time_flags.use_counter_features) {
-    state.sancov_objects.ForEachNonZeroInlineCounter(
+  if (shared_coverage_state.run_time_flags.use_pc_features ||
+      shared_coverage_state.run_time_flags.use_counter_features) {
+    shared_coverage_state.sancov_objects.ForEachNonZeroInlineCounter(
         [](size_t idx, uint8_t counter_value) {
           AddPcIndxedAndCounterToFeatures(idx, counter_value);
         });
@@ -627,7 +588,8 @@
   int target_return_value = callbacks.Execute({data, size}) ? 0 : -1;
   state.stats.exec_time_usec = UsecSinceLast();
   CheckWatchdogLimits();
-  if (fuzztest::internal::state.input_start_time.exchange(0) != 0) {
+  if (fuzztest::internal::shared_coverage_state.input_start_time.exchange(0) !=
+      0) {
     PostProcessCoverage(target_return_value);
   }
   state.stats.post_time_usec = UsecSinceLast();
@@ -668,8 +630,8 @@
            input_path);
   FILE *features_file = fopen(features_file_path, "w");
   PrintErrorAndExitIf(features_file == nullptr, "can't open coverage file");
-  WriteFeaturesToFile(features_file, state.g_features.data(),
-                      state.g_features.size());
+  WriteFeaturesToFile(features_file, shared_coverage_state.g_features.data(),
+                      shared_coverage_state.g_features.size());
   fclose(features_file);
 }
 
@@ -699,9 +661,9 @@
 // Returns the byte size of `g_features`.
 static size_t CopyFeatures(uint8_t *data, size_t capacity) {
   const size_t features_len_in_bytes =
-      state.g_features.size() * sizeof(feature_t);
+      shared_coverage_state.g_features.size() * sizeof(feature_t);
   if (features_len_in_bytes > capacity) return 0;
-  memcpy(data, state.g_features.data(), features_len_in_bytes);
+  memcpy(data, shared_coverage_state.g_features.data(), features_len_in_bytes);
   return features_len_in_bytes;
 }
 
@@ -729,14 +691,15 @@
   }
 
   // Copy features to shared memory.
-  if (!BatchResult::WriteOneFeatureVec(
-          state.g_features.data(), state.g_features.size(), outputs_blobseq)) {
+  if (!BatchResult::WriteOneFeatureVec(shared_coverage_state.g_features.data(),
+                                       shared_coverage_state.g_features.size(),
+                                       outputs_blobseq)) {
     return false;
   }
 
   ExecutionMetadata metadata;
   // Copy the CMP traces to shared memory.
-  if (state.run_time_flags.use_auto_dictionary) {
+  if (shared_coverage_state.run_time_flags.use_auto_dictionary) {
     bool append_failed = false;
     state.ForEachTls([&metadata, &append_failed](ThreadLocalRunnerState &tls) {
       if (!AppendCmpEntries(tls.cmp_trace2, metadata)) append_failed = true;
@@ -794,10 +757,12 @@
 // Dumps the pc table to `output_path`.
 // Requires that state.main_object is already computed.
 static void DumpPcTable(const char *absl_nonnull output_path) {
-  PrintErrorAndExitIf(!state.main_object.IsSet(), "main_object is not set");
+  PrintErrorAndExitIf(!shared_coverage_state.main_object.IsSet(),
+                      "main_object is not set");
   FILE *output_file = fopen(output_path, "w");
   PrintErrorAndExitIf(output_file == nullptr, "can't open output file");
-  std::vector<PCInfo> pcs = state.sancov_objects.CreatePCTable();
+  std::vector<PCInfo> pcs =
+      shared_coverage_state.sancov_objects.CreatePCTable();
   // Dump the pc table.
   const auto data_size_in_bytes = pcs.size() * sizeof(PCInfo);
   auto num_bytes_written =
@@ -810,10 +775,12 @@
 // Dumps the control-flow table to `output_path`.
 // Requires that state.main_object is already computed.
 static void DumpCfTable(const char *absl_nonnull output_path) {
-  PrintErrorAndExitIf(!state.main_object.IsSet(), "main_object is not set");
+  PrintErrorAndExitIf(!shared_coverage_state.main_object.IsSet(),
+                      "main_object is not set");
   FILE *output_file = fopen(output_path, "w");
   PrintErrorAndExitIf(output_file == nullptr, "can't open output file");
-  std::vector<uintptr_t> data = state.sancov_objects.CreateCfTable();
+  std::vector<uintptr_t> data =
+      shared_coverage_state.sancov_objects.CreateCfTable();
   size_t data_size_in_bytes = data.size() * sizeof(data[0]);
   // Dump the table.
   auto num_bytes_written =
@@ -828,7 +795,7 @@
 static void DumpDsoTable(const char *absl_nonnull output_path) {
   FILE *output_file = fopen(output_path, "w");
   RunnerCheck(output_file != nullptr, "DumpDsoTable: can't open output file");
-  DsoTable dso_table = state.sancov_objects.CreateDsoTable();
+  DsoTable dso_table = shared_coverage_state.sancov_objects.CreateDsoTable();
   for (const auto &entry : dso_table) {
     fprintf(output_file, "%s %zd\n", entry.path.c_str(),
             entry.num_instrumented_pcs);
@@ -949,7 +916,7 @@
   if (custom_mutator_cb_ == nullptr) return false;
   unsigned int seed = GetRandomSeed();
   const size_t num_inputs = inputs.size();
-  const size_t max_mutant_size = state.run_time_flags.max_len;
+  const size_t max_mutant_size = shared_coverage_state.run_time_flags.max_len;
   constexpr size_t kAverageMutationAttempts = 2;
   ByteArray mutant(max_mutant_size);
   for (size_t attempt = 0, num_outputs = 0;
@@ -962,7 +929,8 @@
     std::copy(input_data.cbegin(), input_data.cbegin() + size, mutant.begin());
     size_t new_size = 0;
     if ((custom_crossover_cb_ != nullptr) &&
-        rand_r(&seed) % 100 < state.run_time_flags.crossover_level) {
+        rand_r(&seed) % 100 <
+            shared_coverage_state.run_time_flags.crossover_level) {
       // Perform crossover `crossover_level`% of the time.
       const auto &other_data = inputs[rand_r(&seed) % num_inputs].data;
       new_size = custom_crossover_cb_(
@@ -1008,7 +976,7 @@
   // No-op under ASAN/TSAN/MSAN - those may still rely on rss_limit_mb.
   if (vm_size_in_bytes < one_tb) {
     size_t address_space_limit_mb =
-        state.HasIntFlag(":address_space_limit_mb=", 0);
+        shared_coverage_state.HasIntFlag(":address_space_limit_mb=", 0);
     if (address_space_limit_mb > 0) {
       size_t limit_in_bytes = address_space_limit_mb << 20;
       struct rlimit rlimit_as = {limit_in_bytes, limit_in_bytes};
@@ -1023,10 +991,11 @@
 }
 
 static void MaybePopulateReversePcTable() {
-  const char *pcs_file_path = state.GetStringFlag(":pcs_file_path=");
+  const char *pcs_file_path =
+      shared_coverage_state.GetStringFlag(":pcs_file_path=");
   if (!pcs_file_path) return;
   const auto pc_table = ReadBytesFromFilePath<PCInfo>(pcs_file_path);
-  state.reverse_pc_table.SetFromPCs(pc_table);
+  shared_coverage_state.reverse_pc_table.SetFromPCs(pc_table);
 }
 
 // Create a fake reference to ForkServerCallMeVeryEarly() here so that the
@@ -1064,8 +1033,9 @@
   SetLimits();
 
   // Compute main_object.
-  main_object = GetDlInfo(state.GetStringFlag(":dl_path_suffix="));
-  if (!main_object.IsSet()) {
+  shared_coverage_state.main_object =
+      GetDlInfo(shared_coverage_state.GetStringFlag(":dl_path_suffix="));
+  if (!shared_coverage_state.main_object.IsSet()) {
     fprintf(
         stderr,
         "Failed to compute main_object. This may happen"
@@ -1073,13 +1043,16 @@
   }
 
   // Dump the binary info tables.
-  if (state.HasFlag(":dump_binary_info:")) {
-    RunnerCheck(state.arg1 && state.arg2 && state.arg3,
+  if (shared_coverage_state.HasFlag(":dump_binary_info:")) {
+    RunnerCheck(shared_coverage_state.arg1 && shared_coverage_state.arg2 &&
+                    shared_coverage_state.arg3,
                 "dump_binary_info requires 3 arguments");
-    if (!state.arg1 || !state.arg2 || !state.arg3) _exit(EXIT_FAILURE);
-    DumpPcTable(state.arg1);
-    DumpCfTable(state.arg2);
-    DumpDsoTable(state.arg3);
+    if (!shared_coverage_state.arg1 || !shared_coverage_state.arg2 ||
+        !shared_coverage_state.arg3)
+      _exit(EXIT_FAILURE);
+    DumpPcTable(shared_coverage_state.arg1);
+    DumpCfTable(shared_coverage_state.arg2);
+    DumpDsoTable(shared_coverage_state.arg3);
     _exit(EXIT_SUCCESS);
   }
 
@@ -1100,10 +1073,11 @@
   // The process is winding down, but CentipedeRunnerMain did not run.
   // This means, the binary is standalone with its own main(), and we need to
   // report the coverage now.
-  if (!state.centipede_runner_main_executed && state.HasFlag(":shmem:")) {
+  if (!state.centipede_runner_main_executed &&
+      shared_coverage_state.HasFlag(":shmem:")) {
     int exit_status = EXIT_SUCCESS;  // TODO(kcc): do we know our exit status?
     PostProcessCoverage(exit_status);
-    SharedMemoryBlobSequence outputs_blobseq(state.arg2);
+    SharedMemoryBlobSequence outputs_blobseq(shared_coverage_state.arg2);
     StartSendingOutputsToEngine(outputs_blobseq);
     FinishSendingOutputsToEngine(outputs_blobseq);
   }
@@ -1129,25 +1103,27 @@
   state.centipede_runner_main_executed = true;
 
   fprintf(stderr, "Centipede fuzz target runner; argv[0]: %s flags: %s\n",
-          argv[0], state.centipede_runner_flags);
+          argv[0], shared_coverage_state.centipede_runner_flags);
 
-  if (state.HasFlag(":dump_configuration:")) {
-    DumpSerializedTargetConfigToFile(callbacks,
-                                     /*output_file_path=*/state.arg1);
+  if (shared_coverage_state.HasFlag(":dump_configuration:")) {
+    DumpSerializedTargetConfigToFile(
+        callbacks,
+        /*output_file_path=*/shared_coverage_state.arg1);
     return EXIT_SUCCESS;
   }
 
-  if (state.HasFlag(":dump_seed_inputs:")) {
+  if (shared_coverage_state.HasFlag(":dump_seed_inputs:")) {
     // Seed request.
-    DumpSeedsToDir(callbacks, /*output_dir=*/state.arg1);
+    DumpSeedsToDir(callbacks, /*output_dir=*/shared_coverage_state.arg1);
     return EXIT_SUCCESS;
   }
 
   // Inputs / outputs from shmem.
-  if (state.HasFlag(":shmem:")) {
-    if (!state.arg1 || !state.arg2) return EXIT_FAILURE;
-    SharedMemoryBlobSequence inputs_blobseq(state.arg1);
-    SharedMemoryBlobSequence outputs_blobseq(state.arg2);
+  if (shared_coverage_state.HasFlag(":shmem:")) {
+    if (!shared_coverage_state.arg1 || !shared_coverage_state.arg2)
+      return EXIT_FAILURE;
+    SharedMemoryBlobSequence inputs_blobseq(shared_coverage_state.arg1);
+    SharedMemoryBlobSequence outputs_blobseq(shared_coverage_state.arg2);
     // Read the first blob. It indicates what further actions to take.
     auto request_type_blob = inputs_blobseq.Read();
     if (IsMutationRequest(request_type_blob)) {
@@ -1155,10 +1131,10 @@
       // We still pay for executing the coverage callbacks, but those will
       // return immediately.
       // TODO(kcc): do this more consistently, for all coverage types.
-      state.run_time_flags.use_cmp_features = false;
-      state.run_time_flags.use_pc_features = false;
-      state.run_time_flags.use_dataflow_features = false;
-      state.run_time_flags.use_counter_features = false;
+      shared_coverage_state.run_time_flags.use_cmp_features = false;
+      shared_coverage_state.run_time_flags.use_pc_features = false;
+      shared_coverage_state.run_time_flags.use_dataflow_features = false;
+      shared_coverage_state.run_time_flags.use_counter_features = false;
       // Mutation request.
       inputs_blobseq.Reset();
       state.byte_array_mutator =
@@ -1198,13 +1174,15 @@
 extern "C" void CentipedeSetRssLimit(size_t rss_limit_mb) {
   fprintf(stderr, "CentipedeSetRssLimit: changing rss_limit_mb to %zu\n",
           rss_limit_mb);
-  fuzztest::internal::state.run_time_flags.rss_limit_mb = rss_limit_mb;
+  fuzztest::internal::shared_coverage_state.run_time_flags.rss_limit_mb =
+      rss_limit_mb;
 }
 
 extern "C" void CentipedeSetStackLimit(size_t stack_limit_kb) {
   fprintf(stderr, "CentipedeSetStackLimit: changing stack_limit_kb to %zu\n",
           stack_limit_kb);
-  fuzztest::internal::state.run_time_flags.stack_limit_kb = stack_limit_kb;
+  fuzztest::internal::shared_coverage_state.run_time_flags.stack_limit_kb =
+      stack_limit_kb;
 }
 
 extern "C" void CentipedeSetTimeoutPerInput(uint64_t timeout_per_input) {
@@ -1212,17 +1190,10 @@
           "CentipedeSetTimeoutPerInput: changing timeout_per_input to %" PRIu64
           "\n",
           timeout_per_input);
-  fuzztest::internal::state.run_time_flags.timeout_per_input =
+  fuzztest::internal::shared_coverage_state.run_time_flags.timeout_per_input =
       timeout_per_input;
 }
 
-extern "C" __attribute__((weak)) const char *absl_nullable
-CentipedeGetRunnerFlags() {
-  if (const char *runner_flags_env = getenv("CENTIPEDE_RUNNER_FLAGS"))
-    return strdup(runner_flags_env);
-  return nullptr;
-}
-
 static std::atomic<bool> in_execution_batch = false;
 
 extern "C" void CentipedeBeginExecutionBatch() {
@@ -1244,7 +1215,7 @@
     _exit(EXIT_FAILURE);
   }
   in_execution_batch = false;
-  fuzztest::internal::state.input_start_time = 0;
+  fuzztest::internal::shared_coverage_state.input_start_time = 0;
   fuzztest::internal::state.batch_start_time = 0;
 }
 
@@ -1255,7 +1226,8 @@
 
 extern "C" void CentipedeFinalizeProcessing() {
   fuzztest::internal::CheckWatchdogLimits();
-  if (fuzztest::internal::state.input_start_time.exchange(0) != 0) {
+  if (fuzztest::internal::shared_coverage_state.input_start_time.exchange(0) !=
+      0) {
     fuzztest::internal::PostProcessCoverage(/*target_return_value=*/0);
   }
 }
@@ -1289,11 +1261,11 @@
 }
 
 extern "C" void CentipedeSetFailureDescription(const char *description) {
-  using fuzztest::internal::state;
-  if (state.failure_description_path == nullptr) return;
+  using fuzztest::internal::shared_coverage_state;
+  if (shared_coverage_state.failure_description_path == nullptr) return;
   // Make sure that the write is atomic and only happens once.
   [[maybe_unused]] static int write_once = [=] {
-    FILE *f = fopen(state.failure_description_path, "w");
+    FILE *f = fopen(shared_coverage_state.failure_description_path, "w");
     if (f == nullptr) {
       perror("FAILURE: fopen()");
       return 0;
diff --git a/centipede/runner.h b/centipede/runner.h
index 8e4ff8e..86b9751 100644
--- a/centipede/runner.h
+++ b/centipede/runner.h
@@ -16,31 +16,17 @@
 #define THIRD_PARTY_CENTIPEDE_RUNNER_H_
 
 #include <pthread.h>  // NOLINT: use pthread to avoid extra dependencies.
-#include <string.h>
 #include <time.h>
 
-#include <algorithm>
 #include <atomic>
-#include <cstddef>
-#include <cstdint>
-#include <cstdlib>
 
 #include "absl/base/const_init.h"
-#include "absl/base/nullability.h"
-#include "absl/numeric/bits.h"
 #include "./centipede/byte_array_mutator.h"
-#include "./centipede/callstack.h"
 #include "./centipede/concurrent_bitset.h"
-#include "./centipede/concurrent_byteset.h"
 #include "./centipede/feature.h"
-#include "./centipede/hashed_ring_buffer.h"
 #include "./centipede/knobs.h"
-#include "./centipede/reverse_pc_table.h"
-#include "./centipede/runner_cmp_trace.h"
-#include "./centipede/runner_dl_info.h"
-#include "./centipede/runner_interface.h"
 #include "./centipede/runner_result.h"
-#include "./centipede/runner_sancov_object.h"
+#include "./centipede/shared_coverage_state.h"
 
 namespace fuzztest::internal {
 
@@ -54,79 +40,6 @@
   pthread_mutex_t &mu_;
 };
 
-// Flags derived from CENTIPEDE_RUNNER_FLAGS.
-// Flags used in instrumentation callbacks are bit-packed for efficiency.
-struct RunTimeFlags {
-  uint64_t path_level : 8;
-  uint64_t use_pc_features : 1;
-  uint64_t use_dataflow_features : 1;
-  uint64_t use_cmp_features : 1;
-  uint64_t callstack_level : 8;
-  uint64_t use_counter_features : 1;
-  uint64_t use_auto_dictionary : 1;
-  std::atomic<uint64_t> timeout_per_input;
-  uint64_t timeout_per_batch;
-  std::atomic<uint64_t> stack_limit_kb;
-  std::atomic<uint64_t> rss_limit_mb;
-  uint64_t crossover_level;
-  uint64_t skip_seen_features : 1;
-  uint64_t ignore_timeout_reports : 1;
-  uint64_t max_len;
-};
-
-// One such object is created in runner's TLS.
-// There is no CTOR, since we don't want to use the brittle and lazy TLS CTORs.
-// All data members are zero-initialized during thread creation.
-struct ThreadLocalRunnerState {
-  // Traces the memory comparison of `n` bytes at `s1` and `s2` called at
-  // `caller_pc` with `is_equal` indicating whether the two memory regions have
-  // equal contents. May add cmp features and auto-dictionary entries if
-  // enabled.
-  void TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1, const uint8_t *s2,
-                   size_t n, bool is_equal);
-
-  // Intrusive doubly-linked list of TLS objects.
-  // Guarded by state.tls_list_mu.
-  ThreadLocalRunnerState *next, *prev;
-
-  // The pthread_create() interceptor calls OnThreadStart() before the thread
-  // callback. The main thread also calls OnThreadStart(). OnThreadStop() will
-  // be called when thread termination is detected internally - see runner.cc.
-  void OnThreadStart();
-  void OnThreadStop();
-
-  // Whether OnThreadStart() is called on this thread. This is used as a proxy
-  // of the readiness of the lower-level runtime.
-  bool started;
-
-  // Paths are thread-local, so we maintain the current bounded path here.
-  // We allow paths of up to 100, controlled at run-time via the "path_level".
-  static constexpr uint64_t kBoundedPathLength = 100;
-  HashedRingBuffer<kBoundedPathLength> path_ring_buffer;
-
-  // Value of SP in the top call frame of the thread, computed in OnThreadStart.
-  uintptr_t top_frame_sp;
-  // The lower bound of the stack region of this thread. 0 means unknown.
-  uintptr_t stack_region_low;
-  // Lowest observed value of SP.
-  uintptr_t lowest_sp;
-
-  // The (imprecise) call stack is updated by the PC callback.
-  CallStack<> call_stack;
-
-  // Cmp traces capture the arguments of CMP instructions, memcmp, etc.
-  // We have dedicated traces for 2-, 4-, and 8-byte comparison, and
-  // a catch-all `cmp_traceN` trace for memcmp, etc.
-  CmpTrace<2, 64> cmp_trace2;
-  CmpTrace<4, 64> cmp_trace4;
-  CmpTrace<8, 64> cmp_trace8;
-  CmpTrace<0, 64> cmp_traceN;
-
-  // Set this to true if the thread needs to be ignored in ForEachTLS.
-  // It should be always false if the state is in the global detached_tls_list.
-  bool ignore;
-};
-
 // One global object of this type is created by the runner at start up.
 // All data members will be initialized to zero, unless they have initializers.
 // Accesses to the subobjects should be fast, so we are trying to avoid
@@ -144,79 +57,6 @@
   GlobalRunnerState();
   ~GlobalRunnerState();
 
-  // Runner reads flags from CentipedeGetRunnerFlags(). We don't use flags
-  // passed via argv so that argv flags can be passed directly to
-  // LLVMFuzzerInitialize, w/o filtering. The flags are separated with
-  // ':' on both sides, i.e. like this: ":flag1:flag2:flag3=value3".
-  // We do it this way to make the flag parsing code extremely simple. The
-  // interface is private between Centipede and the runner and may change.
-  //
-  // Note that this field reflects the initial runner flags. But some
-  // flags can change later (if wrapped with std::atomic).
-  const char *centipede_runner_flags = CentipedeGetRunnerFlags();
-  const char *arg1 = GetStringFlag(":arg1=");
-  const char *arg2 = GetStringFlag(":arg2=");
-  const char *arg3 = GetStringFlag(":arg3=");
-  // The path to a file where the runner may write the description of failure.
-  const char *failure_description_path =
-      GetStringFlag(":failure_description_path=");
-
-  // Flags.
-  RunTimeFlags run_time_flags = {
-      /*path_level=*/std::min(ThreadLocalRunnerState::kBoundedPathLength,
-                              HasIntFlag(":path_level=", 0)),
-      /*use_pc_features=*/HasFlag(":use_pc_features:"),
-      /*use_dataflow_features=*/HasFlag(":use_dataflow_features:"),
-      /*use_cmp_features=*/HasFlag(":use_cmp_features:"),
-      /*callstack_level=*/HasIntFlag(":callstack_level=", 0),
-      /*use_counter_features=*/HasFlag(":use_counter_features:"),
-      /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"),
-      /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0),
-      /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0),
-      /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0),
-      /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0),
-      /*crossover_level=*/HasIntFlag(":crossover_level=", 50),
-      /*skip_seen_features=*/HasFlag(":skip_seen_features:"),
-      /*ignore_timeout_reports=*/HasFlag(":ignore_timeout_reports:"),
-      /*max_len=*/HasIntFlag(":max_len=", 4000),
-  };
-
-  // Returns true iff `flag` is present.
-  // Typical usage: pass ":some_flag:", i.e. the flag name surrounded with ':'.
-  // TODO(ussuri): Refactor `char *` into a `string_view`.
-  bool HasFlag(const char *absl_nonnull flag) const {
-    if (!centipede_runner_flags) return false;
-    return strstr(centipede_runner_flags, flag) != nullptr;
-  }
-
-  // If a flag=value pair is present, returns value,
-  // otherwise returns `default_value`.
-  // Typical usage: pass ":some_flag=".
-  // TODO(ussuri): Refactor `char *` into a `string_view`.
-  uint64_t HasIntFlag(const char *absl_nonnull flag,
-                      uint64_t default_value) const {
-    if (!centipede_runner_flags) return default_value;
-    const char *beg = strstr(centipede_runner_flags, flag);
-    if (!beg) return default_value;
-    return atoll(beg + strlen(flag));  // NOLINT: can't use strto64, etc.
-  }
-
-  // If a :flag=value: pair is present returns value, otherwise returns nullptr.
-  // The result is obtained by calling strndup, so make sure to save
-  // it in `this` to avoid a leak.
-  // Typical usage: pass ":some_flag=".
-  // TODO(ussuri): Refactor `char *` into a `string_view`.
-  const char *absl_nullable GetStringFlag(const char *absl_nonnull flag) const {
-    if (!centipede_runner_flags) return nullptr;
-    // Extract "value" from ":flag=value:" inside centipede_runner_flags.
-    const char *beg = strstr(centipede_runner_flags, flag);
-    if (!beg) return nullptr;
-    const char *value_beg = beg + strlen(flag);
-    const char *end = strstr(value_beg, ":");
-    if (!end) return nullptr;
-    return strndup(value_beg, end - value_beg);
-  }
-
   pthread_mutex_t execution_result_override_mu = PTHREAD_MUTEX_INITIALIZER;
   // If not nullptr, it points to a batch result with either zero or one
   // execution. When an execution result present, it will be passed as the
@@ -247,52 +87,11 @@
   // Reclaims all TLSs in detached_tls_list and cleans up the list.
   void CleanUpDetachedTls();
 
-  // Computed by DlInfo().
-  // Usually, the main object is the executable binary containing main()
-  // and most of the executable code (we assume that the target is
-  // built in mostly-static mode, i.e. -dynamic_mode=off).
-  // When the `dl_path_suffix` runner flag is provided, the main_object refers
-  // to the dynamic library (DSO) pointed to by this flag.
-  //
-  // Note: this runner currently does not support more than one instrumented
-  // DSO in the process, i.e. you either instrument the main binary, or one DSO.
-  // Supporting more than one DSO will require major changes,
-  // major added complexity, and potentially cause slowdown.
-  // There is currently no motivation for such a change.
-  DlInfo main_object;
-
-  // State for SanitizerCoverage.
-  // See https://clang.llvm.org/docs/SanitizerCoverage.html.
-  SanCovObjectArray sancov_objects;
-  // An arbitrarily large size.
-  static constexpr size_t kDataFlowFeatureSetSize = 1 << 18;
-  ConcurrentBitSet<kDataFlowFeatureSetSize> data_flow_feature_set{
-      absl::kConstInit};
-
   // Tracing CMP instructions, capture events from these domains:
   // kCMPEq, kCMPModDiff, kCMPHamming, kCMPModDiffLog, kCMPMsbEq.
   // See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow.
-  // An arbitrarily large size.
-  static constexpr size_t kCmpFeatureSetSize = 1 << 18;
   // TODO(kcc): remove cmp_feature_set.
   ConcurrentBitSet<kCmpFeatureSetSize> cmp_feature_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_eq_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_moddiff_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_hamming_set{absl::kConstInit};
-  ConcurrentBitSet<kCmpFeatureSetSize> cmp_difflog_set{absl::kConstInit};
-
-  // We think that call stack produces rich signal, so we give a few bits to it.
-  static constexpr size_t kCallStackFeatureSetSize = 1 << 24;
-  ConcurrentBitSet<kCallStackFeatureSetSize> callstack_set{absl::kConstInit};
-
-  // kMaxNumPcs is the maximum number of instrumented PCs in the binary.
-  // We can be generous here since the unused memory will not cost anything.
-  // `pc_counter_set` is a static byte set supporting up to kMaxNumPcs PCs.
-  static constexpr size_t kMaxNumPcs = 1 << 28;
-  TwoLayerConcurrentByteSet<kMaxNumPcs> pc_counter_set{absl::kConstInit};
-  // This is the actual number of PCs, aligned up to
-  // pc_counter_set::kSizeMultiple, computed at startup.
-  size_t actual_pc_counter_set_size_aligned;
 
   // Initialized in CTOR from the __centipede_extra_features section.
   feature_t *user_defined_begin;
@@ -313,19 +112,9 @@
   // * Use call stacks instead of paths (via unwinding or other
   // instrumentation).
 
-  // An arbitrarily large size.
-  static constexpr size_t kPathBitSetSize = 1 << 25;
-  // Observed paths. The total number of observed paths for --path_level=N
-  // can be up to NumPCs**N.
-  // So, we make the bitset very large, but it may still saturate.
-  ConcurrentBitSet<kPathBitSetSize> path_feature_set{absl::kConstInit};
-
   // Execution stats for the currently executed input.
   ExecutionResult::Stats stats;
 
-  // Used by trace_pc instrumentation. Populated if `pcs_file_path` flag is set.
-  ReversePCTable reverse_pc_table;
-
   // CentipedeRunnerMain() sets this to true.
   bool centipede_runner_main_executed = false;
 
@@ -337,32 +126,15 @@
   // Resets the per-input timer. Call this before executing every input.
   void ResetTimers();
 
-  // Per-input timer. Initially, zero. ResetInputTimer() sets it to the current
-  // time.
-  std::atomic<time_t> input_start_time;
   // Per-batch timer. Initially, zero. ResetInputTimer() sets it to the current
   // time before the first input and never resets it.
   std::atomic<time_t> batch_start_time;
 
   // The Watchdog thread sets this to true.
   std::atomic<bool> watchdog_thread_started;
-
-  // An arbitrarily large size.
-  static const size_t kMaxFeatures = 1 << 20;
-  // FeatureArray used to accumulate features from all sources.
-  FeatureArray<kMaxFeatures> g_features;
-
-  // Features that were seen before.
-  static constexpr size_t kSeenFeatureSetSize =
-      absl::bit_ceil(feature_domains::kLastDomain.end());
-  ConcurrentBitSet<kSeenFeatureSetSize> seen_features{absl::kConstInit};
 };
 
 extern GlobalRunnerState state;
-extern __thread ThreadLocalRunnerState tls;
-
-// Check for stack limit for the stack pointer `sp` in the current thread.
-void CheckStackLimit(uintptr_t sp);
 
 }  // namespace fuzztest::internal
 
diff --git a/centipede/runner_interceptors.cc b/centipede/runner_interceptors.cc
index 886c450..c705148 100644
--- a/centipede/runner_interceptors.cc
+++ b/centipede/runner_interceptors.cc
@@ -22,7 +22,7 @@
 
 #include "absl/base/nullability.h"
 #include "absl/base/optimization.h"
-#include "./centipede/runner.h"
+#include "./centipede/shared_coverage_state.h"
 
 using fuzztest::internal::tls;
 
diff --git a/centipede/runner_interface.h b/centipede/runner_interface.h
index f12691d..0360f34 100644
--- a/centipede/runner_interface.h
+++ b/centipede/runner_interface.h
@@ -22,7 +22,6 @@
 #include <functional>
 #include <memory>
 #include <string>
-#include <string_view>
 #include <vector>
 
 #include "absl/base/nullability.h"
diff --git a/centipede/runner_sancov.cc b/centipede/runner_sancov.cc
index 22435f4..dd29e71 100644
--- a/centipede/runner_sancov.cc
+++ b/centipede/runner_sancov.cc
@@ -26,8 +26,8 @@
 #include "./centipede/int_utils.h"
 #include "./centipede/pc_info.h"
 #include "./centipede/reverse_pc_table.h"
-#include "./centipede/runner.h"
 #include "./centipede/runner_dl_info.h"
+#include "./centipede/shared_coverage_state.h"
 
 namespace fuzztest::internal {
 void RunnerSancov() {}  // to be referenced in runner.cc
@@ -35,7 +35,7 @@
 
 using fuzztest::internal::PCGuard;
 using fuzztest::internal::PCInfo;
-using fuzztest::internal::state;
+using fuzztest::internal::shared_coverage_state;
 using fuzztest::internal::tls;
 
 // Tracing data flow.
@@ -65,34 +65,38 @@
 
 // NOTE: Enforce inlining so that `__builtin_return_address` works.
 ENFORCE_INLINE static void TraceLoad(void *addr) {
-  if (!state.run_time_flags.use_dataflow_features) return;
+  if (!shared_coverage_state.run_time_flags.use_dataflow_features) return;
   auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
   auto load_addr = reinterpret_cast<uintptr_t>(addr);
-  auto pc_offset = caller_pc - state.main_object.start_address;
-  if (pc_offset >= state.main_object.size) return;  // PC outside main obj.
-  auto addr_offset = load_addr - state.main_object.start_address;
-  if (addr_offset >= state.main_object.size) return;  // Not a global address.
-  state.data_flow_feature_set.set(fuzztest::internal::ConvertPcPairToNumber(
-      pc_offset, addr_offset, state.main_object.size));
+  auto pc_offset = caller_pc - shared_coverage_state.main_object.start_address;
+  if (pc_offset >= shared_coverage_state.main_object.size)
+    return;  // PC outside main obj.
+  auto addr_offset =
+      load_addr - shared_coverage_state.main_object.start_address;
+  if (addr_offset >= shared_coverage_state.main_object.size)
+    return;  // Not a global address.
+  shared_coverage_state.data_flow_feature_set.set(
+      fuzztest::internal::ConvertPcPairToNumber(
+          pc_offset, addr_offset, shared_coverage_state.main_object.size));
 }
 
 // NOTE: Enforce inlining so that `__builtin_return_address` works.
 ENFORCE_INLINE static void TraceCmp(uint64_t Arg1, uint64_t Arg2) {
-  if (!state.run_time_flags.use_cmp_features) return;
+  if (!shared_coverage_state.run_time_flags.use_cmp_features) return;
   auto caller_pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  auto pc_offset = caller_pc - state.main_object.start_address;
+  auto pc_offset = caller_pc - shared_coverage_state.main_object.start_address;
   uintptr_t hash =
       fuzztest::internal::Hash64Bits(pc_offset) ^ tls.path_ring_buffer.hash();
   if (Arg1 == Arg2) {
-    state.cmp_eq_set.set(hash);
+    shared_coverage_state.cmp_eq_set.set(hash);
   } else {
     hash <<= 6;  // ABTo* generate 6-bit numbers.
-    state.cmp_moddiff_set.set(hash |
-                              fuzztest::internal::ABToCmpModDiff(Arg1, Arg2));
-    state.cmp_hamming_set.set(hash |
-                              fuzztest::internal::ABToCmpHamming(Arg1, Arg2));
-    state.cmp_difflog_set.set(hash |
-                              fuzztest::internal::ABToCmpDiffLog(Arg1, Arg2));
+    shared_coverage_state.cmp_moddiff_set.set(
+        hash | fuzztest::internal::ABToCmpModDiff(Arg1, Arg2));
+    shared_coverage_state.cmp_hamming_set.set(
+        hash | fuzztest::internal::ABToCmpHamming(Arg1, Arg2));
+    shared_coverage_state.cmp_difflog_set.set(
+        hash | fuzztest::internal::ABToCmpDiffLog(Arg1, Arg2));
   }
 }
 
@@ -114,19 +118,19 @@
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
   TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
     tls.cmp_trace2.Capture(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
   TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
     tls.cmp_trace4.Capture(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
   TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
     tls.cmp_trace8.Capture(Arg1, Arg2);
 }
 NO_SANITIZE
@@ -136,19 +140,19 @@
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
   TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
     tls.cmp_trace2.Capture(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
   TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
     tls.cmp_trace4.Capture(Arg1, Arg2);
 }
 NO_SANITIZE
 void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
   TraceCmp(Arg1, Arg2);
-  if (Arg1 != Arg2 && state.run_time_flags.use_auto_dictionary)
+  if (Arg1 != Arg2 && shared_coverage_state.run_time_flags.use_auto_dictionary)
     tls.cmp_trace8.Capture(Arg1, Arg2);
 }
 // TODO(kcc): [impl] handle switch.
@@ -159,7 +163,7 @@
 // -fsanitize-coverage=inline-8bit-counters is used.
 // See https://clang.llvm.org/docs/SanitizerCoverage.html#inline-8bit-counters
 void __sanitizer_cov_8bit_counters_init(uint8_t *beg, uint8_t *end) {
-  state.sancov_objects.Inline8BitCountersInit(beg, end);
+  shared_coverage_state.sancov_objects.Inline8BitCountersInit(beg, end);
 }
 
 // https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table
@@ -169,13 +173,13 @@
 // We currently do not support more than one sancov-instrumented DSO.
 void __sanitizer_cov_pcs_init(const PCInfo *absl_nonnull beg,
                               const PCInfo *end) {
-  state.sancov_objects.PCInfoInit(beg, end);
+  shared_coverage_state.sancov_objects.PCInfoInit(beg, end);
 }
 
 // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow
 // This function is called at the DSO init time.
 void __sanitizer_cov_cfs_init(const uintptr_t *beg, const uintptr_t *end) {
-  state.sancov_objects.CFSInit(beg, end);
+  shared_coverage_state.sancov_objects.CFSInit(beg, end);
 }
 
 // Updates the state of the paths, `path_level > 0`.
@@ -183,7 +187,7 @@
 // of __sanitizer_cov_trace_pc_guard.
 __attribute__((noinline)) static void HandlePath(uintptr_t normalized_pc) {
   uintptr_t hash = tls.path_ring_buffer.push(normalized_pc);
-  state.path_feature_set.set(hash);
+  shared_coverage_state.path_feature_set.set(hash);
 }
 
 // Handles one observed PC.
@@ -194,8 +198,8 @@
 // With __sanitizer_cov_trace_pc this is PC itself, normalized by subtracting
 // the DSO's dynamic start address.
 static ENFORCE_INLINE void HandleOnePc(PCGuard pc_guard) {
-  if (!state.run_time_flags.use_pc_features) return;
-  state.pc_counter_set.SaturatedIncrement(pc_guard.pc_index);
+  if (!shared_coverage_state.run_time_flags.use_pc_features) return;
+  shared_coverage_state.pc_counter_set.SaturatedIncrement(pc_guard.pc_index);
 
   if (pc_guard.is_function_entry) {
     uintptr_t sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
@@ -209,14 +213,15 @@
       tls.lowest_sp = sp;
       fuzztest::internal::CheckStackLimit(sp);
     }
-    if (state.run_time_flags.callstack_level != 0) {
+    if (shared_coverage_state.run_time_flags.callstack_level != 0) {
       tls.call_stack.OnFunctionEntry(pc_guard.pc_index, sp);
-      state.callstack_set.set(tls.call_stack.Hash());
+      shared_coverage_state.callstack_set.set(tls.call_stack.Hash());
     }
   }
 
   // path features.
-  if (state.run_time_flags.path_level != 0) HandlePath(pc_guard.pc_index);
+  if (shared_coverage_state.run_time_flags.path_level != 0)
+    HandlePath(pc_guard.pc_index);
 }
 
 // Caller PC is the PC of the call instruction.
@@ -235,9 +240,11 @@
 
 // Sets `actual_pc_counter_set_size_aligned` to `size`, properly aligned up.
 static void UpdatePcCounterSetSizeAligned(size_t size) {
-  constexpr size_t kAlignment = state.pc_counter_set.kSizeMultiple;
+  constexpr size_t kAlignment =
+      shared_coverage_state.pc_counter_set.kSizeMultiple;
   constexpr size_t kMask = kAlignment - 1;
-  state.actual_pc_counter_set_size_aligned = (size + kMask) & ~kMask;
+  shared_coverage_state.actual_pc_counter_set_size_aligned =
+      (size + kMask) & ~kMask;
 }
 
 // MainObjectLazyInit() and helpers allow us to initialize state.main_object
@@ -260,11 +267,12 @@
 // b) it will slowdown the hot function.
 static pthread_once_t main_object_lazy_init_once = PTHREAD_ONCE_INIT;
 static void MainObjectLazyInitOnceCallback() {
-  state.main_object =
-      fuzztest::internal::GetDlInfo(state.GetStringFlag(":dl_path_suffix="));
+  shared_coverage_state.main_object = fuzztest::internal::GetDlInfo(
+      shared_coverage_state.GetStringFlag(":dl_path_suffix="));
   fprintf(stderr, "MainObjectLazyInitOnceCallback %zx\n",
-          state.main_object.start_address);
-  UpdatePcCounterSetSizeAligned(state.reverse_pc_table.NumPcs());
+          shared_coverage_state.main_object.start_address);
+  UpdatePcCounterSetSizeAligned(
+      shared_coverage_state.reverse_pc_table.NumPcs());
 }
 
 __attribute__((noinline)) static void MainObjectLazyInit() {
@@ -281,15 +289,15 @@
 // this variant.
 void __sanitizer_cov_trace_pc() {
   uintptr_t pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  if (!state.main_object.start_address ||
-      !state.actual_pc_counter_set_size_aligned) {
+  if (!shared_coverage_state.main_object.start_address ||
+      !shared_coverage_state.actual_pc_counter_set_size_aligned) {
     // Don't track coverage at all before the PC table is initialized.
-    if (state.reverse_pc_table.NumPcs() == 0) return;
+    if (shared_coverage_state.reverse_pc_table.NumPcs() == 0) return;
     MainObjectLazyInit();
   }
-  pc -= state.main_object.start_address;
+  pc -= shared_coverage_state.main_object.start_address;
   pc = ReturnAddressToCallerPc(pc);
-  const auto pc_guard = state.reverse_pc_table.GetPCGuard(pc);
+  const auto pc_guard = shared_coverage_state.reverse_pc_table.GetPCGuard(pc);
   // TODO(kcc): compute is_function_entry for this case.
   if (pc_guard.IsValid()) HandleOnePc(pc_guard);
 }
@@ -297,8 +305,9 @@
 // This function is called at the DSO init time.
 void __sanitizer_cov_trace_pc_guard_init(PCGuard *absl_nonnull start,
                                          PCGuard *stop) {
-  state.sancov_objects.PCGuardInit(start, stop);
-  UpdatePcCounterSetSizeAligned(state.sancov_objects.NumInstrumentedPCs());
+  shared_coverage_state.sancov_objects.PCGuardInit(start, stop);
+  UpdatePcCounterSetSizeAligned(
+      shared_coverage_state.sancov_objects.NumInstrumentedPCs());
 }
 
 // This function is called on every instrumented edge.
diff --git a/centipede/shared_coverage_state.cc b/centipede/shared_coverage_state.cc
new file mode 100644
index 0000000..533a30e
--- /dev/null
+++ b/centipede/shared_coverage_state.cc
@@ -0,0 +1,102 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "./centipede/shared_coverage_state.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cstdint>
+#include <cstdlib>
+
+#include "absl/base/nullability.h"
+#include "./centipede/feature.h"
+#include "./centipede/runner_interface.h"
+#include "./centipede/runner_result.h"
+
+namespace fuzztest::internal {
+
+SharedCoverageState shared_coverage_state __attribute__((init_priority(199)));
+
+// We use __thread instead of thread_local so that the compiler warns if
+// the initializer for `tls` is not a constant expression.
+// `tls` thus must not have a CTOR.
+// This avoids calls to __tls_init() in hot functions that use `tls`.
+__thread ThreadLocalRunnerState tls;
+
+// Records `feature` in g_features, skipping repeats if skip_seen_features.
+void MaybeAddFeature(feature_t feature) {
+  auto &cov = shared_coverage_state;
+  const bool dedup = cov.run_time_flags.skip_seen_features;
+  if (dedup && cov.seen_features.get(feature)) return;
+  cov.g_features.push_back(feature);
+  if (dedup) cov.seen_features.set(feature);
+}
+
+// Full clear: no-op walk over the cmp bit sets (presumably resets them).
+void PrepareSharedCoverage(bool full_clear) {
+  auto &cov = shared_coverage_state;
+  if (!full_clear || !cov.run_time_flags.use_cmp_features) return;
+  cov.cmp_eq_set.ForEachNonZeroBit([](size_t) {});
+  cov.cmp_moddiff_set.ForEachNonZeroBit([](size_t) {});
+  cov.cmp_hamming_set.ForEachNonZeroBit([](size_t) {});
+  cov.cmp_difflog_set.ForEachNonZeroBit([](size_t) {});
+}
+
+void PostProcessSharedCoverage() {
+  auto &cov = shared_coverage_state;
+  // The cmp bit sets become features only when cmp features are enabled.
+  if (!cov.run_time_flags.use_cmp_features) return;
+  cov.cmp_eq_set.ForEachNonZeroBit([](size_t bit) {
+    MaybeAddFeature(feature_domains::kCMPEq.ConvertToMe(bit));
+  });
+  cov.cmp_moddiff_set.ForEachNonZeroBit([](size_t bit) {
+    MaybeAddFeature(feature_domains::kCMPModDiff.ConvertToMe(bit));
+  });
+  cov.cmp_hamming_set.ForEachNonZeroBit([](size_t bit) {
+    MaybeAddFeature(feature_domains::kCMPHamming.ConvertToMe(bit));
+  });
+  cov.cmp_difflog_set.ForEachNonZeroBit([](size_t bit) {
+    MaybeAddFeature(feature_domains::kCMPDiffLog.ConvertToMe(bit));
+  });
+}
+
+__attribute__((noinline)) void CheckStackLimit(uintptr_t sp) {
+  static std::atomic_flag stack_limit_exceeded = ATOMIC_FLAG_INIT;
+  const size_t stack_limit =
+      shared_coverage_state.run_time_flags.stack_limit_kb.load() << 10;
+  // Skip when no limit is set or the stack region of the thread is unknown.
+  if (stack_limit == 0 || !tls.stack_region_low) return;
+  const uintptr_t stack_usage = tls.top_frame_sp - sp;
+  if (stack_usage <= stack_limit) return;
+  // A zero input_start_time means no test is running; nothing to report.
+  if (shared_coverage_state.input_start_time == 0) return;
+  if (stack_limit_exceeded.test_and_set()) return;  // Report only once.
+  fprintf(stderr,
+          "========= Stack limit exceeded: %" PRIuPTR
+          " > %zu"
+          " (byte); aborting\n",
+          stack_usage, stack_limit);
+  CentipedeSetFailureDescription(
+      fuzztest::internal::kExecutionFailureStackLimitExceeded.data());
+  std::abort();
+}
+
+extern "C" __attribute__((weak)) const char *absl_nullable
+CentipedeGetRunnerFlags() {
+  // Weak default: a strdup'ed copy of CENTIPEDE_RUNNER_FLAGS, or nullptr.
+  const char *runner_flags_env = getenv("CENTIPEDE_RUNNER_FLAGS");
+  return runner_flags_env != nullptr ? strdup(runner_flags_env) : nullptr;
+}
+
+}  // namespace fuzztest::internal
diff --git a/centipede/shared_coverage_state.h b/centipede/shared_coverage_state.h
new file mode 100644
index 0000000..42a7802
--- /dev/null
+++ b/centipede/shared_coverage_state.h
@@ -0,0 +1,282 @@
+// Copyright 2022 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FUZZTEST_CENTIPEDE_SHARED_COVERAGE_STATE_H_
+#define FUZZTEST_CENTIPEDE_SHARED_COVERAGE_STATE_H_
+
+#include <algorithm>
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+
+#include "absl/base/const_init.h"
+#include "absl/base/nullability.h"
+#include "absl/numeric/bits.h"
+#include "./centipede/callstack.h"
+#include "./centipede/concurrent_bitset.h"
+#include "./centipede/concurrent_byteset.h"
+#include "./centipede/feature.h"
+#include "./centipede/hashed_ring_buffer.h"
+#include "./centipede/reverse_pc_table.h"
+#include "./centipede/runner_cmp_trace.h"
+#include "./centipede/runner_dl_info.h"
+#include "./centipede/runner_interface.h"
+#include "./centipede/runner_sancov_object.h"
+
+namespace fuzztest::internal {
+
+// Bit count for the cmp ConcurrentBitSets; an arbitrarily large size.
+inline constexpr size_t kCmpFeatureSetSize = 1 << 18;  // One shared entity.
+
+// Flags derived from CENTIPEDE_RUNNER_FLAGS.
+// Flags used in instrumentation callbacks are bit-packed for efficiency.
+struct RunTimeFlags {  // Field order must match its positional initializer.
+  uint64_t path_level : 8;  // ":path_level=", capped at kBoundedPathLength.
+  uint64_t use_pc_features : 1;        // Set by ":use_pc_features:".
+  uint64_t use_dataflow_features : 1;  // Set by ":use_dataflow_features:".
+  uint64_t use_cmp_features : 1;       // Set by ":use_cmp_features:".
+  uint64_t callstack_level : 8;        // ":callstack_level=", default 0.
+  uint64_t use_counter_features : 1;   // Set by ":use_counter_features:".
+  uint64_t use_auto_dictionary : 1;    // Set by ":use_auto_dictionary:".
+  std::atomic<uint64_t> timeout_per_input;  // ":timeout_per_input=", default 0.
+  uint64_t timeout_per_batch;               // ":timeout_per_batch=", default 0.
+  std::atomic<uint64_t> stack_limit_kb;  // ":stack_limit_kb=", 0 = no limit.
+  std::atomic<uint64_t> rss_limit_mb;    // ":rss_limit_mb=", default 0.
+  uint64_t crossover_level;              // ":crossover_level=", default 50.
+  uint64_t skip_seen_features : 1;       // Set by ":skip_seen_features:".
+  uint64_t ignore_timeout_reports : 1;   // Set by ":ignore_timeout_reports:".
+  uint64_t max_len;                      // ":max_len=", default 4000.
+};
+
+// One such object is created in runner's TLS.
+// There is no CTOR, since we don't want to use the brittle and lazy TLS CTORs.
+// All data members are zero-initialized during thread creation.
+struct ThreadLocalRunnerState {
+  // Traces the memory comparison of `n` bytes at `s1` and `s2` called at
+  // `caller_pc` with `is_equal` indicating whether the two memory regions have
+  // equal contents. May add cmp features and auto-dictionary entries if
+  // enabled. Not called by the shared coverage library.
+  void TraceMemCmp(uintptr_t caller_pc, const uint8_t *s1, const uint8_t *s2,
+                   size_t n,
+                   bool is_equal);
+
+  // Intrusive doubly-linked list of TLS objects.
+  // Guarded by state.tls_list_mu.
+  ThreadLocalRunnerState *next, *prev;
+
+  // The pthread_create() interceptor calls OnThreadStart() before the thread
+  // callback. The main thread also calls OnThreadStart(). OnThreadStop() will
+  // be called when thread termination is detected internally - see runner.cc.
+  void OnThreadStart();  // Not called by the shared coverage library.
+  void OnThreadStop();   // Not called by the shared coverage library.
+
+  // Whether OnThreadStart() is called on this thread. This is used as a proxy
+  // of the readiness of the lower-level runtime.
+  bool started;
+
+  // Paths are thread-local, so we maintain the current bounded path here.
+  // We allow paths of up to 100, controlled at run-time via the "path_level".
+  static constexpr uint64_t kBoundedPathLength = 100;
+  HashedRingBuffer<kBoundedPathLength> path_ring_buffer;
+
+  // Value of SP in the top call frame of the thread, computed in OnThreadStart.
+  uintptr_t top_frame_sp;
+  // The lower bound of the stack region of this thread. 0 means unknown.
+  uintptr_t stack_region_low;
+  // Lowest observed value of SP.
+  uintptr_t lowest_sp;
+
+  // The (imprecise) call stack is updated by the PC callback.
+  CallStack<> call_stack;
+
+  // Cmp traces capture the arguments of CMP instructions, memcmp, etc.
+  // We have dedicated traces for 2-, 4-, and 8-byte comparison, and
+  // a catch-all `cmp_traceN` trace for memcmp, etc.
+  CmpTrace<2, 64> cmp_trace2;
+  CmpTrace<4, 64> cmp_trace4;
+  CmpTrace<8, 64> cmp_trace8;
+  CmpTrace<0, 64> cmp_traceN;
+
+  // Set this to true if the thread needs to be ignored in ForEachTLS.
+  // It should be always false if the state is in the global detached_tls_list.
+  bool ignore;
+};
+
+struct CoverageFlags {
+  uint64_t use_cmp_features : 1;  // NOTE(review): also in RunTimeFlags.
+};
+
+struct SharedCoverageState {
+  // Runner reads flags from CentipedeGetRunnerFlags(). We don't use flags
+  // passed via argv so that argv flags can be passed directly to
+  // LLVMFuzzerInitialize, w/o filtering. The flags are separated with
+  // ':' on both sides, i.e. like this: ":flag1:flag2:flag3=value3".
+  // We do it this way to make the flag parsing code extremely simple. The
+  // interface is private between Centipede and the runner and may change.
+  //
+  // Note that this field reflects the initial runner flags. But some
+  // flags can change later (if wrapped with std::atomic).
+  const char *centipede_runner_flags = CentipedeGetRunnerFlags();
+  const char *arg1 = GetStringFlag(":arg1=");
+  const char *arg2 = GetStringFlag(":arg2=");
+  const char *arg3 = GetStringFlag(":arg3=");
+  // The path to a file where the runner may write the description of failure.
+  const char *failure_description_path =
+      GetStringFlag(":failure_description_path=");
+
+  // Flags.
+  RunTimeFlags run_time_flags = {
+      /*path_level=*/std::min(ThreadLocalRunnerState::kBoundedPathLength,
+                              HasIntFlag(":path_level=", 0)),
+      /*use_pc_features=*/HasFlag(":use_pc_features:"),
+      /*use_dataflow_features=*/HasFlag(":use_dataflow_features:"),
+      /*use_cmp_features=*/HasFlag(":use_cmp_features:"),
+      /*callstack_level=*/HasIntFlag(":callstack_level=", 0),
+      /*use_counter_features=*/HasFlag(":use_counter_features:"),
+      /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"),
+      /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0),
+      /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0),
+      /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0),
+      /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0),
+      /*crossover_level=*/HasIntFlag(":crossover_level=", 50),
+      /*skip_seen_features=*/HasFlag(":skip_seen_features:"),
+      /*ignore_timeout_reports=*/HasFlag(":ignore_timeout_reports:"),
+      /*max_len=*/HasIntFlag(":max_len=", 4000),
+  };
+
+  // Reports whether `flag` occurs anywhere in `centipede_runner_flags`.
+  // Callers typically pass the name wrapped in colons, e.g. ":some_flag:".
+  // TODO(ussuri): Refactor `char *` into a `string_view`.
+  bool HasFlag(const char *absl_nonnull flag) const {
+    const char *all_flags = centipede_runner_flags;
+    return all_flags != nullptr && strstr(all_flags, flag) != nullptr;
+  }
+
+  // Looks up a "flag=value" pair and returns the value parsed as an integer.
+  // Returns `default_value` when no flags were provided or `flag` is absent.
+  // Typical usage: pass ":some_flag=".
+  // TODO(ussuri): Refactor `char *` into a `string_view`.
+  uint64_t HasIntFlag(const char *absl_nonnull flag,
+                      uint64_t default_value) const {
+    const char *match =
+        centipede_runner_flags ? strstr(centipede_runner_flags, flag) : nullptr;
+    if (match == nullptr) return default_value;
+    return atoll(match + strlen(flag));  // NOLINT: can't use strto64, etc.
+  }
+
+  // Returns a copy of `value` from a ":flag=value:" entry, or nullptr if the
+  // flags are unset, `flag` is missing, or the value has no terminating ':'.
+  // The copy is allocated with strndup; keep it in `this` to avoid a leak.
+  // Typical usage: pass ":some_flag=".
+  // TODO(ussuri): Refactor `char *` into a `string_view`.
+  const char *absl_nullable GetStringFlag(const char *absl_nonnull flag) const {
+    if (centipede_runner_flags == nullptr) return nullptr;
+    // Locate ":flag=" and take everything up to (not including) the next ':'.
+    const char *flag_pos = strstr(centipede_runner_flags, flag);
+    if (flag_pos == nullptr) return nullptr;
+    const char *value = flag_pos + strlen(flag);
+    const char *terminator = strchr(value, ':');
+    // A value with no closing ':' is treated as absent.
+    return terminator ? strndup(value, terminator - value) : nullptr;
+  }
+
+  // Computed by DlInfo().
+  // Usually, the main object is the executable binary containing main()
+  // and most of the executable code (we assume that the target is
+  // built in mostly-static mode, i.e. -dynamic_mode=off).
+  // When the `dl_path_suffix` runner flag is provided, the main_object refers
+  // to the dynamic library (DSO) pointed to by this flag.
+  //
+  // Note: this runner currently does not support more than one instrumented
+  // DSO in the process, i.e. you either instrument the main binary, or one DSO.
+  // Supporting more than one DSO will require major changes,
+  // major added complexity, and potentially cause slowdown.
+  // There is currently no motivation for such a change.
+  DlInfo main_object;
+
+  // State for SanitizerCoverage.
+  // See https://clang.llvm.org/docs/SanitizerCoverage.html.
+  SanCovObjectArray sancov_objects;
+  // An arbitrarily large size.
+  static constexpr size_t kDataFlowFeatureSetSize = 1 << 18;
+  ConcurrentBitSet<kDataFlowFeatureSetSize> data_flow_feature_set{
+      absl::kConstInit};
+
+  // Tracing CMP instructions, capture events from these domains:
+  // kCMPEq, kCMPModDiff, kCMPHamming, kCMPModDiffLog, kCMPMsbEq.
+  // See https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow.
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_eq_set{absl::kConstInit};
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_moddiff_set{absl::kConstInit};
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_hamming_set{absl::kConstInit};
+  ConcurrentBitSet<kCmpFeatureSetSize> cmp_difflog_set{absl::kConstInit};
+
+  // We think that call stack produces rich signal, so we give a few bits to it.
+  static constexpr size_t kCallStackFeatureSetSize = 1 << 24;
+  ConcurrentBitSet<kCallStackFeatureSetSize> callstack_set{absl::kConstInit};
+
+  // kMaxNumPcs is the maximum number of instrumented PCs in the binary.
+  // We can be generous here since the unused memory will not cost anything.
+  // `pc_counter_set` is a static byte set supporting up to kMaxNumPcs PCs.
+  static constexpr size_t kMaxNumPcs = 1 << 28;
+  TwoLayerConcurrentByteSet<kMaxNumPcs> pc_counter_set{absl::kConstInit};
+  // This is the actual number of PCs, aligned up to
+  // pc_counter_set::kSizeMultiple, computed at startup.
+  size_t actual_pc_counter_set_size_aligned;
+
+  // Used by trace_pc instrumentation. Populated if `pcs_file_path` flag is set.
+  ReversePCTable reverse_pc_table;
+
+  // An arbitrarily large size.
+  static constexpr size_t kPathBitSetSize = 1 << 25;
+  // Observed paths. The total number of observed paths for --path_level=N
+  // can be up to NumPCs**N.
+  // So, we make the bitset very large, but it may still saturate.
+  ConcurrentBitSet<kPathBitSetSize> path_feature_set{absl::kConstInit};
+
+  // Per-input timer. Initially, zero. ResetInputTimer() sets it to the current
+  // time.
+  std::atomic<time_t> input_start_time;
+
+  // An arbitrarily large size; used as the size parameter of `g_features`.
+  static constexpr size_t kMaxFeatures = 1 << 20;
+  // FeatureArray used to accumulate features from all sources.
+  FeatureArray<kMaxFeatures> g_features;
+
+  // Features that were seen before.
+  static constexpr size_t kSeenFeatureSetSize =
+      absl::bit_ceil(feature_domains::kLastDomain.end());
+  ConcurrentBitSet<kSeenFeatureSetSize> seen_features{absl::kConstInit};
+};
+
+__attribute__((noinline))  // Not inlined so that it is visible in profiles.
+extern "C" void PrepareSharedCoverage(bool full_clear);
+
+__attribute__((noinline))  // Not inlined so that it is visible in profiles.
+extern "C" void PostProcessSharedCoverage();
+
+void MaybeAddFeature(feature_t feature);
+
+// Checks the stack pointer `sp` of the current thread against the stack limit.
+void CheckStackLimit(uintptr_t sp);
+
+extern SharedCoverageState shared_coverage_state;
+// TODO(review): remove if obsolete: extern RunTimeFlags run_time_flags;
+extern __thread ThreadLocalRunnerState tls;
+
+}  // namespace fuzztest::internal
+
+#endif  // FUZZTEST_CENTIPEDE_SHARED_COVERAGE_STATE_H_