centipede/centipede_interface.cc - third_party/github/google/fuzztest - Git at Google

 // Copyright 2022 The Centipede Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "./centipede/centipede_interface.h"

 #include <unistd.h>

 #include <algorithm>
 #include <atomic>
 #include <csignal>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <filesystem>  // NOLINT
 #include <iostream>
 #include <memory>
 #include <optional>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>

 #include "absl/base/optimization.h"
 #include "absl/cleanup/cleanup.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "absl/strings/ascii.h"
 #include "absl/strings/escaping.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_replace.h"
 #include "absl/time/clock.h"
 #include "absl/time/time.h"
 #include "absl/types/span.h"
 #include "./centipede/analyze_corpora.h"
 #include "./centipede/binary_info.h"
 #include "./centipede/centipede.h"
 #include "./centipede/centipede_callbacks.h"
 #include "./centipede/command.h"
 #include "./centipede/coverage.h"
 #include "./centipede/crash_summary.h"
 #include "./centipede/distill.h"
 #include "./centipede/environment.h"
 #include "./centipede/minimize_crash.h"
 #include "./centipede/pc_info.h"
 #include "./centipede/periodic_action.h"
 #include "./centipede/runner_result.h"
 #include "./centipede/seed_corpus_maker_lib.h"
 #include "./centipede/stats.h"
 #include "./centipede/stop.h"
 #include "./centipede/thread_pool.h"
 #include "./centipede/util.h"
 #include "./centipede/workdir.h"
 #include "./common/bazel.h"
 #include "./common/blob_file.h"
 #include "./common/defs.h"
 #include "./common/hash.h"
 #include "./common/logging.h"
 #include "./common/remote_file.h"
 #include "./common/status_macros.h"
 #include "./fuzztest/internal/configuration.h"

 namespace fuzztest::internal {

 namespace {

 // Installs the SIGINT handler, which requests an early stop of the run.
 // TODO(b/378532202): Replace this with a more generic mechanism that allows
 // the caller of `CentipedeMain()` to indicate when to stop.
 void SetSignalHandlers() {
   // The handler is registered for SIGINT only, so no other signal number is
   // expected to reach it.
   const auto on_signal = [](int received_signum) {
     if (received_signum != SIGINT) {
       ABSL_UNREACHABLE();
     }
     FUZZTEST_LOG(INFO) << "Ctrl-C pressed: winding down";
     RequestEarlyStop(EXIT_FAILURE);
   };

   struct sigaction sigact = {};
   sigact.sa_flags = SA_ONSTACK;
   sigact.sa_handler = on_signal;
   sigaction(SIGINT, &sigact, nullptr);
 }

 // Runs env.for_each_blob on every blob extracted from env.args.
 // Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.
 int ForEachBlob(const Environment& env) {
   auto tmpdir = TemporaryLocalDirPath();
   CreateLocalDirRemovedAtExit(tmpdir);
   std::string tmpfile = std::filesystem::path(tmpdir).append("t");

   for (const auto& arg : env.args) {
     FUZZTEST_LOG(INFO) << "Running '" << env.for_each_blob << "' on " << arg;
     auto blob_reader = DefaultBlobFileReaderFactory();
     absl::Status open_status = blob_reader->Open(arg);
     if (!open_status.ok()) {
       FUZZTEST_LOG(INFO) << "Failed to open " << arg << ": " << open_status;
       return EXIT_FAILURE;
     }
     ByteSpan blob;
     while (blob_reader->Read(blob) == absl::OkStatus()) {
       ByteArray bytes;
       bytes.insert(bytes.begin(), blob.data(), blob.end());
       // TODO(kcc): [impl] add a variant of WriteToLocalFile that accepts Span.
       WriteToLocalFile(tmpfile, bytes);
       std::string command_line = absl::StrReplaceAll(
           env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(bytes)}});
       Command cmd(command_line);
       // TODO(kcc): [as-needed] this creates one process per blob.
       // If this flag gets active use, we may want to define special cases,
       // e.g. if for_each_blob=="cp %P /some/where" we can do it in-process.
       cmd.Execute();
       if (ShouldStop()) return ExitCode();
     }
   }
   return EXIT_SUCCESS;
 }

 // Loads corpora from work dirs provided in `env.args`, if there are two args
 // provided, analyzes differences. If there is one arg provided, reports the
 // function coverage. Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.
 int Analyze(const Environment& env) {
   FUZZTEST_LOG(INFO) << "Analyze " << absl::StrJoin(env.args, ",");
   FUZZTEST_CHECK(!env.binary.empty()) << "--binary must be used";
   if (env.args.size() == 1) {
     const CoverageResults coverage_results =
         GetCoverage(env.binary_name, env.binary_hash, env.args[0]);
     WorkDir workdir{env};
     const std::string coverage_report_path =
         workdir.CoverageReportPath(/*annotation=*/"");
     DumpCoverageReport(coverage_results, coverage_report_path);
   } else if (env.args.size() == 2) {
     AnalyzeCorporaToLog(env.binary_name, env.binary_hash, env.args[0],
                         env.args[1]);
   } else {
     FUZZTEST_LOG(FATAL)
         << "for now, --analyze supports only 1 or 2 work dirs; got "
         << env.args.size();
   }
   return EXIT_SUCCESS;
 }

 // Writes the byte view of `pc_table` to the local file at `file_path`.
 void SavePCTableToFile(const PCTable& pc_table, std::string_view file_path) {
   const auto pc_table_bytes = AsByteSpan(pc_table);
   WriteToLocalFile(file_path, pc_table_bytes);
 }

 // Builds the `BinaryInfo` for the target described by `env` and returns it.
 //
 // Depending on `env`, this also serializes the binary info to the workdir and,
 // for binaries using the legacy trace-pc instrumentation, saves the PC table
 // to a temporary file whose path is stored in `pcs_file_path` (otherwise
 // `pcs_file_path` is left untouched).
 BinaryInfo PopulateBinaryInfoAndSavePCsIfNecessary(
     const Environment& env, CentipedeCallbacksFactory& callbacks_factory,
     std::string& pcs_file_path) {
   BinaryInfo binary_info;

   // Coverage of some fuzz targets does not come from instrumenting binaries;
   // for those targets the binary info is intentionally left empty.
   if (env.populate_binary_info) {
     ScopedCentipedeCallbacks callbacks_scope(callbacks_factory, env);
     callbacks_scope.callbacks()->PopulateBinaryInfo(binary_info);
   }

   if (env.save_binary_info) {
     const std::string binary_info_dir = WorkDir{env}.BinaryInfoDirPath();
     FUZZTEST_CHECK_OK(RemoteMkdir(binary_info_dir));
     FUZZTEST_LOG(INFO) << "Serializing binary info to: " << binary_info_dir;
     binary_info.Write(binary_info_dir);
   }

   if (binary_info.uses_legacy_trace_pc_instrumentation) {
     const std::filesystem::path tmp_dir{TemporaryLocalDirPath()};
     pcs_file_path = tmp_dir / "pcs";
     SavePCTableToFile(binary_info.pc_table, pcs_file_path);
   }

   if (env.use_pcpair_features) {
     FUZZTEST_CHECK(!binary_info.pc_table.empty())
         << "--use_pcpair_features requires non-empty pc_table";
   }

   return binary_info;
 }

 std::vector<Environment> CreateEnvironmentsForThreads(
     const Environment& origin_env, std::string_view pcs_file_path) {
   std::vector<Environment> envs(origin_env.num_threads, origin_env);
   size_t thread_idx = 0;
   for (auto& env : envs) {
     env.my_shard_index += thread_idx++;
     env.UpdateForExperiment();
     env.pcs_file_path = pcs_file_path;
   }
   return envs;
 }

 // Runs the fuzzing loop for `env.num_threads` threads and returns
 // `ExitCode()`.
 //
 // One `Environment` copy and one `Stats` slot are created per thread. While
 // the workers run, periodic actions append the stats to a CSV file and, if an
 // experiment is configured or verbose logging is on, also log them. With a
 // single thread the fuzzing loop runs on the calling thread; otherwise the
 // workers are scheduled on a `ThreadPool`. If `env.dry_run` is set, each
 // worker returns before constructing `Centipede`.
 int Fuzz(const Environment& env, const BinaryInfo& binary_info,
          std::string_view pcs_file_path,
          CentipedeCallbacksFactory& callbacks_factory) {
   CoverageLogger coverage_logger(binary_info.pc_table, binary_info.symbols);

   std::vector<Environment> envs =
       CreateEnvironmentsForThreads(env, pcs_file_path);
   std::vector<std::atomic<Stats>> stats_vec(env.num_threads);

   // Start periodic stats dumping and, optionally, logging.
   std::vector<PeriodicAction> stats_reporters;
   stats_reporters.emplace_back(
       [csv_appender = StatsCsvFileAppender{stats_vec, envs}]() mutable {
         csv_appender.ReportCurrStats();
       },
       PeriodicAction::Options{
           /*sleep_before_each=*/
           // Sleep `iteration` minutes, capped at 10 minutes.
           // NOTE(review): `std::clamp` needs all three arguments to have the
           // same type, so the `UL` literals only match `size_t` on platforms
           // where `size_t` is `unsigned long`.
           [](size_t iteration) {
             return absl::Minutes(std::clamp(iteration, 0UL, 10UL));
           },
       });
   if (!envs.front().experiment.empty() || FUZZTEST_VLOG_IS_ON(1)) {
     stats_reporters.emplace_back(
         [logger = StatsLogger{stats_vec, envs}]() mutable {
           logger.ReportCurrStats();
         },
         PeriodicAction::Options{
             /*sleep_before_each=*/
             // Sleep `iteration` seconds, kept within [5, 600] seconds.
             [](size_t iteration) {
               return absl::Seconds(std::clamp(iteration, 5UL, 600UL));
             },
         });
   }

   // The per-thread body: `my_env` and `stats` are the thread's own entries of
   // `envs` and `stats_vec`.
   auto fuzzing_worker =
       [&env, &callbacks_factory, &binary_info, &coverage_logger](
           Environment& my_env, std::atomic<Stats>& stats, bool create_tmpdir) {
         if (create_tmpdir) CreateLocalDirRemovedAtExit(TemporaryLocalDirPath());
         // Uses TID, call in this thread.
         my_env.seed = GetRandomSeed(env.seed);

         if (env.dry_run) return;

         ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, my_env);
         Centipede centipede(my_env, *scoped_callbacks.callbacks(), binary_info,
                             coverage_logger, stats);
         centipede.FuzzingLoop();
       };

   if (env.num_threads == 1) {
     // When fuzzing with one thread, run fuzzing loop in the current
     // thread. This is because FuzzTest/Centipede's single-process
     // fuzzing requires the test body, which is invoked by the fuzzing
     // loop, to run in the main thread.
     //
     // Here, the fuzzing worker should not re-create the tmpdir since the path
     // is thread-local and it has been created in the current function.
     fuzzing_worker(envs[0], stats_vec[0], /*create_tmpdir=*/false);
   } else {
     ThreadPool fuzzing_worker_threads{static_cast<int>(env.num_threads)};
     for (size_t thread_idx = 0; thread_idx < env.num_threads; thread_idx++) {
       Environment& my_env = envs[thread_idx];
       std::atomic<Stats>& my_stats = stats_vec[thread_idx];
       // The references captured here must stay valid until the pool is
       // destroyed at the end of this `else` block.
       fuzzing_worker_threads.Schedule([&fuzzing_worker, &my_env, &my_stats]() {
         fuzzing_worker(my_env, my_stats, /*create_tmpdir=*/true);
       });
     }  // All `fuzzing_worker_threads` join here.
   }

   for (auto& reporter : stats_reporters) {
     // Nudge one final update and stop the reporting thread.
     reporter.Nudge();
     reporter.Stop();
   }

   // Reward values are printed only when a knobs file was provided.
   if (!env.knobs_file.empty()) PrintRewardValues(stats_vec, std::cerr);

   return ExitCode();
 }

 // Returns the Bazel test shard for this process and, if the
 // TEST_SHARD_STATUS_FILE environment variable is set, signals sharding
 // support through the file it names.
 TestShard SetUpTestSharding() {
   TestShard test_shard = GetBazelTestShard();
   // Touching the shard status file tells the test runner that we support test
   // sharding. Updating the modification time would be enough, but clearing
   // the contents is simpler and is also what the GoogleTest framework does.
   const char* const status_file = std::getenv("TEST_SHARD_STATUS_FILE");
   if (status_file != nullptr) {
     ClearLocalFileContents(status_file);
   }
   return test_shard;
 }

 // Re-executes every input stored directly in `crashing_dir` with `env.binary`
 // and keeps only the inputs that still fail, are not setup failures, and have
 // a failure signature not seen earlier in this pass. All other inputs are
 // deleted. Each kept input is added to `crash_summary` and touched.
 // Returns the failure signatures of the kept inputs.
 absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashSignatures(
     const std::filesystem::path& crashing_dir, const Environment& env,
     CentipedeCallbacksFactory& callbacks_factory, CrashSummary& crash_summary) {
   // The corpus database layout assumes the crash input files are located
   // directly in the crashing subdirectory, so we don't list recursively.
   const std::vector<std::string> crashing_input_files =
       ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false));
   ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
   BatchResult batch_result;
   absl::flat_hash_set<std::string> remaining_crash_signatures;

   for (const std::string& crashing_input_file : crashing_input_files) {
     ByteArray crashing_input;
     FUZZTEST_CHECK_OK(
         RemoteFileGetContents(crashing_input_file, crashing_input));
     const bool execution_succeeded = scoped_callbacks.callbacks()->Execute(
         env.binary, {crashing_input}, batch_result);
     // A genuine crash is a failed execution that is not a setup failure.
     const bool is_genuine_crash =
         !execution_succeeded && !batch_result.IsSetupFailure();
     // The signature is recorded only for genuine crashes; a failed insertion
     // means an earlier input already has the same signature.
     const bool has_new_signature =
         is_genuine_crash &&
         remaining_crash_signatures.insert(batch_result.failure_signature())
             .second;
     if (!has_new_signature) {
       FUZZTEST_CHECK_OK(
           RemotePathDelete(crashing_input_file, /*recursively=*/false));
       continue;
     }
     crash_summary.AddCrash(
         {std::filesystem::path(crashing_input_file).filename(),
          /*category=*/batch_result.failure_description(),
          batch_result.failure_signature(),
          batch_result.failure_description()});
     FUZZTEST_CHECK_OK(RemotePathTouchExistingFile(crashing_input_file));
   }
   return remaining_crash_signatures;
 }

 // TODO(b/405382531): Add unit tests once the function is unit-testable.
 void DeduplicateAndOptionallyStoreNewCrashes(
     const WorkDir& workdir, size_t total_shards,
     absl::flat_hash_set<std::string> crash_signatures,
     const std::optional<std::filesystem::path>& crashing_dir,
     CrashSummary& crash_summary) {
   for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) {
     const std::vector<std::string> new_crashing_input_files =
         // The crash reproducer directory may contain subdirectories with
         // input files that don't individually cause a crash. We ignore those
         // for now and don't list the files recursively.
         ValueOrDie(
             RemoteListFiles(workdir.CrashReproducerDirPaths().Shard(shard_idx),
                             /*recursively=*/false));
     const std::filesystem::path crash_metadata_dir =
         workdir.CrashMetadataDirPaths().Shard(shard_idx);

     if (crashing_dir.has_value()) {
       FUZZTEST_CHECK_OK(RemoteMkdir(crashing_dir->c_str()));
     }
     for (const std::string& crashing_input_file : new_crashing_input_files) {
       const std::string crashing_input_file_name =
           std::filesystem::path(crashing_input_file).filename();
       const std::string crash_signature_path =
           crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".sig");
       std::string new_crash_signature;
       const absl::Status status =
           RemoteFileGetContents(crash_signature_path, new_crash_signature);
       if (!status.ok()) {
         FUZZTEST_LOG(WARNING)
             << "Ignoring crashing input " << crashing_input_file_name
             << " due to failure to read the crash signature: " << status;
         continue;
       }
       const bool is_duplicate =
           !crash_signatures.insert(new_crash_signature).second;
       if (is_duplicate) continue;

       const std::string crash_description_path =
           crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".desc");
       std::string new_crash_description;
       const absl::Status description_status =
           RemoteFileGetContents(crash_description_path, new_crash_description);
       if (!description_status.ok()) {
         FUZZTEST_LOG(WARNING)
             << "Failed to read crash description for "
             << crashing_input_file_name
             << ". Will use the crash signature as the description. Status: "
             << description_status;
         new_crash_description = new_crash_signature;
       }
       crash_summary.AddCrash({crashing_input_file_name,
                               /*category=*/new_crash_description,
                               std::move(new_crash_signature),
                               new_crash_description});
       if (crashing_dir.has_value()) {
         FUZZTEST_CHECK_OK(RemoteFileRename(
             crashing_input_file,
             (*crashing_dir / crashing_input_file_name).c_str()));
       }
     }
   }
 }

 // Builds the config for seeding the corpus files in `env.workdir`. The seeds
 // are the inputs in `regression_dir` (always used) and the previously
 // distilled corpus files from `coverage_dir` (used if non-empty).
 SeedCorpusConfig GetSeedCorpusConfig(const Environment& env,
                                      std::string_view regression_dir,
                                      std::string_view coverage_dir) {
   const WorkDir workdir{env};

   // Both sources take every input from the single most recent directory
   // matching `dir`.
   const auto make_source = [](std::string_view dir) {
     SeedCorpusSource source;
     source.dir_glob = std::string(dir);
     source.num_recent_dirs = 1;
     source.individual_input_rel_glob = "*";
     source.sampled_fraction_or_count = 1.0f;
     return source;
   };

   std::vector<SeedCorpusSource> sources;
   sources.push_back(make_source(regression_dir));
   if (!coverage_dir.empty()) {
     SeedCorpusSource coverage = make_source(coverage_dir);
     // We're using the previously distilled corpus files as seeds.
     const std::filesystem::path distilled_glob{
         workdir.DistilledCorpusFilePaths().AllShardsGlob()};
     coverage.shard_rel_glob = distilled_glob.filename();
     sources.push_back(std::move(coverage));
   }

   SeedCorpusDestination destination;
   destination.dir_path = env.workdir;
   // We're seeding the current corpus files.
   const std::filesystem::path corpus_glob{
       workdir.CorpusFilePaths().AllShardsGlob()};
   destination.shard_rel_glob = corpus_glob.filename();
   destination.shard_index_digits = WorkDir::kDigitsInShardIndex;
   destination.num_shards = static_cast<uint32_t>(env.num_threads);

   return {
       std::move(sources),
       std::move(destination),
   };
 }

 // Reads the fuzzing time recorded in `fuzzing_time_file`. The file must be
 // readable. If its whitespace-stripped contents do not parse as a duration,
 // logs a warning and returns a zero duration.
 absl::Duration ReadFuzzingTime(std::string_view fuzzing_time_file) {
   std::string fuzzing_time_str;
   FUZZTEST_CHECK_OK(RemoteFileGetContents(fuzzing_time_file, fuzzing_time_str));

   absl::Duration parsed_time;
   const bool parsed_ok = absl::ParseDuration(
       absl::StripAsciiWhitespace(fuzzing_time_str), &parsed_time);
   if (parsed_ok) return parsed_time;

   FUZZTEST_LOG(WARNING)
       << "Failed to parse fuzzing time of a resuming fuzz test: '"
       << fuzzing_time_str << "'. Assuming no fuzzing time so far.";
   return absl::ZeroDuration();
 }

 // Returns a periodic action that writes the time elapsed since `start_time`
 // to `fuzzing_time_file`, using `PeriodicAction::ZeroDelayConstInterval` with
 // a 15-second interval. A failed write is logged as a warning and otherwise
 // ignored.
 //
 // The action owns a copy of the file path: the returned `PeriodicAction`
 // outlives this call, so holding only the `std::string_view` parameter would
 // leave the action with a dangling path if the caller's string went away
 // first.
 PeriodicAction RecordFuzzingTime(std::string_view fuzzing_time_file,
                                  absl::Time start_time) {
   return {[time_file = std::string(fuzzing_time_file), start_time] {
             absl::Status status = RemoteFileSetContents(
                 time_file, absl::FormatDuration(absl::Now() - start_time));
             FUZZTEST_LOG_IF(WARNING, !status.ok())
                 << "Failed to write fuzzing time: " << status;
           },
           PeriodicAction::ZeroDelayConstInterval(absl::Seconds(15))};
 }

 // TODO(b/368325638): Add tests for this.
 //
 // Runs the fuzz tests selected for this invocation one by one and, unless the
 // update is skipped, updates each test's entry in the corpus database located
 // at `<corpus_database>/<binary_identifier>/<test name>`.
 //
 // For each test the function:
 //   1. picks the workdir (the user-specified one, or a per-test subdirectory
 //      of the base workdir), and resumes or skips the test based on the stored
 //      execution ID when one is configured;
 //   2. seeds the corpus from the database's "regression" dir and, if
 //      `replay_coverage_inputs` is set, its "coverage" dir (not when
 //      resuming);
 //   3. calls `Fuzz()` with the stop time derived from the per-test time limit
 //      minus any previously recorded fuzzing time;
 //   4. optionally moves the fuzzing stats under `stats_root`;
 //   5. deduplicates the crashes and, when the database is being updated,
 //      stores them in the "crashing" dir;
 //   6. distills the corpus and replaces the "coverage" dir with the result.
 //
 // `env` is taken by value because it is modified here (`workdir`, `binary`,
 // `save_binary_info`). Returns EXIT_SUCCESS; the remote file operations are
 // wrapped in FUZZTEST_CHECK_OK.
 int UpdateCorpusDatabaseForFuzzTests(
     Environment env, const fuzztest::internal::Configuration& fuzztest_config,
     CentipedeCallbacksFactory& callbacks_factory) {
   env.UpdateWithTargetConfig(fuzztest_config);

   absl::Time start_time = absl::Now();
   FUZZTEST_LOG(INFO)
       << "Starting the update of the corpus database for fuzz tests:"
       << "\nBinary: " << env.binary
       << "\nCorpus database: " << fuzztest_config.corpus_database;

   // Step 1: Preliminary set up of test sharding, binary info, etc.
   const auto [test_shard_index, total_test_shards] = SetUpTestSharding();
   const auto corpus_database_path =
       std::filesystem::path(fuzztest_config.corpus_database) /
       fuzztest_config.binary_identifier;
   // Empty when no stats root is configured; checked before use below.
   const auto stats_root_path =
       fuzztest_config.stats_root.empty()
           ? std::filesystem::path()
           : std::filesystem::path(fuzztest_config.stats_root) /
                 fuzztest_config.binary_identifier;
   // Falls back to the corpus database path when no workdir root is configured.
   const auto workdir_root_path =
       fuzztest_config.workdir_root.empty()
           ? corpus_database_path
           : std::filesystem::path(fuzztest_config.workdir_root) /
                 fuzztest_config.binary_identifier;
   // UTC timestamp taken once per invocation; used in the stats file name.
   const auto execution_stamp = [] {
     std::string stamp =
         absl::FormatTime("%Y-%m-%d-%H-%M-%S", absl::Now(), absl::UTCTimeZone());
     return stamp;
   }();
   std::vector<std::string> fuzz_tests_to_run;
   if (!env.fuzztest_multi_test_mode_soon_to_be_removed) {
     FUZZTEST_CHECK(fuzztest_config.fuzz_tests_in_current_shard.size() == 1)
         << "Centipede handles only one test when using FuzzTest. Use "
            "`--fuzztest_multi_test_mode_soon_to_be_removed` if you need "
            "Centipede to operate on multiple tests in one invocation - this "
            "feature is going to be removed soon.";
     fuzz_tests_to_run = fuzztest_config.fuzz_tests_in_current_shard;
   } else {
     // TODO: xinhaoyuan - remove this branch after merging the FuzzTest
     // configuration into Centipede flags.
     //
     // We hide shard information when querying the available tests. So we use
     // `fuzz_tests_in_current_shard` as the full list and shard it here. We
     // cannot use `fuzz_tests` because it does not take test filter into
     // account.
     for (int i = 0; i < fuzztest_config.fuzz_tests_in_current_shard.size();
          ++i) {
       if (i % total_test_shards == test_shard_index) {
         fuzz_tests_to_run.push_back(
             fuzztest_config.fuzz_tests_in_current_shard[i]);
       }
     }
   }
   FUZZTEST_LOG(INFO) << "Fuzz tests to run: "
                      << absl::StrJoin(fuzz_tests_to_run, ", ");

   // A user-specified workdir is not allowed together with multi-test mode.
   const bool is_workdir_specified = !env.workdir.empty();
   FUZZTEST_CHECK(!is_workdir_specified ||
                  !env.fuzztest_multi_test_mode_soon_to_be_removed);
   // When env.workdir is empty, the full workdir paths will be formed by
   // appending the fuzz test names to the base workdir path. We use different
   // path when only replaying to avoid replaying an unfinished fuzzing sessions.
   const auto base_workdir_path =
       is_workdir_specified
           ? std::filesystem::path{}  // Will not be used.
           : workdir_root_path /
                 absl::StrFormat("workdir%s.%03d",
                                 fuzztest_config.only_replay ? "-replay" : "",
                                 test_shard_index);
   // There's no point in saving the binary info to the workdir, since the
   // workdir is deleted at the end.
   env.save_binary_info = false;
   std::string pcs_file_path;
   BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary(
       env, callbacks_factory, pcs_file_path);

   FUZZTEST_LOG(INFO) << "Test shard index: " << test_shard_index
                      << " Total test shards: " << total_test_shards;

   // Step 2: Iterate over the fuzz tests and run them.
   // Keep the original binary string: in multi-test mode `env.binary` is
   // rebuilt from it for every test.
   const std::string binary = env.binary;
   for (int i = 0; i < fuzz_tests_to_run.size(); ++i) {
     // Clean up previous stop requests. stop_time will be set later.
     ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/absl::InfiniteFuture());
     if (env.fuzztest_multi_test_mode_soon_to_be_removed &&
         fuzztest_config.GetTimeLimitPerTest() < absl::InfiniteDuration()) {
       const absl::Duration test_time_limit =
           fuzztest_config.GetTimeLimitPerTest();
       // `start_time` here is the function-level start time.
       const absl::Status has_enough_time = VerifyBazelHasEnoughTimeToRunTest(
           start_time, test_time_limit,
           /*executed_tests_in_shard=*/i, fuzztest_config.fuzz_tests.size());
       FUZZTEST_CHECK_OK(has_enough_time)
           << "Not enough time for running the fuzz test "
           << fuzz_tests_to_run[i] << " for " << test_time_limit;
     }
     if (!is_workdir_specified) {
       env.workdir = base_workdir_path / fuzz_tests_to_run[i];
     }
     // Only read or written below when no workdir was specified.
     const auto execution_id_path =
         (base_workdir_path /
          absl::StrCat(fuzz_tests_to_run[i], ".execution_id"))
             .string();

     bool is_resuming = false;
     if (!is_workdir_specified && fuzztest_config.execution_id.has_value()) {
       // Use the execution IDs to resume or skip tests.
       const bool execution_id_matched = [&] {
         if (!RemotePathExists(execution_id_path)) return false;
         FUZZTEST_CHECK(!RemotePathIsDirectory(execution_id_path));
         std::string prev_execution_id;
         FUZZTEST_CHECK_OK(
             RemoteFileGetContents(execution_id_path, prev_execution_id));
         return prev_execution_id == *fuzztest_config.execution_id;
       }();
       if (execution_id_matched) {
         // If execution IDs match but the previous coverage is missing, it means
         // the test was previously finished, and we skip running for the test.
         if (!RemotePathExists(WorkDir{env}.CoverageDirPath())) {
           FUZZTEST_LOG(INFO)
               << "Skipping running the fuzz test " << fuzz_tests_to_run[i];
           continue;
         }
         // If execution IDs match and the previous coverage exists, it means
         // the same workflow got interrupted when running the test. So we resume
         // the test.
         is_resuming = true;
         FUZZTEST_LOG(INFO) << "Resuming running the fuzz test "
                            << fuzz_tests_to_run[i];
       } else {
         // If the execution IDs mismatch, we start a new run.
         is_resuming = false;
         FUZZTEST_LOG(INFO) << "Starting a new run of the fuzz test "
                            << fuzz_tests_to_run[i];
       }
     }
     if (RemotePathExists(env.workdir) && !is_resuming) {
       // This could be a workdir from a failed run that used a different version
       // of the binary. We delete it so that we don't have to deal with
       // the assumptions under which it is safe to reuse an old workdir.
       FUZZTEST_CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true));
     }
     const WorkDir workdir{env};
     FUZZTEST_CHECK_OK(RemoteMkdir(
         workdir.CoverageDirPath()));  // Implicitly creates the workdir

     // Updating execution ID must be after creating the coverage dir. Otherwise
     // if it fails to create coverage dir after updating execution ID, next
     // attempt would skip this test.
     if (!is_workdir_specified && fuzztest_config.execution_id.has_value() &&
         !is_resuming) {
       FUZZTEST_CHECK_OK(RemoteFileSetContents(execution_id_path,
                                               *fuzztest_config.execution_id));
     }

     // Runs whenever this iteration ends, including through the `continue`s
     // below. The workdir is kept if the user specified it or if an early stop
     // was requested.
     absl::Cleanup clean_up_workdir = [is_workdir_specified, &env] {
       if (!is_workdir_specified && !EarlyStopRequested()) {
         FUZZTEST_CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true));
       }
     };

     const std::filesystem::path fuzztest_db_path =
         corpus_database_path / fuzz_tests_to_run[i];
     const std::filesystem::path regression_dir =
         fuzztest_db_path / "regression";
     const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage";

     // Seed the fuzzing session with the latest coverage corpus and regression
     // inputs from the previous fuzzing session.
     if (!is_resuming) {
       FUZZTEST_CHECK_OK(GenerateSeedCorpusFromConfig(
           GetSeedCorpusConfig(env, regression_dir.c_str(),
                               fuzztest_config.replay_coverage_inputs
                                   ? coverage_dir.c_str()
                                   : ""),
           env.binary_name, env.binary_hash))
           << "while generating the seed corpus";
     }

     if (env.fuzztest_multi_test_mode_soon_to_be_removed) {
       // TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way.
       constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz=";
       constexpr std::string_view kFuzzTestReplayCorpusFlag =
           "--replay_corpus=";
       std::string_view test_selection_flag = fuzztest_config.only_replay
                                                  ? kFuzzTestReplayCorpusFlag
                                                  : kFuzzTestFuzzFlag;
       env.binary =
           absl::StrCat(binary, " ", test_selection_flag, fuzz_tests_to_run[i]);
     }

     // When resuming, the time already spent (as recorded in the workdir) is
     // subtracted from the time limit, never going below zero.
     absl::Duration time_limit = fuzztest_config.GetTimeLimitPerTest();
     absl::Duration time_spent = absl::ZeroDuration();
     const std::string fuzzing_time_file =
         std::filesystem::path(env.workdir) / "fuzzing_time";
     if (is_resuming && RemotePathExists(fuzzing_time_file)) {
       time_spent = ReadFuzzingTime(fuzzing_time_file);
       time_limit = std::max(time_limit - time_spent, absl::ZeroDuration());
     }
     // NOTE(review): `is_resuming` is not read again in this iteration.
     is_resuming = false;

     if (EarlyStopRequested()) {
       FUZZTEST_LOG(INFO) << "Skipping test " << fuzz_tests_to_run[i]
                          << " because early stop requested.";
       continue;
     }

     FUZZTEST_LOG(INFO) << (fuzztest_config.only_replay ? "Replaying "
                                                        : "Fuzzing ")
                        << fuzz_tests_to_run[i] << " for " << time_limit
                        << "\n\tTest binary: " << env.binary;

     // NOTE: this shadows the function-level `start_time` for the rest of the
     // iteration; it is the start time of this test's fuzzing session.
     const absl::Time start_time = absl::Now();
     ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/start_time + time_limit);
     // Shifting the start time back by `time_spent` makes the recorded fuzzing
     // time include the time from the resumed session.
     PeriodicAction record_fuzzing_time =
         RecordFuzzingTime(fuzzing_time_file, start_time - time_spent);
     Fuzz(env, binary_info, pcs_file_path, callbacks_factory);
     record_fuzzing_time.Nudge();
     record_fuzzing_time.Stop();

     if (!stats_root_path.empty()) {
       const auto stats_dir = stats_root_path / fuzz_tests_to_run[i];
       FUZZTEST_CHECK_OK(RemoteMkdir(stats_dir.c_str()));
       FUZZTEST_CHECK_OK(RemoteFileRename(
           workdir.FuzzingStatsPath(),
           (stats_dir / absl::StrCat("fuzzing_stats_", execution_stamp))
               .c_str()));
     }

     if (EarlyStopRequested()) {
       FUZZTEST_LOG(INFO)
           << "Skip updating corpus database due to early stop requested.";
       continue;
     }

     // TODO(xinhaoyuan): Have a separate flag to skip corpus updating instead
     // of checking whether workdir is specified or not.
     const bool skip_corpus_db_update =
         fuzztest_config.only_replay || is_workdir_specified;
     if (skip_corpus_db_update && !env.report_crash_summary) continue;

     // Deduplicate and optionally update the crashing inputs.
     CrashSummary crash_summary{fuzztest_config.binary_identifier,
                                fuzz_tests_to_run[i]};
     // Without a database update there is no crashing dir to prune or to move
     // new crashes into; the crashes are only summarized.
     const std::optional<std::filesystem::path> crashing_dir =
         skip_corpus_db_update ? std::nullopt
                               : std::make_optional<std::filesystem::path>(
                                     fuzztest_db_path / "crashing");
     absl::flat_hash_set<std::string> crash_signatures =
         skip_corpus_db_update
             ? absl::flat_hash_set<std::string>{}
             : PruneOldCrashesAndGetRemainingCrashSignatures(
                   *crashing_dir, env, callbacks_factory, crash_summary);
     DeduplicateAndOptionallyStoreNewCrashes(workdir, env.total_shards,
                                             std::move(crash_signatures),
                                             crashing_dir, crash_summary);
     if (env.report_crash_summary) crash_summary.Report(&std::cerr);
     if (skip_corpus_db_update) continue;

     // Distill and store the coverage corpus.
     Distill(env);
     if (RemotePathExists(coverage_dir.c_str())) {
       // In the future, we will store k latest coverage corpora for some k, but
       // for now we only keep the latest one.
       FUZZTEST_CHECK_OK(
           RemotePathDelete(coverage_dir.c_str(), /*recursively=*/true));
     }
     FUZZTEST_CHECK_OK(RemoteMkdir(coverage_dir.c_str()));
     std::vector<std::string> distilled_corpus_files;
     FUZZTEST_CHECK_OK(
         RemoteGlobMatch(workdir.DistilledCorpusFilePaths().AllShardsGlob(),
                         distilled_corpus_files));
     // Move every distilled corpus file into the database's coverage dir.
     for (const std::string& corpus_file : distilled_corpus_files) {
       const std::string file_name =
           std::filesystem::path(corpus_file).filename();
       FUZZTEST_CHECK_OK(
           RemoteFileRename(corpus_file, (coverage_dir / file_name).c_str()));
     }
   }

   return EXIT_SUCCESS;
 }

 // Writes the IDs of the crashes stored in the corpus database for the single
 // fuzz test in the current shard to `env.list_crash_ids_file`, one ID per
 // line. A crash ID is the file name of an entry directly under the test's
 // "crashing" directory. If that directory does not exist, the written list is
 // empty. Returns `EXIT_SUCCESS`; failed preconditions and I/O errors abort
 // via the `FUZZTEST_CHECK*` macros.
 int ListCrashIds(const Environment& env,
                  const fuzztest::internal::Configuration& target_config) {
   FUZZTEST_CHECK(!env.list_crash_ids_file.empty())
       << "Need list_crash_ids_file to be set for listing crash IDs";
   FUZZTEST_CHECK_EQ(target_config.fuzz_tests_in_current_shard.size(), 1);
   // TODO: b/406003594 - move the path construction to a library.
   const std::filesystem::path crash_dir =
       std::filesystem::path(target_config.corpus_database) /
       target_config.binary_identifier /
       target_config.fuzz_tests_in_current_shard[0] / "crashing";
   const std::string crash_dir_name = crash_dir.string();

   std::vector<std::string> crash_ids;
   if (RemotePathExists(crash_dir_name)) {
     FUZZTEST_CHECK(RemotePathIsDirectory(crash_dir_name))
         << "Crash dir " << crash_dir << " in the corpus database "
         << target_config.corpus_database << " is not a directory";
     const std::vector<std::string> crash_files =
         ValueOrDie(RemoteListFiles(crash_dir_name, /*recursively=*/false));
     crash_ids.reserve(crash_files.size());
     // Only the last path component identifies the crash.
     for (const std::string& crash_file : crash_files) {
       crash_ids.emplace_back(
           std::filesystem::path{crash_file}.filename().string());
     }
   }

   FUZZTEST_CHECK_OK(RemoteFileSetContents(env.list_crash_ids_file,
                                           absl::StrJoin(crash_ids, "\n")));
   return EXIT_SUCCESS;
 }

 // Replays the crash `env.crash_id` of the single fuzz test in the current
 // shard. The crash input is looked up in the test's "crashing" directory of
 // the corpus database and exported into the workdir as a one-shard seed
 // corpus; `Fuzz()` is then run on a copy of `env` with `load_shards_only`
 // set. If `env.report_crash_summary` is set, a crash summary is printed to
 // stderr afterwards. Returns the result of `Fuzz()`.
 int ReplayCrash(const Environment& env,
                 const fuzztest::internal::Configuration& target_config,
                 CentipedeCallbacksFactory& callbacks_factory) {
   FUZZTEST_CHECK(!env.crash_id.empty())
       << "Need crash_id to be set for replay a crash";
   FUZZTEST_CHECK(target_config.fuzz_tests_in_current_shard.size() == 1)
       << "Expecting exactly one test for replay_crash";
   const auto& test_name = target_config.fuzz_tests_in_current_shard[0];
   // TODO: b/406003594 - move the path construction to a library.
   const std::filesystem::path crash_dir =
       std::filesystem::path(target_config.corpus_database) /
       target_config.binary_identifier / test_name / "crashing";
   const WorkDir workdir{env};

   // Seed corpus source: the requested crash input inside the crash dir.
   SeedCorpusSource crash_source;
   crash_source.dir_glob = crash_dir;
   crash_source.num_recent_dirs = 1;
   crash_source.individual_input_rel_glob = env.crash_id;
   crash_source.sampled_fraction_or_count = 1.0f;

   // Seed corpus destination: a single corpus shard in the workdir, named
   // after the file-name part of the workdir's corpus shard glob.
   const std::filesystem::path shard_file_name =
       std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()}
           .filename();
   const SeedCorpusConfig seed_config = {
       /*sources=*/{crash_source},
       /*destination=*/{
           /*dir_path=*/env.workdir,
           /*shard_rel_glob=*/shard_file_name,
           /*shard_index_digits=*/WorkDir::kDigitsInShardIndex,
           /*num_shards=*/1}};
   FUZZTEST_CHECK_OK(GenerateSeedCorpusFromConfig(seed_config, env.binary_name,
                                                  env.binary_hash));

   Environment replay_env = env;
   replay_env.load_shards_only = true;
   const int fuzz_result = Fuzz(replay_env, {}, "", callbacks_factory);
   if (!env.report_crash_summary) return fuzz_result;

   CrashSummary crash_summary{target_config.binary_identifier, test_name};
   // There should be at most one crash, so no deduplication actually happens.
   DeduplicateAndOptionallyStoreNewCrashes(workdir, /*total_shards=*/1,
                                           /*crash_signatures=*/{},
                                           /*crashing_dir=*/std::nullopt,
                                           crash_summary);
   crash_summary.Report(&std::cerr);
   return fuzz_result;
 }

 // Copies the contents of the crash `env.crash_id` of the single fuzz test in
 // the current shard from the test's "crashing" directory in the corpus
 // database to `env.export_crash_file`. Returns `EXIT_SUCCESS` on success; if
 // reading or writing fails, logs the error and returns `EXIT_FAILURE`.
 int ExportCrash(const Environment& env,
                 const fuzztest::internal::Configuration& target_config) {
   FUZZTEST_CHECK(!env.crash_id.empty())
       << "Need crash_id to be set for exporting a crash";
   FUZZTEST_CHECK(!env.export_crash_file.empty())
       << "Need export_crash_file to be set for exporting a crash";
   FUZZTEST_CHECK(target_config.fuzz_tests_in_current_shard.size() == 1)
       << "Expecting exactly one test for exporting a crash";
   // TODO: b/406003594 - move the path construction to a library.
   const std::filesystem::path crash_dir =
       std::filesystem::path(target_config.corpus_database) /
       target_config.binary_identifier /
       target_config.fuzz_tests_in_current_shard[0] / "crashing";
   const std::filesystem::path crash_file = crash_dir / env.crash_id;

   std::string crash_contents;
   if (const auto read_status =
           RemoteFileGetContents(crash_file.c_str(), crash_contents);
       !read_status.ok()) {
     FUZZTEST_LOG(ERROR) << "Failed reading the crash " << env.crash_id
                         << " from " << crash_dir.c_str() << ": " << read_status;
     return EXIT_FAILURE;
   }
   if (const auto write_status =
           RemoteFileSetContents(env.export_crash_file, crash_contents);
       !write_status.ok()) {
     FUZZTEST_LOG(ERROR) << "Failed write the crash " << env.crash_id << " to "
                         << env.export_crash_file << ": " << write_status;
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }

 }  // namespace

 // Runs Centipede in the mode selected by `env` and returns the process exit
 // code.
 //
 // The modes are checked in a fixed order and the first one that applies
 // returns: corpus_to_files, crashes_to_files, for_each_blob, crash
 // minimization, corpus_from_files, distillation, the corpus database modes
 // (list/replay/export a crash, or update the database), analysis, and finally
 // plain fuzzing. `callbacks_factory` is forwarded to the modes that need
 // `CentipedeCallbacks`.
 int CentipedeMain(const Environment& env,
                   CentipedeCallbacksFactory& callbacks_factory) {
   ClearEarlyStopRequestAndSetStopTime(env.stop_at);
   SetSignalHandlers();

   // Export the corpus to the given location and exit.
   if (!env.corpus_to_files.empty()) {
     Centipede::CorpusToFiles(env, env.corpus_to_files);
     return EXIT_SUCCESS;
   }

   // Export the crashes to the given location and exit; unlike the corpus
   // export above, a failure here is logged and reported via the exit code.
   if (!env.crashes_to_files.empty()) {
     const auto status = Centipede::CrashesToFiles(env, env.crashes_to_files);
     if (status.ok()) return EXIT_SUCCESS;
     FUZZTEST_LOG(ERROR) << "Got error when exporting crashes to files: "
                         << status;
     return EXIT_FAILURE;
   }

   if (!env.for_each_blob.empty()) return ForEachBlob(env);

   // Minimize the crashing input read from the given local file and exit.
   if (!env.minimize_crash_file_path.empty()) {
     ByteArray crashy_input;
     ReadFromLocalFile(env.minimize_crash_file_path, crashy_input);
     return MinimizeCrash(crashy_input, env, callbacks_factory);
   }

   // Just export the corpus from a local dir and exit.
   if (!env.corpus_from_files.empty()) {
     Centipede::CorpusFromFiles(env, env.corpus_from_files);
     return EXIT_SUCCESS;
   }

   // Export the corpus from a local dir and then fuzz.
   if (!env.corpus_dir.empty()) {
     for (size_t i = 0; i < env.corpus_dir.size(); ++i) {
       const auto& corpus_dir = env.corpus_dir[i];
       // The first dir is not read when it is marked as output-only.
       if (i > 0 || !env.first_corpus_dir_output_only)
         Centipede::CorpusFromFiles(env, corpus_dir);
     }
   }

   if (env.distill) return Distill(env);

   // Create the local temporary dir once, before creating any threads. The
   // temporary dir must typically exist before `CentipedeCallbacks` can be used.
   const auto tmpdir = TemporaryLocalDirPath();
   CreateLocalDirRemovedAtExit(tmpdir);

   // Enter the update corpus database mode only if we have a binary to invoke
   // and a corpus database to update.
   // We don't update the corpus database for standalone binaries (i.e., when
   // `env.has_input_wildcards` is true).
   if (!env.binary.empty() && !env.has_input_wildcards) {
     // The serialized target configuration is taken from
     // `env.fuzztest_configuration` (web-safe base64) when that is set, and is
     // otherwise requested from the callbacks.
     const auto serialized_target_config = [&]() -> absl::StatusOr<std::string> {
       // TODO: b/410051414 Use Centipede flags to pass necessary information
       // instead of passing the entirely serialized Configuration once switched
       // to the unified execution model.
       if (!env.fuzztest_configuration.empty()) {
         std::string result;
         FUZZTEST_CHECK(
             absl::WebSafeBase64Unescape(env.fuzztest_configuration, &result));
         return result;
       }
       ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
       return scoped_callbacks.callbacks()->GetSerializedTargetConfig();
     }();
     FUZZTEST_CHECK_OK(serialized_target_config.status());
     // An empty serialized configuration, or one without a corpus database,
     // falls through to the regular analyze/fuzz flow below.
     if (!serialized_target_config->empty()) {
       const auto target_config = fuzztest::internal::Configuration::Deserialize(
           *serialized_target_config);
       FUZZTEST_CHECK_OK(target_config.status())
           << "Failed to deserialize target configuration";
       if (!target_config->corpus_database.empty()) {
         // The three crash-handling modes are mutually exclusive.
         FUZZTEST_LOG_IF(
             FATAL, env.list_crash_ids + env.replay_crash + env.export_crash > 1)
             << "At most one of list_crash_ids/replay_crash/export_crash can "
                "be set, but seeing list_crash_ids: "
             << env.list_crash_ids << ", replay_crash: " << env.replay_crash
             << ", export_crash: " << env.export_crash;
         if (env.list_crash_ids) {
           return ListCrashIds(env, *target_config);
         }
         if (env.replay_crash) {
           return ReplayCrash(env, *target_config, callbacks_factory);
         }
         if (env.export_crash) {
           return ExportCrash(env, *target_config);
         }

         // No crash-handling mode was requested: update the corpus database.
         // An infinite per-test time limit is accepted only in replay-only
         // mode or when there is exactly one fuzz test in the current shard.
         const auto time_limit_per_test = target_config->GetTimeLimitPerTest();
         FUZZTEST_CHECK(target_config->only_replay ||
                        time_limit_per_test < absl::InfiniteDuration() ||
                        target_config->fuzz_tests_in_current_shard.size() == 1)
             << "Updating corpus database requires specifying time limit per "
                "fuzz test when there are more than one tests.";
         FUZZTEST_CHECK(time_limit_per_test >= absl::Seconds(1))
             << "Time limit per fuzz test must be at least 1 second.";
         return UpdateCorpusDatabaseForFuzzTests(env, *target_config,
                                                 callbacks_factory);
       }
     }
   }

   // Create the remote coverage dirs once, before creating any threads.
   const auto coverage_dir = WorkDir{env}.CoverageDirPath();
   FUZZTEST_CHECK_OK(RemoteMkdir(coverage_dir));
   FUZZTEST_LOG(INFO) << "Coverage dir: " << coverage_dir
                      << "; temporary dir: " << tmpdir;

   // `pcs_file_path` is passed to the call below and then on to `Fuzz()`.
   std::string pcs_file_path;
   BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary(
       env, callbacks_factory, pcs_file_path);

   if (env.analyze) return Analyze(env);

   return Fuzz(env, binary_info, pcs_file_path, callbacks_factory);
   // TODO: fniksic - Report the crash summary here if requested. What are the
   // binary identifier and the fuzz test name here?
 }

 }  // namespace fuzztest::internal