| // Copyright 2022 The Centipede Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "./centipede/centipede_interface.h" |
| |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <atomic> |
| #include <csignal> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <cstring> |
| #include <filesystem> // NOLINT |
| #include <iostream> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <string_view> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/base/optimization.h" |
| #include "absl/cleanup/cleanup.h" |
| #include "absl/container/flat_hash_set.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/ascii.h" |
| #include "absl/strings/escaping.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/str_format.h" |
| #include "absl/strings/str_join.h" |
| #include "absl/strings/str_replace.h" |
| #include "absl/time/clock.h" |
| #include "absl/time/time.h" |
| #include "absl/types/span.h" |
| #include "./centipede/analyze_corpora.h" |
| #include "./centipede/binary_info.h" |
| #include "./centipede/centipede.h" |
| #include "./centipede/centipede_callbacks.h" |
| #include "./centipede/command.h" |
| #include "./centipede/coverage.h" |
| #include "./centipede/crash_summary.h" |
| #include "./centipede/distill.h" |
| #include "./centipede/environment.h" |
| #include "./centipede/minimize_crash.h" |
| #include "./centipede/pc_info.h" |
| #include "./centipede/periodic_action.h" |
| #include "./centipede/runner_result.h" |
| #include "./centipede/seed_corpus_maker_lib.h" |
| #include "./centipede/stats.h" |
| #include "./centipede/stop.h" |
| #include "./centipede/thread_pool.h" |
| #include "./centipede/util.h" |
| #include "./centipede/workdir.h" |
| #include "./common/bazel.h" |
| #include "./common/blob_file.h" |
| #include "./common/defs.h" |
| #include "./common/hash.h" |
| #include "./common/logging.h" |
| #include "./common/remote_file.h" |
| #include "./common/status_macros.h" |
| #include "./fuzztest/internal/configuration.h" |
| |
| namespace fuzztest::internal { |
| |
| namespace { |
| |
| // Sets signal handler for SIGINT. |
| // TODO(b/378532202): Replace this with a more generic mechanism that allows |
| // the called or `CentipedeMain()` to indicate when to stop. |
| void SetSignalHandlers() { |
| struct sigaction sigact = {}; |
| sigact.sa_flags = SA_ONSTACK; |
| sigact.sa_handler = [](int received_signum) { |
| if (received_signum == SIGINT) { |
| FUZZTEST_LOG(INFO) << "Ctrl-C pressed: winding down"; |
| RequestEarlyStop(EXIT_FAILURE); |
| return; |
| } |
| ABSL_UNREACHABLE(); |
| }; |
| sigaction(SIGINT, &sigact, nullptr); |
| } |
| |
| // Runs env.for_each_blob on every blob extracted from env.args. |
| // Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise. |
| int ForEachBlob(const Environment& env) { |
| auto tmpdir = TemporaryLocalDirPath(); |
| CreateLocalDirRemovedAtExit(tmpdir); |
| std::string tmpfile = std::filesystem::path(tmpdir).append("t"); |
| |
| for (const auto& arg : env.args) { |
| FUZZTEST_LOG(INFO) << "Running '" << env.for_each_blob << "' on " << arg; |
| auto blob_reader = DefaultBlobFileReaderFactory(); |
| absl::Status open_status = blob_reader->Open(arg); |
| if (!open_status.ok()) { |
| FUZZTEST_LOG(INFO) << "Failed to open " << arg << ": " << open_status; |
| return EXIT_FAILURE; |
| } |
| ByteSpan blob; |
| while (blob_reader->Read(blob) == absl::OkStatus()) { |
| ByteArray bytes; |
| bytes.insert(bytes.begin(), blob.data(), blob.end()); |
| // TODO(kcc): [impl] add a variant of WriteToLocalFile that accepts Span. |
| WriteToLocalFile(tmpfile, bytes); |
| std::string command_line = absl::StrReplaceAll( |
| env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(bytes)}}); |
| Command cmd(command_line); |
| // TODO(kcc): [as-needed] this creates one process per blob. |
| // If this flag gets active use, we may want to define special cases, |
| // e.g. if for_each_blob=="cp %P /some/where" we can do it in-process. |
| cmd.Execute(); |
| if (ShouldStop()) return ExitCode(); |
| } |
| } |
| return EXIT_SUCCESS; |
| } |
| |
| // Loads corpora from work dirs provided in `env.args`, if there are two args |
| // provided, analyzes differences. If there is one arg provided, reports the |
| // function coverage. Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise. |
| int Analyze(const Environment& env) { |
| FUZZTEST_LOG(INFO) << "Analyze " << absl::StrJoin(env.args, ","); |
| FUZZTEST_CHECK(!env.binary.empty()) << "--binary must be used"; |
| if (env.args.size() == 1) { |
| const CoverageResults coverage_results = |
| GetCoverage(env.binary_name, env.binary_hash, env.args[0]); |
| WorkDir workdir{env}; |
| const std::string coverage_report_path = |
| workdir.CoverageReportPath(/*annotation=*/""); |
| DumpCoverageReport(coverage_results, coverage_report_path); |
| } else if (env.args.size() == 2) { |
| AnalyzeCorporaToLog(env.binary_name, env.binary_hash, env.args[0], |
| env.args[1]); |
| } else { |
| FUZZTEST_LOG(FATAL) |
| << "for now, --analyze supports only 1 or 2 work dirs; got " |
| << env.args.size(); |
| } |
| return EXIT_SUCCESS; |
| } |
| |
| void SavePCTableToFile(const PCTable& pc_table, std::string_view file_path) { |
| WriteToLocalFile(file_path, AsByteSpan(pc_table)); |
| } |
| |
| BinaryInfo PopulateBinaryInfoAndSavePCsIfNecessary( |
| const Environment& env, CentipedeCallbacksFactory& callbacks_factory, |
| std::string& pcs_file_path) { |
| BinaryInfo binary_info; |
| // Some fuzz targets have coverage not based on instrumenting binaries. |
| // For those target, we should not populate binary info. |
| if (env.populate_binary_info) { |
| ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); |
| scoped_callbacks.callbacks()->PopulateBinaryInfo(binary_info); |
| } |
| if (env.save_binary_info) { |
| const std::string binary_info_dir = WorkDir{env}.BinaryInfoDirPath(); |
| FUZZTEST_CHECK_OK(RemoteMkdir(binary_info_dir)); |
| FUZZTEST_LOG(INFO) << "Serializing binary info to: " << binary_info_dir; |
| binary_info.Write(binary_info_dir); |
| } |
| if (binary_info.uses_legacy_trace_pc_instrumentation) { |
| pcs_file_path = std::filesystem::path(TemporaryLocalDirPath()) / "pcs"; |
| SavePCTableToFile(binary_info.pc_table, pcs_file_path); |
| } |
| if (env.use_pcpair_features) { |
| FUZZTEST_CHECK(!binary_info.pc_table.empty()) |
| << "--use_pcpair_features requires non-empty pc_table"; |
| } |
| return binary_info; |
| } |
| |
| std::vector<Environment> CreateEnvironmentsForThreads( |
| const Environment& origin_env, std::string_view pcs_file_path) { |
| std::vector<Environment> envs(origin_env.num_threads, origin_env); |
| size_t thread_idx = 0; |
| for (auto& env : envs) { |
| env.my_shard_index += thread_idx++; |
| env.UpdateForExperiment(); |
| env.pcs_file_path = pcs_file_path; |
| } |
| return envs; |
| } |
| |
| int Fuzz(const Environment& env, const BinaryInfo& binary_info, |
| std::string_view pcs_file_path, |
| CentipedeCallbacksFactory& callbacks_factory) { |
| CoverageLogger coverage_logger(binary_info.pc_table, binary_info.symbols); |
| |
| std::vector<Environment> envs = |
| CreateEnvironmentsForThreads(env, pcs_file_path); |
| std::vector<std::atomic<Stats>> stats_vec(env.num_threads); |
| |
| // Start periodic stats dumping and, optionally, logging. |
| std::vector<PeriodicAction> stats_reporters; |
| stats_reporters.emplace_back( |
| [csv_appender = StatsCsvFileAppender{stats_vec, envs}]() mutable { |
| csv_appender.ReportCurrStats(); |
| }, |
| PeriodicAction::Options{ |
| /*sleep_before_each=*/ |
| [](size_t iteration) { |
| return absl::Minutes(std::clamp(iteration, 0UL, 10UL)); |
| }, |
| }); |
| if (!envs.front().experiment.empty() || FUZZTEST_VLOG_IS_ON(1)) { |
| stats_reporters.emplace_back( |
| [logger = StatsLogger{stats_vec, envs}]() mutable { |
| logger.ReportCurrStats(); |
| }, |
| PeriodicAction::Options{ |
| /*sleep_before_each=*/ |
| [](size_t iteration) { |
| return absl::Seconds(std::clamp(iteration, 5UL, 600UL)); |
| }, |
| }); |
| } |
| |
| auto fuzzing_worker = |
| [&env, &callbacks_factory, &binary_info, &coverage_logger]( |
| Environment& my_env, std::atomic<Stats>& stats, bool create_tmpdir) { |
| if (create_tmpdir) CreateLocalDirRemovedAtExit(TemporaryLocalDirPath()); |
| // Uses TID, call in this thread. |
| my_env.seed = GetRandomSeed(env.seed); |
| |
| if (env.dry_run) return; |
| |
| ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, my_env); |
| Centipede centipede(my_env, *scoped_callbacks.callbacks(), binary_info, |
| coverage_logger, stats); |
| centipede.FuzzingLoop(); |
| }; |
| |
| if (env.num_threads == 1) { |
| // When fuzzing with one thread, run fuzzing loop in the current |
| // thread. This is because FuzzTest/Centipede's single-process |
| // fuzzing requires the test body, which is invoked by the fuzzing |
| // loop, to run in the main thread. |
| // |
| // Here, the fuzzing worker should not re-create the tmpdir since the path |
| // is thread-local and it has been created in the current function. |
| fuzzing_worker(envs[0], stats_vec[0], /*create_tmpdir=*/false); |
| } else { |
| ThreadPool fuzzing_worker_threads{static_cast<int>(env.num_threads)}; |
| for (size_t thread_idx = 0; thread_idx < env.num_threads; thread_idx++) { |
| Environment& my_env = envs[thread_idx]; |
| std::atomic<Stats>& my_stats = stats_vec[thread_idx]; |
| fuzzing_worker_threads.Schedule([&fuzzing_worker, &my_env, &my_stats]() { |
| fuzzing_worker(my_env, my_stats, /*create_tmpdir=*/true); |
| }); |
| } // All `fuzzing_worker_threads` join here. |
| } |
| |
| for (auto& reporter : stats_reporters) { |
| // Nudge one final update and stop the reporting thread. |
| reporter.Nudge(); |
| reporter.Stop(); |
| } |
| |
| if (!env.knobs_file.empty()) PrintRewardValues(stats_vec, std::cerr); |
| |
| return ExitCode(); |
| } |
| |
| TestShard SetUpTestSharding() { |
| TestShard test_shard = GetBazelTestShard(); |
| // Update the shard status file to indicate that we support test sharding. |
| // It suffices to update the file's modification time, but we clear the |
| // contents for simplicity. This is also what the GoogleTest framework does. |
| if (const char* test_shard_status_file = |
| std::getenv("TEST_SHARD_STATUS_FILE"); |
| test_shard_status_file != nullptr) { |
| ClearLocalFileContents(test_shard_status_file); |
| } |
| return test_shard; |
| } |
| |
| // Prunes non-reproducible and duplicate crashes and returns the crash |
| // signatures of the remaining crashes. |
| absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashSignatures( |
| const std::filesystem::path& crashing_dir, const Environment& env, |
| CentipedeCallbacksFactory& callbacks_factory, CrashSummary& crash_summary) { |
| const std::vector<std::string> crashing_input_files = |
| // The corpus database layout assumes the crash input files are located |
| // directly in the crashing subdirectory, so we don't list recursively. |
| ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false)); |
| ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); |
| BatchResult batch_result; |
| absl::flat_hash_set<std::string> remaining_crash_signatures; |
| |
| for (const std::string& crashing_input_file : crashing_input_files) { |
| ByteArray crashing_input; |
| FUZZTEST_CHECK_OK( |
| RemoteFileGetContents(crashing_input_file, crashing_input)); |
| const bool is_reproducible = !scoped_callbacks.callbacks()->Execute( |
| env.binary, {crashing_input}, batch_result); |
| const bool is_duplicate = |
| is_reproducible && !batch_result.IsSetupFailure() && |
| !remaining_crash_signatures.insert(batch_result.failure_signature()) |
| .second; |
| if (!is_reproducible || batch_result.IsSetupFailure() || is_duplicate) { |
| FUZZTEST_CHECK_OK( |
| RemotePathDelete(crashing_input_file, /*recursively=*/false)); |
| } else { |
| crash_summary.AddCrash( |
| {std::filesystem::path(crashing_input_file).filename(), |
| /*category=*/batch_result.failure_description(), |
| batch_result.failure_signature(), |
| batch_result.failure_description()}); |
| FUZZTEST_CHECK_OK(RemotePathTouchExistingFile(crashing_input_file)); |
| } |
| } |
| return remaining_crash_signatures; |
| } |
| |
| // TODO(b/405382531): Add unit tests once the function is unit-testable. |
| void DeduplicateAndOptionallyStoreNewCrashes( |
| const WorkDir& workdir, size_t total_shards, |
| absl::flat_hash_set<std::string> crash_signatures, |
| const std::optional<std::filesystem::path>& crashing_dir, |
| CrashSummary& crash_summary) { |
| for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) { |
| const std::vector<std::string> new_crashing_input_files = |
| // The crash reproducer directory may contain subdirectories with |
| // input files that don't individually cause a crash. We ignore those |
| // for now and don't list the files recursively. |
| ValueOrDie( |
| RemoteListFiles(workdir.CrashReproducerDirPaths().Shard(shard_idx), |
| /*recursively=*/false)); |
| const std::filesystem::path crash_metadata_dir = |
| workdir.CrashMetadataDirPaths().Shard(shard_idx); |
| |
| if (crashing_dir.has_value()) { |
| FUZZTEST_CHECK_OK(RemoteMkdir(crashing_dir->c_str())); |
| } |
| for (const std::string& crashing_input_file : new_crashing_input_files) { |
| const std::string crashing_input_file_name = |
| std::filesystem::path(crashing_input_file).filename(); |
| const std::string crash_signature_path = |
| crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".sig"); |
| std::string new_crash_signature; |
| const absl::Status status = |
| RemoteFileGetContents(crash_signature_path, new_crash_signature); |
| if (!status.ok()) { |
| FUZZTEST_LOG(WARNING) |
| << "Ignoring crashing input " << crashing_input_file_name |
| << " due to failure to read the crash signature: " << status; |
| continue; |
| } |
| const bool is_duplicate = |
| !crash_signatures.insert(new_crash_signature).second; |
| if (is_duplicate) continue; |
| |
| const std::string crash_description_path = |
| crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".desc"); |
| std::string new_crash_description; |
| const absl::Status description_status = |
| RemoteFileGetContents(crash_description_path, new_crash_description); |
| if (!description_status.ok()) { |
| FUZZTEST_LOG(WARNING) |
| << "Failed to read crash description for " |
| << crashing_input_file_name |
| << ". Will use the crash signature as the description. Status: " |
| << description_status; |
| new_crash_description = new_crash_signature; |
| } |
| crash_summary.AddCrash({crashing_input_file_name, |
| /*category=*/new_crash_description, |
| std::move(new_crash_signature), |
| new_crash_description}); |
| if (crashing_dir.has_value()) { |
| FUZZTEST_CHECK_OK(RemoteFileRename( |
| crashing_input_file, |
| (*crashing_dir / crashing_input_file_name).c_str())); |
| } |
| } |
| } |
| } |
| |
| // Seeds the corpus files in `env.workdir` with the inputs in `regression_dir` |
| // (always used) and the previously distilled corpus files from `coverage_dir` |
| // (used if non-empty). |
| SeedCorpusConfig GetSeedCorpusConfig(const Environment& env, |
| std::string_view regression_dir, |
| std::string_view coverage_dir) { |
| const WorkDir workdir{env}; |
| SeedCorpusSource regression; |
| regression.dir_glob = std::string(regression_dir); |
| regression.num_recent_dirs = 1; |
| regression.individual_input_rel_glob = "*"; |
| regression.sampled_fraction_or_count = 1.0f; |
| std::vector<SeedCorpusSource> sources = {std::move(regression)}; |
| if (!coverage_dir.empty()) { |
| SeedCorpusSource coverage; |
| coverage.dir_glob = std::string(coverage_dir); |
| coverage.num_recent_dirs = 1; |
| // We're using the previously distilled corpus files as seeds. |
| coverage.shard_rel_glob = |
| std::filesystem::path{ |
| workdir.DistilledCorpusFilePaths().AllShardsGlob()} |
| .filename(); |
| coverage.individual_input_rel_glob = "*"; |
| coverage.sampled_fraction_or_count = 1.0f; |
| sources.push_back(std::move(coverage)); |
| } |
| SeedCorpusDestination destination; |
| destination.dir_path = env.workdir; |
| // We're seeding the current corpus files. |
| destination.shard_rel_glob = |
| std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()} |
| .filename(); |
| destination.shard_index_digits = WorkDir::kDigitsInShardIndex; |
| destination.num_shards = static_cast<uint32_t>(env.num_threads); |
| return { |
| std::move(sources), |
| std::move(destination), |
| }; |
| } |
| |
| absl::Duration ReadFuzzingTime(std::string_view fuzzing_time_file) { |
| std::string fuzzing_time_str; |
| FUZZTEST_CHECK_OK(RemoteFileGetContents(fuzzing_time_file, fuzzing_time_str)); |
| absl::Duration fuzzing_time; |
| if (!absl::ParseDuration(absl::StripAsciiWhitespace(fuzzing_time_str), |
| &fuzzing_time)) { |
| FUZZTEST_LOG(WARNING) |
| << "Failed to parse fuzzing time of a resuming fuzz test: '" |
| << fuzzing_time_str << "'. Assuming no fuzzing time so far."; |
| return absl::ZeroDuration(); |
| } |
| return fuzzing_time; |
| } |
| |
| PeriodicAction RecordFuzzingTime(std::string_view fuzzing_time_file, |
| absl::Time start_time) { |
| return {[=] { |
| absl::Status status = RemoteFileSetContents( |
| fuzzing_time_file, |
| absl::FormatDuration(absl::Now() - start_time)); |
| FUZZTEST_LOG_IF(WARNING, !status.ok()) |
| << "Failed to write fuzzing time: " << status; |
| }, |
| PeriodicAction::ZeroDelayConstInterval(absl::Seconds(15))}; |
| } |
| |
| // TODO(b/368325638): Add tests for this. |
| int UpdateCorpusDatabaseForFuzzTests( |
| Environment env, const fuzztest::internal::Configuration& fuzztest_config, |
| CentipedeCallbacksFactory& callbacks_factory) { |
| env.UpdateWithTargetConfig(fuzztest_config); |
| |
| absl::Time start_time = absl::Now(); |
| FUZZTEST_LOG(INFO) |
| << "Starting the update of the corpus database for fuzz tests:" |
| << "\nBinary: " << env.binary |
| << "\nCorpus database: " << fuzztest_config.corpus_database; |
| |
| // Step 1: Preliminary set up of test sharding, binary info, etc. |
| const auto [test_shard_index, total_test_shards] = SetUpTestSharding(); |
| const auto corpus_database_path = |
| std::filesystem::path(fuzztest_config.corpus_database) / |
| fuzztest_config.binary_identifier; |
| const auto stats_root_path = |
| fuzztest_config.stats_root.empty() |
| ? std::filesystem::path() |
| : std::filesystem::path(fuzztest_config.stats_root) / |
| fuzztest_config.binary_identifier; |
| const auto workdir_root_path = |
| fuzztest_config.workdir_root.empty() |
| ? corpus_database_path |
| : std::filesystem::path(fuzztest_config.workdir_root) / |
| fuzztest_config.binary_identifier; |
| const auto execution_stamp = [] { |
| std::string stamp = |
| absl::FormatTime("%Y-%m-%d-%H-%M-%S", absl::Now(), absl::UTCTimeZone()); |
| return stamp; |
| }(); |
| std::vector<std::string> fuzz_tests_to_run; |
| if (!env.fuzztest_multi_test_mode_soon_to_be_removed) { |
| FUZZTEST_CHECK(fuzztest_config.fuzz_tests_in_current_shard.size() == 1) |
| << "Centipede handles only one test when using FuzzTest. Use " |
| "`--fuzztest_multi_test_mode_soon_to_be_removed` if you need " |
| "Centipede to operate on multiple tests in one invocation - this " |
| "feature is going to be removed soon."; |
| fuzz_tests_to_run = fuzztest_config.fuzz_tests_in_current_shard; |
| } else { |
| // TODO: xinhaoyuan - remove this branch after merging the FuzzTest |
| // configuration into Centipede flags. |
| // |
| // We hide shard information when querying the available tests. So we use |
| // `fuzz_tests_in_current_shard` as the full list and shard it here. We |
| // cannot use `fuzz_tests` because it does not take test filter into |
| // account. |
| for (int i = 0; i < fuzztest_config.fuzz_tests_in_current_shard.size(); |
| ++i) { |
| if (i % total_test_shards == test_shard_index) { |
| fuzz_tests_to_run.push_back( |
| fuzztest_config.fuzz_tests_in_current_shard[i]); |
| } |
| } |
| } |
| FUZZTEST_LOG(INFO) << "Fuzz tests to run: " |
| << absl::StrJoin(fuzz_tests_to_run, ", "); |
| |
| const bool is_workdir_specified = !env.workdir.empty(); |
| FUZZTEST_CHECK(!is_workdir_specified || |
| !env.fuzztest_multi_test_mode_soon_to_be_removed); |
| // When env.workdir is empty, the full workdir paths will be formed by |
| // appending the fuzz test names to the base workdir path. We use different |
| // path when only replaying to avoid replaying an unfinished fuzzing sessions. |
| const auto base_workdir_path = |
| is_workdir_specified |
| ? std::filesystem::path{} // Will not be used. |
| : workdir_root_path / |
| absl::StrFormat("workdir%s.%03d", |
| fuzztest_config.only_replay ? "-replay" : "", |
| test_shard_index); |
| // There's no point in saving the binary info to the workdir, since the |
| // workdir is deleted at the end. |
| env.save_binary_info = false; |
| std::string pcs_file_path; |
| BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary( |
| env, callbacks_factory, pcs_file_path); |
| |
| FUZZTEST_LOG(INFO) << "Test shard index: " << test_shard_index |
| << " Total test shards: " << total_test_shards; |
| |
| // Step 2: Iterate over the fuzz tests and run them. |
| const std::string binary = env.binary; |
| for (int i = 0; i < fuzz_tests_to_run.size(); ++i) { |
| // Clean up previous stop requests. stop_time will be set later. |
| ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/absl::InfiniteFuture()); |
| if (env.fuzztest_multi_test_mode_soon_to_be_removed && |
| fuzztest_config.GetTimeLimitPerTest() < absl::InfiniteDuration()) { |
| const absl::Duration test_time_limit = |
| fuzztest_config.GetTimeLimitPerTest(); |
| const absl::Status has_enough_time = VerifyBazelHasEnoughTimeToRunTest( |
| start_time, test_time_limit, |
| /*executed_tests_in_shard=*/i, fuzztest_config.fuzz_tests.size()); |
| FUZZTEST_CHECK_OK(has_enough_time) |
| << "Not enough time for running the fuzz test " |
| << fuzz_tests_to_run[i] << " for " << test_time_limit; |
| } |
| if (!is_workdir_specified) { |
| env.workdir = base_workdir_path / fuzz_tests_to_run[i]; |
| } |
| const auto execution_id_path = |
| (base_workdir_path / |
| absl::StrCat(fuzz_tests_to_run[i], ".execution_id")) |
| .string(); |
| |
| bool is_resuming = false; |
| if (!is_workdir_specified && fuzztest_config.execution_id.has_value()) { |
| // Use the execution IDs to resume or skip tests. |
| const bool execution_id_matched = [&] { |
| if (!RemotePathExists(execution_id_path)) return false; |
| FUZZTEST_CHECK(!RemotePathIsDirectory(execution_id_path)); |
| std::string prev_execution_id; |
| FUZZTEST_CHECK_OK( |
| RemoteFileGetContents(execution_id_path, prev_execution_id)); |
| return prev_execution_id == *fuzztest_config.execution_id; |
| }(); |
| if (execution_id_matched) { |
| // If execution IDs match but the previous coverage is missing, it means |
| // the test was previously finished, and we skip running for the test. |
| if (!RemotePathExists(WorkDir{env}.CoverageDirPath())) { |
| FUZZTEST_LOG(INFO) |
| << "Skipping running the fuzz test " << fuzz_tests_to_run[i]; |
| continue; |
| } |
| // If execution IDs match and the previous coverage exists, it means |
| // the same workflow got interrupted when running the test. So we resume |
| // the test. |
| is_resuming = true; |
| FUZZTEST_LOG(INFO) << "Resuming running the fuzz test " |
| << fuzz_tests_to_run[i]; |
| } else { |
| // If the execution IDs mismatch, we start a new run. |
| is_resuming = false; |
| FUZZTEST_LOG(INFO) << "Starting a new run of the fuzz test " |
| << fuzz_tests_to_run[i]; |
| } |
| } |
| if (RemotePathExists(env.workdir) && !is_resuming) { |
| // This could be a workdir from a failed run that used a different version |
| // of the binary. We delete it so that we don't have to deal with |
| // the assumptions under which it is safe to reuse an old workdir. |
| FUZZTEST_CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true)); |
| } |
| const WorkDir workdir{env}; |
| FUZZTEST_CHECK_OK(RemoteMkdir( |
| workdir.CoverageDirPath())); // Implicitly creates the workdir |
| |
| // Updating execution ID must be after creating the coverage dir. Otherwise |
| // if it fails to create coverage dir after updating execution ID, next |
| // attempt would skip this test. |
| if (!is_workdir_specified && fuzztest_config.execution_id.has_value() && |
| !is_resuming) { |
| FUZZTEST_CHECK_OK(RemoteFileSetContents(execution_id_path, |
| *fuzztest_config.execution_id)); |
| } |
| |
| absl::Cleanup clean_up_workdir = [is_workdir_specified, &env] { |
| if (!is_workdir_specified && !EarlyStopRequested()) { |
| FUZZTEST_CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true)); |
| } |
| }; |
| |
| const std::filesystem::path fuzztest_db_path = |
| corpus_database_path / fuzz_tests_to_run[i]; |
| const std::filesystem::path regression_dir = |
| fuzztest_db_path / "regression"; |
| const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage"; |
| |
| // Seed the fuzzing session with the latest coverage corpus and regression |
| // inputs from the previous fuzzing session. |
| if (!is_resuming) { |
| FUZZTEST_CHECK_OK(GenerateSeedCorpusFromConfig( |
| GetSeedCorpusConfig(env, regression_dir.c_str(), |
| fuzztest_config.replay_coverage_inputs |
| ? coverage_dir.c_str() |
| : ""), |
| env.binary_name, env.binary_hash)) |
| << "while generating the seed corpus"; |
| } |
| |
| if (env.fuzztest_multi_test_mode_soon_to_be_removed) { |
| // TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way. |
| constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz="; |
| constexpr std::string_view kFuzzTestReplayCorpusFlag = |
| "--replay_corpus="; |
| std::string_view test_selection_flag = fuzztest_config.only_replay |
| ? kFuzzTestReplayCorpusFlag |
| : kFuzzTestFuzzFlag; |
| env.binary = |
| absl::StrCat(binary, " ", test_selection_flag, fuzz_tests_to_run[i]); |
| } |
| |
| absl::Duration time_limit = fuzztest_config.GetTimeLimitPerTest(); |
| absl::Duration time_spent = absl::ZeroDuration(); |
| const std::string fuzzing_time_file = |
| std::filesystem::path(env.workdir) / "fuzzing_time"; |
| if (is_resuming && RemotePathExists(fuzzing_time_file)) { |
| time_spent = ReadFuzzingTime(fuzzing_time_file); |
| time_limit = std::max(time_limit - time_spent, absl::ZeroDuration()); |
| } |
| is_resuming = false; |
| |
| if (EarlyStopRequested()) { |
| FUZZTEST_LOG(INFO) << "Skipping test " << fuzz_tests_to_run[i] |
| << " because early stop requested."; |
| continue; |
| } |
| |
| FUZZTEST_LOG(INFO) << (fuzztest_config.only_replay ? "Replaying " |
| : "Fuzzing ") |
| << fuzz_tests_to_run[i] << " for " << time_limit |
| << "\n\tTest binary: " << env.binary; |
| |
| const absl::Time start_time = absl::Now(); |
| ClearEarlyStopRequestAndSetStopTime(/*stop_time=*/start_time + time_limit); |
| PeriodicAction record_fuzzing_time = |
| RecordFuzzingTime(fuzzing_time_file, start_time - time_spent); |
| Fuzz(env, binary_info, pcs_file_path, callbacks_factory); |
| record_fuzzing_time.Nudge(); |
| record_fuzzing_time.Stop(); |
| |
| if (!stats_root_path.empty()) { |
| const auto stats_dir = stats_root_path / fuzz_tests_to_run[i]; |
| FUZZTEST_CHECK_OK(RemoteMkdir(stats_dir.c_str())); |
| FUZZTEST_CHECK_OK(RemoteFileRename( |
| workdir.FuzzingStatsPath(), |
| (stats_dir / absl::StrCat("fuzzing_stats_", execution_stamp)) |
| .c_str())); |
| } |
| |
| if (EarlyStopRequested()) { |
| FUZZTEST_LOG(INFO) |
| << "Skip updating corpus database due to early stop requested."; |
| continue; |
| } |
| |
| // TODO(xinhaoyuan): Have a separate flag to skip corpus updating instead |
| // of checking whether workdir is specified or not. |
| const bool skip_corpus_db_update = |
| fuzztest_config.only_replay || is_workdir_specified; |
| if (skip_corpus_db_update && !env.report_crash_summary) continue; |
| |
| // Deduplicate and optionally update the crashing inputs. |
| CrashSummary crash_summary{fuzztest_config.binary_identifier, |
| fuzz_tests_to_run[i]}; |
| const std::optional<std::filesystem::path> crashing_dir = |
| skip_corpus_db_update ? std::nullopt |
| : std::make_optional<std::filesystem::path>( |
| fuzztest_db_path / "crashing"); |
| absl::flat_hash_set<std::string> crash_signatures = |
| skip_corpus_db_update |
| ? absl::flat_hash_set<std::string>{} |
| : PruneOldCrashesAndGetRemainingCrashSignatures( |
| *crashing_dir, env, callbacks_factory, crash_summary); |
| DeduplicateAndOptionallyStoreNewCrashes(workdir, env.total_shards, |
| std::move(crash_signatures), |
| crashing_dir, crash_summary); |
| if (env.report_crash_summary) crash_summary.Report(&std::cerr); |
| if (skip_corpus_db_update) continue; |
| |
| // Distill and store the coverage corpus. |
| Distill(env); |
| if (RemotePathExists(coverage_dir.c_str())) { |
| // In the future, we will store k latest coverage corpora for some k, but |
| // for now we only keep the latest one. |
| FUZZTEST_CHECK_OK( |
| RemotePathDelete(coverage_dir.c_str(), /*recursively=*/true)); |
| } |
| FUZZTEST_CHECK_OK(RemoteMkdir(coverage_dir.c_str())); |
| std::vector<std::string> distilled_corpus_files; |
| FUZZTEST_CHECK_OK( |
| RemoteGlobMatch(workdir.DistilledCorpusFilePaths().AllShardsGlob(), |
| distilled_corpus_files)); |
| for (const std::string& corpus_file : distilled_corpus_files) { |
| const std::string file_name = |
| std::filesystem::path(corpus_file).filename(); |
| FUZZTEST_CHECK_OK( |
| RemoteFileRename(corpus_file, (coverage_dir / file_name).c_str())); |
| } |
| } |
| |
| return EXIT_SUCCESS; |
| } |
| |
| int ListCrashIds(const Environment& env, |
| const fuzztest::internal::Configuration& target_config) { |
| FUZZTEST_CHECK(!env.list_crash_ids_file.empty()) |
| << "Need list_crash_ids_file to be set for listing crash IDs"; |
| FUZZTEST_CHECK_EQ(target_config.fuzz_tests_in_current_shard.size(), 1); |
| std::vector<std::string> crash_paths; |
| // TODO: b/406003594 - move the path construction to a library. |
| const auto crash_dir = std::filesystem::path(target_config.corpus_database) / |
| target_config.binary_identifier / |
| target_config.fuzz_tests_in_current_shard[0] / |
| "crashing"; |
| if (RemotePathExists(crash_dir.string())) { |
| FUZZTEST_CHECK(RemotePathIsDirectory(crash_dir.string())) |
| << "Crash dir " << crash_dir << " in the corpus database " |
| << target_config.corpus_database << " is not a directory"; |
| crash_paths = |
| ValueOrDie(RemoteListFiles(crash_dir.string(), /*recursively=*/false)); |
| } |
| std::vector<std::string> results; |
| results.reserve(crash_paths.size()); |
| for (const auto& crash_path : crash_paths) { |
| std::string crash_id = std::filesystem::path{crash_path}.filename(); |
| results.push_back(std::move(crash_id)); |
| } |
| FUZZTEST_CHECK_OK(RemoteFileSetContents(env.list_crash_ids_file, |
| absl::StrJoin(results, "\n"))); |
| return EXIT_SUCCESS; |
| } |
| |
| int ReplayCrash(const Environment& env, |
| const fuzztest::internal::Configuration& target_config, |
| CentipedeCallbacksFactory& callbacks_factory) { |
| FUZZTEST_CHECK(!env.crash_id.empty()) |
| << "Need crash_id to be set for replay a crash"; |
| FUZZTEST_CHECK(target_config.fuzz_tests_in_current_shard.size() == 1) |
| << "Expecting exactly one test for replay_crash"; |
| // TODO: b/406003594 - move the path construction to a library. |
| const auto crash_dir = std::filesystem::path(target_config.corpus_database) / |
| target_config.binary_identifier / |
| target_config.fuzz_tests_in_current_shard[0] / |
| "crashing"; |
| const WorkDir workdir{env}; |
| SeedCorpusSource crash_corpus_source; |
| crash_corpus_source.dir_glob = crash_dir; |
| crash_corpus_source.num_recent_dirs = 1; |
| crash_corpus_source.individual_input_rel_glob = env.crash_id; |
| crash_corpus_source.sampled_fraction_or_count = 1.0f; |
| const SeedCorpusConfig crash_corpus_config = { |
| /*sources=*/{crash_corpus_source}, |
| /*destination=*/{ |
| /*dir_path=*/env.workdir, |
| /*shard_rel_glob=*/ |
| std::filesystem::path{workdir.CorpusFilePaths().AllShardsGlob()} |
| .filename(), |
| /*shard_index_digits=*/WorkDir::kDigitsInShardIndex, |
| /*num_shards=*/1}}; |
| FUZZTEST_CHECK_OK(GenerateSeedCorpusFromConfig( |
| crash_corpus_config, env.binary_name, env.binary_hash)); |
| Environment run_crash_env = env; |
| run_crash_env.load_shards_only = true; |
| int fuzz_result = Fuzz(run_crash_env, {}, "", callbacks_factory); |
| if (env.report_crash_summary) { |
| CrashSummary crash_summary{target_config.binary_identifier, |
| target_config.fuzz_tests_in_current_shard[0]}; |
| // There should be at most one crash, so no deduplication actually happens. |
| DeduplicateAndOptionallyStoreNewCrashes(workdir, /*total_shards=*/1, |
| /*crash_signatures=*/{}, |
| /*crashing_dir=*/std::nullopt, |
| crash_summary); |
| crash_summary.Report(&std::cerr); |
| } |
| return fuzz_result; |
| } |
| |
| int ExportCrash(const Environment& env, |
| const fuzztest::internal::Configuration& target_config) { |
| FUZZTEST_CHECK(!env.crash_id.empty()) |
| << "Need crash_id to be set for exporting a crash"; |
| FUZZTEST_CHECK(!env.export_crash_file.empty()) |
| << "Need export_crash_file to be set for exporting a crash"; |
| FUZZTEST_CHECK(target_config.fuzz_tests_in_current_shard.size() == 1) |
| << "Expecting exactly one test for exporting a crash"; |
| // TODO: b/406003594 - move the path construction to a library. |
| const auto crash_dir = std::filesystem::path(target_config.corpus_database) / |
| target_config.binary_identifier / |
| target_config.fuzz_tests_in_current_shard[0] / |
| "crashing"; |
| std::string crash_contents; |
| const auto read_status = |
| RemoteFileGetContents((crash_dir / env.crash_id).c_str(), crash_contents); |
| if (!read_status.ok()) { |
| FUZZTEST_LOG(ERROR) << "Failed reading the crash " << env.crash_id |
| << " from " << crash_dir.c_str() << ": " << read_status; |
| return EXIT_FAILURE; |
| } |
| const auto write_status = |
| RemoteFileSetContents(env.export_crash_file, crash_contents); |
| if (!write_status.ok()) { |
| FUZZTEST_LOG(ERROR) << "Failed write the crash " << env.crash_id << " to " |
| << env.export_crash_file << ": " << write_status; |
| return EXIT_FAILURE; |
| } |
| return EXIT_SUCCESS; |
| } |
| |
| } // namespace |
| |
| int CentipedeMain(const Environment& env, |
| CentipedeCallbacksFactory& callbacks_factory) { |
| ClearEarlyStopRequestAndSetStopTime(env.stop_at); |
| SetSignalHandlers(); |
| |
| if (!env.corpus_to_files.empty()) { |
| Centipede::CorpusToFiles(env, env.corpus_to_files); |
| return EXIT_SUCCESS; |
| } |
| |
| if (!env.crashes_to_files.empty()) { |
| const auto status = Centipede::CrashesToFiles(env, env.crashes_to_files); |
| if (status.ok()) return EXIT_SUCCESS; |
| FUZZTEST_LOG(ERROR) << "Got error when exporting crashes to files: " |
| << status; |
| return EXIT_FAILURE; |
| } |
| |
| if (!env.for_each_blob.empty()) return ForEachBlob(env); |
| |
| if (!env.minimize_crash_file_path.empty()) { |
| ByteArray crashy_input; |
| ReadFromLocalFile(env.minimize_crash_file_path, crashy_input); |
| return MinimizeCrash(crashy_input, env, callbacks_factory); |
| } |
| |
| // Just export the corpus from a local dir and exit. |
| if (!env.corpus_from_files.empty()) { |
| Centipede::CorpusFromFiles(env, env.corpus_from_files); |
| return EXIT_SUCCESS; |
| } |
| |
| // Export the corpus from a local dir and then fuzz. |
| if (!env.corpus_dir.empty()) { |
| for (size_t i = 0; i < env.corpus_dir.size(); ++i) { |
| const auto& corpus_dir = env.corpus_dir[i]; |
| if (i > 0 || !env.first_corpus_dir_output_only) |
| Centipede::CorpusFromFiles(env, corpus_dir); |
| } |
| } |
| |
| if (env.distill) return Distill(env); |
| |
| // Create the local temporary dir once, before creating any threads. The |
| // temporary dir must typically exist before `CentipedeCallbacks` can be used. |
| const auto tmpdir = TemporaryLocalDirPath(); |
| CreateLocalDirRemovedAtExit(tmpdir); |
| |
| // Enter the update corpus database mode only if we have a binary to invoke |
| // and a corpus database to update. |
| // We don't update the corpus database for standalone binaries (i.e., when |
| // `env.has_input_wildcards` is true). |
| if (!env.binary.empty() && !env.has_input_wildcards) { |
| const auto serialized_target_config = [&]() -> absl::StatusOr<std::string> { |
| // TODO: b/410051414 Use Centipede flags to pass necessary information |
| // instead of passing the entirely serialized Configuration once switched |
| // to the unified execution model. |
| if (!env.fuzztest_configuration.empty()) { |
| std::string result; |
| FUZZTEST_CHECK( |
| absl::WebSafeBase64Unescape(env.fuzztest_configuration, &result)); |
| return result; |
| } |
| ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); |
| return scoped_callbacks.callbacks()->GetSerializedTargetConfig(); |
| }(); |
| FUZZTEST_CHECK_OK(serialized_target_config.status()); |
| if (!serialized_target_config->empty()) { |
| const auto target_config = fuzztest::internal::Configuration::Deserialize( |
| *serialized_target_config); |
| FUZZTEST_CHECK_OK(target_config.status()) |
| << "Failed to deserialize target configuration"; |
| if (!target_config->corpus_database.empty()) { |
| FUZZTEST_LOG_IF( |
| FATAL, env.list_crash_ids + env.replay_crash + env.export_crash > 1) |
| << "At most one of list_crash_ids/replay_crash/export_crash can " |
| "be set, but seeing list_crash_ids: " |
| << env.list_crash_ids << ", replay_crash: " << env.replay_crash |
| << ", export_crash: " << env.export_crash; |
| if (env.list_crash_ids) { |
| return ListCrashIds(env, *target_config); |
| } |
| if (env.replay_crash) { |
| return ReplayCrash(env, *target_config, callbacks_factory); |
| } |
| if (env.export_crash) { |
| return ExportCrash(env, *target_config); |
| } |
| |
| const auto time_limit_per_test = target_config->GetTimeLimitPerTest(); |
| FUZZTEST_CHECK(target_config->only_replay || |
| time_limit_per_test < absl::InfiniteDuration() || |
| target_config->fuzz_tests_in_current_shard.size() == 1) |
| << "Updating corpus database requires specifying time limit per " |
| "fuzz test when there are more than one tests."; |
| FUZZTEST_CHECK(time_limit_per_test >= absl::Seconds(1)) |
| << "Time limit per fuzz test must be at least 1 second."; |
| return UpdateCorpusDatabaseForFuzzTests(env, *target_config, |
| callbacks_factory); |
| } |
| } |
| } |
| |
| // Create the remote coverage dirs once, before creating any threads. |
| const auto coverage_dir = WorkDir{env}.CoverageDirPath(); |
| FUZZTEST_CHECK_OK(RemoteMkdir(coverage_dir)); |
| FUZZTEST_LOG(INFO) << "Coverage dir: " << coverage_dir |
| << "; temporary dir: " << tmpdir; |
| |
| std::string pcs_file_path; |
| BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary( |
| env, callbacks_factory, pcs_file_path); |
| |
| if (env.analyze) return Analyze(env); |
| |
| return Fuzz(env, binary_info, pcs_file_path, callbacks_factory); |
| // TODO: fniksic - Report the crash summary here if requested. What are the |
| // binary identifier and the fuzz test name here? |
| } |
| |
| } // namespace fuzztest::internal |