blob: bd5a272cd48082515db4288eb0b10815cb827779 [file] [log] [blame]
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/centipede_interface.h"
#include <unistd.h>
#include <algorithm>
#include <atomic>
#include <cerrno>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem> // NOLINT
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/optimization.h"
#include "absl/container/flat_hash_set.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./centipede/analyze_corpora.h"
#include "./centipede/binary_info.h"
#include "./centipede/centipede.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/command.h"
#include "./centipede/coverage.h"
#include "./centipede/distill.h"
#include "./centipede/early_exit.h"
#include "./centipede/environment.h"
#include "./centipede/minimize_crash.h"
#include "./centipede/pc_info.h"
#include "./centipede/periodic_action.h"
#include "./centipede/runner_result.h"
#include "./centipede/seed_corpus_maker_lib.h"
#include "./centipede/stats.h"
#include "./centipede/thread_pool.h"
#include "./centipede/util.h"
#include "./centipede/workdir.h"
#include "./common/blob_file.h"
#include "./common/defs.h"
#include "./common/hash.h"
#include "./common/logging.h" // IWYU pragma: keep
#include "./common/remote_file.h"
#include "./common/status_macros.h"
#include "./fuzztest/internal/configuration.h"
namespace centipede {
namespace {
// Installs handlers for SIGINT and SIGALRM and, when `stop_at` is finite,
// schedules an alarm so fuzzing winds down at the requested time.
void SetSignalHandlers(absl::Time stop_at) {
  for (const int signum : {SIGINT, SIGALRM}) {
    struct sigaction sigact = {};
    // Capture-less lambda decays to a plain function pointer. The handler
    // body is kept async-signal-safe: raw logging plus an exit-request flag.
    sigact.sa_handler = [](int received_signum) {
      switch (received_signum) {
        case SIGINT:
          ABSL_RAW_LOG(INFO, "Ctrl-C pressed: winding down");
          RequestEarlyExit(EXIT_FAILURE);  // => abnormal outcome
          break;
        case SIGALRM:
          ABSL_RAW_LOG(INFO, "Reached --stop_at time: winding down");
          RequestEarlyExit(EXIT_SUCCESS);  // => expected outcome
          break;
        default:
          ABSL_UNREACHABLE();
      }
    };
    sigaction(signum, &sigact, nullptr);
  }
  if (stop_at == absl::InfiniteFuture()) return;
  const absl::Duration stop_in = stop_at - absl::Now();
  // Setting an alarm works only if the delay is longer than 1 second.
  if (stop_in < absl::Seconds(1)) {
    LOG(WARNING) << "Already reached --stop_at time " << stop_at
                 << " upon starting: winding down immediately";
    RequestEarlyExit(EXIT_SUCCESS);  // => expected outcome
    return;
  }
  LOG(INFO) << "Setting alarm for --stop_at time " << stop_at << " (in "
            << stop_in << ")";
  // alarm() returns the seconds remaining on a previously scheduled alarm;
  // a nonzero result means someone already armed one.
  PCHECK(alarm(absl::ToInt64Seconds(stop_in)) == 0) << "Alarm already set";
}
// Runs env.for_each_blob on every blob extracted from env.args.
// Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.
int ForEachBlob(const Environment &env) {
auto tmpdir = TemporaryLocalDirPath();
CreateLocalDirRemovedAtExit(tmpdir);
std::string tmpfile = std::filesystem::path(tmpdir).append("t");
for (const auto &arg : env.args) {
LOG(INFO) << "Running '" << env.for_each_blob << "' on " << arg;
auto blob_reader = DefaultBlobFileReaderFactory();
absl::Status open_status = blob_reader->Open(arg);
if (!open_status.ok()) {
LOG(INFO) << "Failed to open " << arg << ": " << open_status;
return EXIT_FAILURE;
}
ByteSpan blob;
while (blob_reader->Read(blob) == absl::OkStatus()) {
ByteArray bytes;
bytes.insert(bytes.begin(), blob.data(), blob.end());
// TODO(kcc): [impl] add a variant of WriteToLocalFile that accepts Span.
WriteToLocalFile(tmpfile, bytes);
std::string command_line = absl::StrReplaceAll(
env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(bytes)}});
Command cmd(command_line);
// TODO(kcc): [as-needed] this creates one process per blob.
// If this flag gets active use, we may want to define special cases,
// e.g. if for_each_blob=="cp %P /some/where" we can do it in-process.
cmd.Execute();
if (EarlyExitRequested()) return ExitCode();
}
}
return EXIT_SUCCESS;
}
// Loads corpora from work dirs provided in `env.args`, if there are two args
// provided, analyzes differences. If there is one arg provided, reports the
// function coverage. Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise.
int Analyze(const Environment &env) {
LOG(INFO) << "Analyze " << absl::StrJoin(env.args, ",");
CHECK(!env.binary.empty()) << "--binary must be used";
if (env.args.size() == 1) {
const CoverageResults coverage_results =
GetCoverage(env.binary_name, env.binary_hash, env.args[0]);
WorkDir workdir{env};
const std::string coverage_report_path =
workdir.CoverageReportPath(/*annotation=*/"");
DumpCoverageReport(coverage_results, coverage_report_path);
} else if (env.args.size() == 2) {
AnalyzeCorporaToLog(env.binary_name, env.binary_hash, env.args[0],
env.args[1]);
} else {
LOG(FATAL) << "for now, --analyze supports only 1 or 2 work dirs; got "
<< env.args.size();
}
return EXIT_SUCCESS;
}
// Serializes `pc_table` as raw bytes into the local file at `file_path`.
void SavePCTableToFile(const PCTable &pc_table, std::string_view file_path) {
  WriteToLocalFile(file_path, AsByteSpan(pc_table));
}
// Obtains `BinaryInfo` for the target via the callbacks (unless
// `env.populate_binary_info` is false), optionally serializes it into the
// workdir, and, for targets using legacy trace-pc instrumentation, saves the
// PC table to a temporary file whose path is returned via `pcs_file_path`
// (left untouched otherwise).
BinaryInfo PopulateBinaryInfoAndSavePCsIfNecessary(
    const Environment &env, CentipedeCallbacksFactory &callbacks_factory,
    std::string &pcs_file_path) {
  BinaryInfo binary_info;
  // Some fuzz targets have coverage not based on instrumenting binaries.
  // For those targets, we should not populate binary info.
  if (env.populate_binary_info) {
    ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
    scoped_callbacks.callbacks()->PopulateBinaryInfo(binary_info);
  }
  if (env.save_binary_info) {
    const std::string binary_info_dir = WorkDir{env}.BinaryInfoDirPath();
    CHECK_OK(RemoteMkdir(binary_info_dir));
    LOG(INFO) << "Serializing binary info to: " << binary_info_dir;
    binary_info.Write(binary_info_dir);
  }
  // For legacy trace-pc instrumentation, the PC table is written to a file in
  // the temporary dir; the caller passes this path on via the environment.
  if (binary_info.uses_legacy_trace_pc_instrumentation) {
    pcs_file_path = std::filesystem::path(TemporaryLocalDirPath()) / "pcs";
    SavePCTableToFile(binary_info.pc_table, pcs_file_path);
  }
  if (env.use_pcpair_features) {
    CHECK(!binary_info.pc_table.empty())
        << "--use_pcpair_features requires non-empty pc_table";
  }
  return binary_info;
}
// Runs the fuzzing session across `env.num_threads` shards and returns the
// process exit code (ExitCode()). `binary_info` and `pcs_file_path` are
// shared read-only inputs for all shards and must outlive the call.
int Fuzz(const Environment &env, const BinaryInfo &binary_info,
         std::string_view pcs_file_path,
         CentipedeCallbacksFactory &callbacks_factory) {
  CoverageLogger coverage_logger(binary_info.pc_table, binary_info.symbols);
  // Per-thread mutable copies of the environment and per-thread stats slots.
  std::vector<Environment> envs(env.num_threads, env);
  std::vector<std::atomic<Stats>> stats_vec(env.num_threads);
  // Start periodic stats dumping and, optionally, logging.
  std::vector<PeriodicAction> stats_reporters;
  // CSV stats appender runs always; the sleep between reports ramps up from
  // 0 to 10 minutes as iterations progress.
  stats_reporters.emplace_back(
      [csv_appender = StatsCsvFileAppender{stats_vec, envs}]() mutable {
        csv_appender.ReportCurrStats();
      },
      PeriodicAction::Options{
          .sleep_before_each =
              [](size_t iteration) {
                return absl::Minutes(std::clamp(iteration, 0UL, 10UL));
              },
      });
  // Log-based stats reporter runs only for experiments or verbose logging;
  // the sleep ramps up from 5 to 600 seconds.
  if (!envs.front().experiment.empty() || ABSL_VLOG_IS_ON(1)) {
    stats_reporters.emplace_back(
        [logger = StatsLogger{stats_vec, envs}]() mutable {
          logger.ReportCurrStats();
        },
        PeriodicAction::Options{
            .sleep_before_each =
                [](size_t iteration) {
                  return absl::Seconds(std::clamp(iteration, 5UL, 600UL));
                },
        });
  }
  // Per-shard worker body. `my_env` and `stats` are owned by this function's
  // `envs`/`stats_vec` and outlive the worker threads.
  auto fuzzing_worker =
      [&env, pcs_file_path, &callbacks_factory, &binary_info, &coverage_logger](
          Environment &my_env, std::atomic<Stats> &stats, bool create_tmpdir) {
        if (create_tmpdir) CreateLocalDirRemovedAtExit(TemporaryLocalDirPath());
        my_env.UpdateForExperiment();
        // Uses TID, call in this thread.
        my_env.seed = GetRandomSeed(env.seed);
        // Same for all threads.
        my_env.pcs_file_path = pcs_file_path;
        if (env.dry_run) return;
        ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, my_env);
        Centipede centipede(my_env, *scoped_callbacks.callbacks(), binary_info,
                            coverage_logger, stats);
        centipede.FuzzingLoop();
      };
  if (env.num_threads == 1) {
    // When fuzzing with one thread, run fuzzing loop in the current
    // thread. This is because FuzzTest/Centipede's single-process
    // fuzzing requires the test body, which is invoked by the fuzzing
    // loop, to run in the main thread.
    //
    // Here, the fuzzing worker should not re-create the tmpdir since the path
    // is thread-local and it has been created in the current function.
    fuzzing_worker(envs[0], stats_vec[0], /*create_tmpdir=*/false);
  } else {
    ThreadPool fuzzing_worker_threads{static_cast<int>(env.num_threads)};
    for (size_t thread_idx = 0; thread_idx < env.num_threads; thread_idx++) {
      Environment &my_env = envs[thread_idx];
      // Each worker gets a distinct shard index.
      my_env.my_shard_index = env.my_shard_index + thread_idx;
      std::atomic<Stats> &my_stats = stats_vec[thread_idx];
      fuzzing_worker_threads.Schedule([&fuzzing_worker, &my_env, &my_stats]() {
        fuzzing_worker(my_env, my_stats, /*create_tmpdir=*/true);
      });
    }  // All `fuzzing_worker_threads` join here.
  }
  for (auto &reporter : stats_reporters) {
    // Nudge one final update and stop the reporting thread.
    reporter.Nudge();
    reporter.Stop();
  }
  if (!env.knobs_file.empty()) PrintRewardValues(stats_vec, std::cerr);
  return ExitCode();
}
// Bazel-style test-sharding parameters: this process is responsible for the
// fuzz tests whose index i satisfies i % total_shards == index.
struct TestShard {
  int index = 0;         // Zero-based index of this shard.
  int total_shards = 1;  // Total number of shards; always >= 1.
};
// Returns the test timeout imposed by Bazel via the TEST_TIMEOUT environment
// variable (in seconds), or an infinite duration when the variable is unset.
// https://bazel.build/reference/test-encyclopedia#initial-conditions
absl::Duration GetBazelTestTimeout() {
  const char *raw_timeout = std::getenv("TEST_TIMEOUT");
  if (raw_timeout == nullptr) return absl::InfiniteDuration();
  int seconds = 0;
  CHECK(absl::SimpleAtoi(raw_timeout, &seconds))
      << "Failed to parse TEST_TIMEOUT: \"" << raw_timeout << "\"";
  return absl::Seconds(seconds);
}
// Crashes with an actionable error message when the remaining Bazel test
// timeout is insufficient to fuzz the next test; returns normally otherwise.
// `start_time`: when this shard started running.
// `test_time_limit`: the per-test fuzzing time limit.
// `executed_tests_in_shard`: how many tests this shard has already finished.
// `fuzz_test_count`/`shard_count`: totals for the whole test binary.
void ReportErrorWhenNotEnoughTimeToRunEverything(absl::Time start_time,
                                                 absl::Duration test_time_limit,
                                                 int executed_tests_in_shard,
                                                 int fuzz_test_count,
                                                 int shard_count) {
  // The Bazel timeout is fixed for the process lifetime; read it once.
  static const absl::Duration bazel_test_timeout = GetBazelTestTimeout();
  // Headroom so we don't suggest a budget that only barely fits.
  constexpr float kTimeoutSafetyFactor = 1.2;
  const auto required_test_time = kTimeoutSafetyFactor * test_time_limit;
  const auto remaining_duration =
      bazel_test_timeout - (absl::Now() - start_time);
  if (required_test_time <= remaining_duration) return;
  std::string error =
      "Cannot fuzz a fuzz test within the given timeout. Please ";
  if (executed_tests_in_shard == 0) {
    // Increasing number of shards won't help: not even one test fits.
    // Suggest a timeout covering ceil(fuzz_test_count / shard_count) tests.
    const absl::Duration suggested_timeout =
        required_test_time * ((fuzz_test_count - 1) / shard_count + 1);
    absl::StrAppend(&error, "set the `timeout` to ", suggested_timeout,
                    " or reduce the fuzzing time, ");
  } else {
    constexpr int kMaxShardCount = 50;
    const int suggested_shard_count = std::min(
        (fuzz_test_count - 1) / executed_tests_in_shard + 1, kMaxShardCount);
    const int suggested_tests_per_shard =
        (fuzz_test_count - 1) / suggested_shard_count + 1;
    if (suggested_tests_per_shard > executed_tests_in_shard) {
      // We wouldn't be able to execute the suggested number of tests without
      // timeout. This case can only happen if we would in fact need more than
      // `kMaxShardCount` shards, indicating that there are simply too many fuzz
      // tests in a binary.
      CHECK_EQ(suggested_shard_count, kMaxShardCount);
      // Note: " tests " needs the leading space — the number is appended
      // immediately before it (previously rendered as e.g. "50tests").
      absl::StrAppend(&error,
                      "split the fuzz tests into several test binaries where "
                      "each binary has at most ",
                      executed_tests_in_shard * kMaxShardCount, " tests ",
                      "with `shard_count` = ", kMaxShardCount, ", ");
    } else {
      // In this case, `suggested_shard_count` must be greater than
      // `shard_count`, otherwise we would have already executed all the tests
      // without a timeout.
      CHECK_GT(suggested_shard_count, shard_count);
      absl::StrAppend(&error, "increase the `shard_count` to ",
                      suggested_shard_count, ", ");
    }
  }
  absl::StrAppend(&error, "to avoid this issue. ");
  absl::StrAppend(&error,
                  "(https://bazel.build/reference/be/"
                  "common-definitions#common-attributes-tests)");
  CHECK(false) << error;
}
// Reads and validates Bazel's test-sharding environment variables and returns
// the resulting shard parameters. Also clears the shard status file (to
// signal sharding support) and unsets the variables so child processes are
// not affected by them.
TestShard SetUpTestSharding() {
  TestShard test_shard;
  const char *total_shards_str = std::getenv("TEST_TOTAL_SHARDS");
  if (total_shards_str != nullptr) {
    CHECK(absl::SimpleAtoi(total_shards_str, &test_shard.total_shards))
        << "Failed to parse TEST_TOTAL_SHARDS as an integer: \""
        << total_shards_str << "\"";
    CHECK_GT(test_shard.total_shards, 0)
        << "TEST_TOTAL_SHARDS must be greater than 0.";
  }
  const char *shard_index_str = std::getenv("TEST_SHARD_INDEX");
  if (shard_index_str != nullptr) {
    CHECK(absl::SimpleAtoi(shard_index_str, &test_shard.index))
        << "Failed to parse TEST_SHARD_INDEX as an integer: \""
        << shard_index_str << "\"";
    CHECK(0 <= test_shard.index && test_shard.index < test_shard.total_shards)
        << "TEST_SHARD_INDEX must be in the range [0, "
        << test_shard.total_shards << ").";
  }
  // Update the shard status file to indicate that we support test sharding.
  // It suffices to update the file's modification time, but we clear the
  // contents for simplicity. This is also what the GoogleTest framework does.
  const char *shard_status_file = std::getenv("TEST_SHARD_STATUS_FILE");
  if (shard_status_file != nullptr) {
    ClearLocalFileContents(shard_status_file);
  }
  // Unset the environment variables so they don't affect the child processes.
  CHECK_EQ(unsetenv("TEST_TOTAL_SHARDS"), 0)
      << "Failed to unset TEST_TOTAL_SHARDS: " << std::strerror(errno);
  CHECK_EQ(unsetenv("TEST_SHARD_INDEX"), 0)
      << "Failed to unset TEST_SHARD_INDEX: " << std::strerror(errno);
  CHECK_EQ(unsetenv("TEST_SHARD_STATUS_FILE"), 0)
      << "Failed to unset TEST_SHARD_STATUS_FILE: " << std::strerror(errno);
  return test_shard;
}
// Prunes non-reproducible and duplicate crashes and returns the crash metadata
// of the remaining crashes.
absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata(
    const std::filesystem::path &crashing_dir, const Environment &env,
    CentipedeCallbacksFactory &callbacks_factory) {
  // The corpus database layout assumes the crash input files are located
  // directly in the crashing subdirectory, so we don't list recursively.
  const std::vector<std::string> crash_files =
      ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false));
  ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
  BatchResult batch_result;
  absl::flat_hash_set<std::string> remaining_crash_metadata;
  for (const std::string &crash_file : crash_files) {
    ByteArray input;
    CHECK_OK(RemoteFileGetContents(crash_file, input));
    // The input counts as reproducible when Execute() reports failure.
    const bool reproduced =
        !scoped_callbacks.callbacks()->Execute(env.binary, {input},
                                               batch_result);
    // Keep only the first reproducible crash per distinct failure description.
    bool keep = false;
    if (reproduced) {
      keep = remaining_crash_metadata.insert(batch_result.failure_description())
                 .second;
    }
    if (!keep) {
      CHECK_OK(RemotePathDelete(crash_file, /*recursively=*/false));
    }
  }
  return remaining_crash_metadata;
}
void DeduplicateAndStoreNewCrashes(
const std::filesystem::path &crashing_dir, const WorkDir &workdir,
absl::flat_hash_set<std::string> crash_metadata) {
const std::vector<std::string> new_crashing_input_files =
// The crash reproducer directory may contain subdirectories with
// input files that don't individually cause a crash. We ignore those
// for now and don't list the files recursively.
ValueOrDie(RemoteListFiles(workdir.CrashReproducerDirPath(),
/*recursively=*/false));
const std::filesystem::path crash_metadata_dir =
workdir.CrashMetadataDirPath();
CHECK_OK(RemoteMkdir(crashing_dir.c_str()));
for (const std::string &crashing_input_file : new_crashing_input_files) {
const std::string crashing_input_file_name =
std::filesystem::path(crashing_input_file).filename();
const std::string crash_metadata_file =
crash_metadata_dir / crashing_input_file_name;
std::string new_crash_metadata;
CHECK_OK(RemoteFileGetContents(crash_metadata_file, new_crash_metadata));
const bool is_duplicate = !crash_metadata.insert(new_crash_metadata).second;
if (is_duplicate) continue;
CHECK_OK(
RemotePathRename(crashing_input_file,
(crashing_dir / crashing_input_file_name).c_str()));
}
}
// Builds the config for seeding the corpus files in `env.workdir` with the
// previously distilled corpus files from `src_dir`.
SeedCorpusConfig GetSeedCorpusConfig(const Environment &env,
                                     std::string_view src_dir) {
  const WorkDir workdir{env};
  return {
      .sources = {SeedCorpusSource{
          .dir_glob = std::string(src_dir),
          .num_recent_dirs = 1,
          // We're using the previously distilled corpus files as seeds.
          .shard_rel_glob =
              std::filesystem::path{
                  workdir.DistilledCorpusFiles().AllShardsGlob()}
                  .filename(),
          // 1.0f: take everything (the fraction form of this field).
          .sampled_fraction_or_count = 1.0f,
      }},
      .destination =
          {
              .dir_path = env.workdir,
              // We're seeding the current corpus files.
              .shard_rel_glob =
                  std::filesystem::path{workdir.CorpusFiles().AllShardsGlob()}
                      .filename(),
              .shard_index_digits = WorkDir::kDigitsInShardIndex,
              // One destination shard per fuzzing thread.
              .num_shards = static_cast<uint32_t>(env.num_threads),
          },
  };
}
// TODO(b/368325638): Add tests for this.
// Iterates over the fuzz tests assigned to this Bazel test shard, fuzzes each
// one for its configured time limit, and refreshes the corpus database: the
// distilled coverage corpus replaces the previous one, crashing inputs are
// re-verified and deduplicated, and (optionally) fuzzing stats are archived.
// Returns EXIT_SUCCESS. Note: `env` is taken by value and mutated per test.
int UpdateCorpusDatabaseForFuzzTests(
    Environment env, const fuzztest::internal::Configuration &fuzztest_config,
    CentipedeCallbacksFactory &callbacks_factory) {
  env.UpdateWithTargetConfig(fuzztest_config);
  absl::Time start_time = absl::Now();
  LOG(INFO) << "Starting the update of the corpus database for fuzz tests:"
            << "\nBinary: " << env.binary
            << "\nCorpus database: " << fuzztest_config.corpus_database
            << "\nFuzz tests: "
            << absl::StrJoin(fuzztest_config.fuzz_tests, ", ");
  // Step 1: Preliminary set up of test sharding, binary info, etc.
  const auto [test_shard_index, total_test_shards] = SetUpTestSharding();
  const auto corpus_database_path =
      std::filesystem::path(fuzztest_config.corpus_database) /
      fuzztest_config.binary_identifier;
  // An empty `stats_root` disables stats archiving below.
  const auto stats_root_path =
      fuzztest_config.stats_root.empty()
          ? std::filesystem::path()
          : std::filesystem::path(fuzztest_config.stats_root) /
                fuzztest_config.binary_identifier;
  // UTC timestamp used to give archived stats files unique names.
  const auto execution_stamp = [] {
    std::string stamp =
        absl::FormatTime("%Y-%m-%d-%H-%M-%S", absl::Now(), absl::UTCTimeZone());
    return stamp;
  }();
  // The full workdir paths will be formed by appending the fuzz test names to
  // the base workdir path.
  const auto base_workdir_path =
      corpus_database_path / absl::StrFormat("workdir.%03d", test_shard_index);
  // There's no point in saving the binary info to the workdir, since the
  // workdir is deleted at the end.
  env.save_binary_info = false;
  std::string pcs_file_path;
  BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary(
      env, callbacks_factory, pcs_file_path);
  LOG(INFO) << "Test shard index: " << test_shard_index
            << " Total test shards: " << total_test_shards;
  // Step 2: Are we resuming from a previously terminated run?
  // Find the last index of a fuzz test for which we already have a workdir.
  bool is_resuming = false;
  int resuming_fuzztest_idx = 0;
  for (int i = 0; i < fuzztest_config.fuzz_tests.size(); ++i) {
    // This shard handles only tests with i % total_test_shards == shard index.
    if (i % total_test_shards != test_shard_index) continue;
    env.workdir = base_workdir_path / fuzztest_config.fuzz_tests[i];
    // Check the existence of the coverage path to not only make sure the
    // workdir exists, but also that it was created for the same binary as in
    // this run.
    if (RemotePathExists(WorkDir{env}.CoverageDirPath())) {
      is_resuming = true;
      resuming_fuzztest_idx = i;
    }
  }
  LOG_IF(INFO, is_resuming) << "Resuming from the fuzz test "
                            << fuzztest_config.fuzz_tests[resuming_fuzztest_idx]
                            << " (index: " << resuming_fuzztest_idx << ")";
  // Step 3: Iterate over the fuzz tests and run them.
  // Remember the plain binary path; `env.binary` gets the --fuzz= flag
  // appended per test below.
  const std::string binary = env.binary;
  for (int i = resuming_fuzztest_idx; i < fuzztest_config.fuzz_tests.size();
       ++i) {
    if (i % total_test_shards != test_shard_index) continue;
    ReportErrorWhenNotEnoughTimeToRunEverything(
        start_time, fuzztest_config.time_limit,
        /*executed_tests_in_shard=*/i / total_test_shards,
        fuzztest_config.fuzz_tests.size(), total_test_shards);
    env.workdir = base_workdir_path / fuzztest_config.fuzz_tests[i];
    if (RemotePathExists(env.workdir) && !is_resuming) {
      // This could be a workdir from a failed run that used a different version
      // of the binary. We delete it so that we don't have to deal with the
      // assumptions under which it is safe to reuse an old workdir.
      CHECK_OK(RemotePathDelete(env.workdir, /*recursively=*/true));
    }
    const WorkDir workdir{env};
    CHECK_OK(RemoteMkdir(
        workdir.CoverageDirPath()));  // Implicitly creates the workdir
    // Seed the fuzzing session with the latest coverage corpus from the
    // previous fuzzing session.
    const std::filesystem::path fuzztest_db_path =
        corpus_database_path / fuzztest_config.fuzz_tests[i];
    const std::filesystem::path coverage_dir = fuzztest_db_path / "coverage";
    if (RemotePathExists(coverage_dir.c_str()) && !is_resuming) {
      CHECK_OK(GenerateSeedCorpusFromConfig(
          GetSeedCorpusConfig(env, coverage_dir.c_str()), env.binary_name,
          env.binary_hash));
    }
    // Resuming only applies to the first test processed in this loop.
    is_resuming = false;
    // TODO: b/338217594 - Call the FuzzTest binary in a flag-agnostic way.
    constexpr std::string_view kFuzzTestFuzzFlag = "--fuzz=";
    env.binary = absl::StrCat(binary, " ", kFuzzTestFuzzFlag,
                              fuzztest_config.fuzz_tests[i]);
    LOG(INFO) << "Fuzzing " << fuzztest_config.fuzz_tests[i]
              << "\n\tTest binary: " << env.binary;
    // Reset any prior early-exit state and arm a per-test alarm so the
    // fuzzing loop winds down at the per-test time limit (see the SIGALRM
    // handler installed by SetSignalHandlers()).
    ClearEarlyExitRequest();
    alarm(absl::ToInt64Seconds(fuzztest_config.GetTimeLimitPerTest()));
    Fuzz(env, binary_info, pcs_file_path, callbacks_factory);
    // Archive this test's fuzzing stats if a stats root is configured.
    if (!stats_root_path.empty()) {
      const auto stats_dir = stats_root_path / fuzztest_config.fuzz_tests[i];
      CHECK_OK(RemoteMkdir(stats_dir.c_str()));
      CHECK_OK(RemotePathRename(
          workdir.FuzzingStatsPath(),
          (stats_dir / absl::StrCat("fuzzing_stats_", execution_stamp))
              .c_str()));
    }
    // Distill and store the coverage corpus.
    Distill(env);
    if (RemotePathExists(coverage_dir.c_str())) {
      // In the future, we will store k latest coverage corpora for some k, but
      // for now we only keep the latest one.
      CHECK_OK(RemotePathDelete(coverage_dir.c_str(), /*recursively=*/true));
    }
    CHECK_OK(RemoteMkdir(coverage_dir.c_str()));
    std::vector<std::string> distilled_corpus_files;
    CHECK_OK(RemoteGlobMatch(workdir.DistilledCorpusFiles().AllShardsGlob(),
                             distilled_corpus_files));
    for (const std::string &corpus_file : distilled_corpus_files) {
      const std::string file_name =
          std::filesystem::path(corpus_file).filename();
      CHECK_OK(
          RemotePathRename(corpus_file, (coverage_dir / file_name).c_str()));
    }
    // Deduplicate and update the crashing inputs.
    const std::filesystem::path crashing_dir = fuzztest_db_path / "crashing";
    absl::flat_hash_set<std::string> crash_metadata =
        PruneOldCrashesAndGetRemainingCrashMetadata(crashing_dir, env,
                                                    callbacks_factory);
    DeduplicateAndStoreNewCrashes(crashing_dir, workdir,
                                  std::move(crash_metadata));
  }
  // The workdir is scratch space; everything durable was moved into the
  // corpus database above.
  CHECK_OK(RemotePathDelete(base_workdir_path.c_str(), /*recursively=*/true));
  return EXIT_SUCCESS;
}
} // namespace
// Main entry point: dispatches to one of Centipede's operating modes based on
// `env` (corpus import/export, per-blob command execution, crash
// minimization, distillation, corpus-database update, corpus analysis, or
// plain fuzzing) and returns the process exit code.
int CentipedeMain(const Environment &env,
                  CentipedeCallbacksFactory &callbacks_factory) {
  ClearEarlyExitRequest();
  SetSignalHandlers(env.stop_at);
  // Dump the corpus as individual files and exit.
  if (!env.corpus_to_files.empty()) {
    Centipede::CorpusToFiles(env, env.corpus_to_files);
    return EXIT_SUCCESS;
  }
  // Run a user-provided command on every blob in the given files and exit.
  if (!env.for_each_blob.empty()) return ForEachBlob(env);
  // Minimize a single crashing input and exit.
  if (!env.minimize_crash_file_path.empty()) {
    ByteArray crashy_input;
    ReadFromLocalFile(env.minimize_crash_file_path, crashy_input);
    return MinimizeCrash(crashy_input, env, callbacks_factory);
  }
  // Just export the corpus from a local dir and exit.
  if (!env.corpus_from_files.empty()) {
    Centipede::CorpusFromFiles(env, env.corpus_from_files);
    return EXIT_SUCCESS;
  }
  // Export the corpus from a local dir and then fuzz.
  if (!env.corpus_dir.empty()) {
    for (size_t i = 0; i < env.corpus_dir.size(); ++i) {
      const auto &corpus_dir = env.corpus_dir[i];
      // The first dir may be designated output-only and is then skipped here.
      if (i > 0 || !env.first_corpus_dir_output_only)
        Centipede::CorpusFromFiles(env, corpus_dir);
    }
  }
  if (env.distill) return Distill(env);
  // Create the local temporary dir once, before creating any threads. The
  // temporary dir must typically exist before `CentipedeCallbacks` can be used.
  const auto tmpdir = TemporaryLocalDirPath();
  CreateLocalDirRemovedAtExit(tmpdir);
  // Enter the update corpus database mode only if we have a binary to invoke
  // and a corpus database to update.
  if (!env.binary.empty()) {
    const std::string serialized_target_config = [&] {
      ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
      return scoped_callbacks.callbacks()->GetSerializedTargetConfig();
    }();
    if (!serialized_target_config.empty()) {
      const auto target_config = fuzztest::internal::Configuration::Deserialize(
          serialized_target_config);
      CHECK_OK(target_config.status())
          << "Failed to deserialize target configuration";
      if (!target_config->corpus_database.empty()) {
        // The per-test alarm in the corpus-database mode requires a finite
        // time limit of at least one second (see alarm() semantics).
        const auto time_limit_per_test = target_config->GetTimeLimitPerTest();
        CHECK(time_limit_per_test < absl::InfiniteDuration())
            << "Updating corpus database requires specifying time limit per "
               "fuzz test.";
        CHECK(time_limit_per_test >= absl::Seconds(1))
            << "Time limit per fuzz test must be at least 1 second.";
        return UpdateCorpusDatabaseForFuzzTests(env, *target_config,
                                                callbacks_factory);
      }
    } else if (std::getenv("CENTIPEDE_NO_FUZZ_IF_NO_CONFIG") != nullptr) {
      // Target config is empty when the shard does not contain any fuzz tests.
      LOG(INFO) << "No fuzz test found!";
      return EXIT_SUCCESS;
    }
  }
  // Create the remote coverage dirs once, before creating any threads.
  const auto coverage_dir = WorkDir{env}.CoverageDirPath();
  CHECK_OK(RemoteMkdir(coverage_dir));
  LOG(INFO) << "Coverage dir: " << coverage_dir
            << "; temporary dir: " << tmpdir;
  std::string pcs_file_path;
  BinaryInfo binary_info = PopulateBinaryInfoAndSavePCsIfNecessary(
      env, callbacks_factory, pcs_file_path);
  if (env.analyze) return Analyze(env);
  return Fuzz(env, binary_info, pcs_file_path, callbacks_factory);
}
} // namespace centipede