Split crash metadata into description and signature. Signature will be used for deduplication while the description is human-readable for reporting. For runner, the signature will be the same as the description, which is compatible with the current behavior. PiperOrigin-RevId: 770274129
diff --git a/centipede/centipede.cc b/centipede/centipede.cc index 84490e4..8510af8 100644 --- a/centipede/centipede.cc +++ b/centipede/centipede.cc
@@ -206,9 +206,8 @@ absl::Status Centipede::CrashesToFiles(const Environment &env, std::string_view dir) { std::vector<std::string> reproducer_dirs; - const auto wd = WorkDir{env}; auto reproducer_match_status = RemoteGlobMatch( - wd.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs); + WorkDir{env}.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs); if (!reproducer_match_status.ok() && !absl::IsNotFound(reproducer_match_status)) { return reproducer_match_status; @@ -226,18 +225,27 @@ RETURN_IF_NOT_OK(RemoteFileCopy( reproducer_path, (std::filesystem::path{dir} / absl::StrCat(id, ".data")).string())); - const auto shard_index = wd.CrashReproducerDirPaths().GetShardIndex( - std::filesystem::path{reproducer_path}.parent_path().string()); - CHECK(shard_index.has_value()); - const auto metadata_dir = wd.CrashMetadataDirPaths().Shard(*shard_index); - const auto description_filename = absl::StrCat(id, ".desc"); - const auto signature_filename = absl::StrCat(id, ".sig"); + } + } + std::vector<std::string> metadata_dirs; + auto metadata_match_status = RemoteGlobMatch( + WorkDir{env}.CrashMetadataDirPaths().AllShardsGlob(), metadata_dirs); + if (!metadata_match_status.ok() && !absl::IsNotFound(metadata_match_status)) { + return metadata_match_status; + } + for (const auto &metadata_dir : metadata_dirs) { + ASSIGN_OR_RETURN_IF_NOT_OK( + std::vector<std::string> metadata_paths, + RemoteListFiles(metadata_dir, /*recursively=*/false)); + for (const auto &metadata_path : metadata_paths) { + std::string id = std::filesystem::path{metadata_path}.filename(); + if (crash_ids.erase(id) == 0) { + continue; + } RETURN_IF_NOT_OK(RemoteFileCopy( - (std::filesystem::path{metadata_dir} / description_filename).string(), - (std::filesystem::path{dir} / description_filename).string())); - RETURN_IF_NOT_OK(RemoteFileCopy( - (std::filesystem::path{metadata_dir} / signature_filename).string(), - (std::filesystem::path{dir} / signature_filename).string())); + metadata_path, + (std::filesystem::path{dir} / absl::StrCat(id, ".metadata")) + .string())); } } return absl::OkStatus(); @@ -880,9 +888,6 @@ << "\nExit code : " << batch_result.exit_code() << "\nFailure : " << batch_result.failure_description() - << "\nSignature : " - << AsPrintableString(AsByteSpan(batch_result.failure_signature()), - /*max_len=*/32) << "\nNumber of inputs : " << input_vec.size() << "\nNumber of inputs read: " << batch_result.num_outputs_read() << (batch_result.IsSetupFailure() @@ -971,7 +976,7 @@ std::string input_file_path = std::filesystem::path(crash_dir) / hash; auto crash_metadata_dir = wd_.CrashMetadataDirPaths().MyShard(); CHECK_OK(RemoteMkdir(crash_metadata_dir)); - std::string crash_metadata_path_prefix = + std::string crash_metadata_file_path = std::filesystem::path(crash_metadata_dir) / hash; LOG(INFO) << log_prefix << "Detected crash-reproducing input:" << "\nInput index : " << input_idx << "\nInput bytes : " @@ -979,20 +984,13 @@ << "\nExit code : " << one_input_batch_result.exit_code() << "\nFailure : " << one_input_batch_result.failure_description() - << "\nSignature : " - << AsPrintableString( - AsByteSpan(one_input_batch_result.failure_signature()), - /*max_len=*/32) << "\nSaving input to: " << input_file_path << "\nSaving crash" // - << "\nmetadata to : " << crash_metadata_path_prefix << ".*"; + << "\nmetadata to : " << crash_metadata_file_path; CHECK_OK(RemoteFileSetContents(input_file_path, one_input)); - CHECK_OK(RemoteFileSetContents( - absl::StrCat(crash_metadata_path_prefix, ".desc"), - one_input_batch_result.failure_description())); - CHECK_OK(RemoteFileSetContents( - absl::StrCat(crash_metadata_path_prefix, ".sig"), - one_input_batch_result.failure_signature())); + CHECK_OK( + RemoteFileSetContents(crash_metadata_file_path, + one_input_batch_result.failure_description())); return; } }
diff --git a/centipede/centipede_callbacks.cc b/centipede/centipede_callbacks.cc index 8a2ec0c..0a94e84 100644 --- a/centipede/centipede_callbacks.cc +++ b/centipede/centipede_callbacks.cc
@@ -157,7 +157,7 @@ absl::StrCat(":shmem:test=", env_.test_name, ":arg1=", inputs_blobseq_.path(), ":arg2=", outputs_blobseq_.path(), ":failure_description_path=", failure_description_path_, - ":failure_signature_path=", failure_signature_path_, ":"), + ":"), disable_coverage)}; if (env_.clang_coverage_binary == binary) @@ -243,18 +243,9 @@ ReadFromLocalFile(execute_log_path_, batch_result.log()); ReadFromLocalFile(failure_description_path_, batch_result.failure_description()); - if (std::filesystem::exists(failure_signature_path_)) { - ReadFromLocalFile(failure_signature_path_, - batch_result.failure_signature()); - } else { - // TODO(xinhaoyuan): Refactor runner to use dispatcher so this branch can - // be removed. - batch_result.failure_signature() = batch_result.failure_description(); - } - // Remove the failure description and signature files here so that they do - // not stay until another failed execution. + // Remove failure_description_ here so that it doesn't stay until another + // failed execution. std::filesystem::remove(failure_description_path_); - std::filesystem::remove(failure_signature_path_); } VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - start_time); return retval;
diff --git a/centipede/centipede_callbacks.h b/centipede/centipede_callbacks.h index 57b9aab..792608d 100644 --- a/centipede/centipede_callbacks.h +++ b/centipede/centipede_callbacks.h
@@ -174,8 +174,6 @@ std::filesystem::path(temp_dir_).append("log"); std::string failure_description_path_ = std::filesystem::path(temp_dir_).append("failure_description"); - std::string failure_signature_path_ = - std::filesystem::path(temp_dir_).append("failure_signature"); const std::string shmem_name1_ = ProcessAndThreadUniqueID("/ctpd-shm1-"); const std::string shmem_name2_ = ProcessAndThreadUniqueID("/ctpd-shm2-");
diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc index fd331ac..66e1c19 100644 --- a/centipede/centipede_flags.inc +++ b/centipede/centipede_flags.inc
@@ -295,10 +295,11 @@ CENTIPEDE_FLAG( std::string, crashes_to_files, "", "When set to a directory path, save the crashing reproducers and " - "metadata from the workdir to the given path: Each crash with `ID`" - "will be saved with file `ID.data` for the reproducer, `ID.desc` the " - "description, `ID.sig` the signature. If multiple crashes with the same ID " - "exist, only one crash will be saved.") + "metadata from the workdir to the given path. Each crash with `ID`" + "will be saved with file `ID.data` for the reproducer and " + "`ID.metadata` the metadata, which currently contains the failure " + "description. If multiple crashes with the same ID exist, only one " + "crash will be saved.") CENTIPEDE_FLAG( std::string, corpus_from_files, "", "Export a corpus from a local directory with one file per input into "
diff --git a/centipede/centipede_interface.cc b/centipede/centipede_interface.cc index ca4c856..504ccf2 100644 --- a/centipede/centipede_interface.cc +++ b/centipede/centipede_interface.cc
@@ -288,9 +288,9 @@ return test_shard; } -// Prunes non-reproducible and duplicate crashes and returns the crash -// signatures of the remaining crashes. -absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashSignatures( +// Prunes non-reproducible and duplicate crashes and returns the crash metadata +// of the remaining crashes. +absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata( const std::filesystem::path &crashing_dir, const Environment &env, CentipedeCallbacksFactory &callbacks_factory) { const std::vector<std::string> crashing_input_files = @@ -299,7 +299,7 @@ ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false)); ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env); BatchResult batch_result; - absl::flat_hash_set<std::string> remaining_crash_signatures; + absl::flat_hash_set<std::string> remaining_crash_metadata; for (const std::string &crashing_input_file : crashing_input_files) { ByteArray crashing_input; @@ -308,8 +308,7 @@ env.binary, {crashing_input}, batch_result); const bool is_duplicate = is_reproducible && !batch_result.IsSetupFailure() && - !batch_result.failure_signature().empty() && - !remaining_crash_signatures.insert(batch_result.failure_signature()) + !remaining_crash_metadata.insert(batch_result.failure_description()) .second; if (!is_reproducible || batch_result.IsSetupFailure() || is_duplicate) { CHECK_OK(RemotePathDelete(crashing_input_file, /*recursively=*/false)); @@ -317,13 +316,13 @@ CHECK_OK(RemotePathTouchExistingFile(crashing_input_file)); } } - return remaining_crash_signatures; + return remaining_crash_metadata; } // TODO(b/405382531): Add unit tests once the function is unit-testable. void DeduplicateAndStoreNewCrashes( const std::filesystem::path &crashing_dir, const WorkDir &workdir, - size_t total_shards, absl::flat_hash_set<std::string> crash_signatures) { + size_t total_shards, absl::flat_hash_set<std::string> crash_metadata) { for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) { const std::vector<std::string> new_crashing_input_files = // The crash reproducer directory may contain subdirectories with @@ -339,20 +338,19 @@ for (const std::string &crashing_input_file : new_crashing_input_files) { const std::string crashing_input_file_name = std::filesystem::path(crashing_input_file).filename(); - const std::string crash_signature_path = - crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".sig"); - std::string new_crash_signature; + const std::string crash_metadata_file = + crash_metadata_dir / crashing_input_file_name; + std::string new_crash_metadata; const absl::Status status = - RemoteFileGetContents(crash_signature_path, new_crash_signature); + RemoteFileGetContents(crash_metadata_file, new_crash_metadata); if (!status.ok()) { LOG(WARNING) << "Ignoring crashing input " << crashing_input_file_name - << " due to failure to read the crash signature: " + << " due to failure to read the crash metadata file: " << status; continue; } const bool is_duplicate = - !new_crash_signature.empty() && - !crash_signatures.insert(new_crash_signature).second; + !crash_metadata.insert(new_crash_metadata).second; if (is_duplicate) continue; CHECK_OK( RemoteFileRename(crashing_input_file, @@ -669,11 +667,11 @@ // Deduplicate and update the crashing inputs. const std::filesystem::path crashing_dir = fuzztest_db_path / "crashing"; - absl::flat_hash_set<std::string> crash_signatures = - PruneOldCrashesAndGetRemainingCrashSignatures(crashing_dir, env, - callbacks_factory); + absl::flat_hash_set<std::string> crash_metadata = + PruneOldCrashesAndGetRemainingCrashMetadata(crashing_dir, env, + callbacks_factory); DeduplicateAndStoreNewCrashes(crashing_dir, workdir, env.total_shards, - std::move(crash_signatures)); + std::move(crash_metadata)); } return EXIT_SUCCESS;
diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc index c66e7ab..3254b5e 100644 --- a/centipede/centipede_test.cc +++ b/centipede/centipede_test.cc
@@ -685,7 +685,6 @@ std::string binary; unsigned char input = 0; std::string description; - std::string signature; }; // A mock for ExtraBinaries test. @@ -704,7 +703,6 @@ for (const Crash &crash : crashes_) { if (binary == crash.binary && input[0] == crash.input) { batch_result.failure_description() = crash.description; - batch_result.failure_signature() = crash.signature; res = false; } } @@ -768,9 +766,9 @@ env.binary = "b1"; env.extra_binaries = {"b2", "b3", "b4"}; env.require_pc_table = false; // No PC table here. - ExtraBinariesMock mock(env, {Crash{"b1", 10, "b1-crash", "b1-sig"}, - Crash{"b2", 30, "b2-crash", "b2-sig"}, - Crash{"b3", 50, "b3-crash", "b3-sig"}}); + ExtraBinariesMock mock( + env, {Crash{"b1", 10, "b1-crash"}, Crash{"b2", 30, "b2-crash"}, + Crash{"b3", 50, "b3-crash"}}); MockFactory factory(mock); CentipedeMain(env, factory); @@ -791,15 +789,11 @@ auto crash_metadata_dir_path = WorkDir{env}.CrashMetadataDirPaths().MyShard(); ASSERT_TRUE(std::filesystem::exists(crash_metadata_dir_path)) << VV(crash_metadata_dir_path); - EXPECT_THAT( - crash_metadata_dir_path, - HasFilesWithContents(testing::UnorderedElementsAre( - FileAndContents{absl::StrCat(Hash({10}), ".desc"), "b1-crash"}, - FileAndContents{absl::StrCat(Hash({10}), ".sig"), "b1-sig"}, - FileAndContents{absl::StrCat(Hash({30}), ".desc"), "b2-crash"}, - FileAndContents{absl::StrCat(Hash({30}), ".sig"), "b2-sig"}, - FileAndContents{absl::StrCat(Hash({50}), ".desc"), "b3-crash"}, - FileAndContents{absl::StrCat(Hash({50}), ".sig"), "b3-sig"}))); + EXPECT_THAT(crash_metadata_dir_path, + HasFilesWithContents(testing::UnorderedElementsAre( + FileAndContents{Hash({10}), "b1-crash"}, + FileAndContents{Hash({30}), "b2-crash"}, + FileAndContents{Hash({50}), "b3-crash"}))); } // A mock for UndetectedCrashingInput test.
diff --git a/centipede/runner_result.h b/centipede/runner_result.h index 17ecd57..fd4e9f7 100644 --- a/centipede/runner_result.h +++ b/centipede/runner_result.h
@@ -162,8 +162,6 @@ const std::string& failure_description() const { return failure_description_; } - std::string& failure_signature() { return failure_signature_; } - const std::string& failure_signature() const { return failure_signature_; } private: friend class MultiInputMock; @@ -171,13 +169,9 @@ std::vector<ExecutionResult> results_; std::string log_; // log_ is populated optionally, e.g. if there was a crash. int exit_code_ = EXIT_SUCCESS; // Process exit code. - // If the batch execution fails, this may optionally contain a human-readable - // failure description, e.g., the crash type, stack trace... + // If the batch execution fails, this may optionally contain a failure + // description, e.g., the crash type, stack trace... std::string failure_description_; - // A signature uniquely identifying the failure, which does not need to be - // human-readable. Specially, failures with empty signatures are always - // considered unique. - std::string failure_signature_; size_t num_outputs_read_ = 0; };
diff --git a/centipede/workdir.cc b/centipede/workdir.cc index 59b2e0f..3e96506 100644 --- a/centipede/workdir.cc +++ b/centipede/workdir.cc
@@ -16,7 +16,6 @@ #include <cstddef> #include <filesystem> // NOLINT -#include <optional> #include <string> #include <string_view> #include <system_error> // NOLINT @@ -29,7 +28,6 @@ #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" -#include "absl/strings/strip.h" #include "./centipede/environment.h" #include "./common/logging.h" @@ -82,15 +80,6 @@ return absl::StartsWith(path, prefix_); } -std::optional<size_t> WorkDir::PathShards::GetShardIndex( - std::string_view path) const { - if (!absl::ConsumePrefix(&path, prefix_)) return std::nullopt; - if (path.size() != kDigitsInShardIndex) return std::nullopt; - size_t shard = 0; - if (!absl::SimpleAtoi(path, &shard)) return std::nullopt; - return shard; -} - //------------------------------------------------------------------------------ // WorkDir
diff --git a/centipede/workdir.h b/centipede/workdir.h index 5192b93..9aefffd 100644 --- a/centipede/workdir.h +++ b/centipede/workdir.h
@@ -16,7 +16,6 @@ #define THIRD_PARTY_CENTIPEDE_WORKDIR_MGR_H_ #include <cstddef> -#include <optional> #include <ostream> #include <string> #include <string_view> @@ -48,9 +47,6 @@ // but `path` must have the exact `base_dir`/`rel_prefix` prefix, // including any relative "." and ".." path elements. bool IsShard(std::string_view path) const; - // Returns the shard index of `path` if it is a shard parth, `nullopt` - // otherwise. - std::optional<size_t> GetShardIndex(std::string_view path) const; private: friend class WorkDir;
diff --git a/fuzztest/internal/centipede_adaptor.cc b/fuzztest/internal/centipede_adaptor.cc index 0e31571..eff5706 100644 --- a/fuzztest/internal/centipede_adaptor.cc +++ b/fuzztest/internal/centipede_adaptor.cc
@@ -760,18 +760,17 @@ absl::StrCat(read_reproducer_status)); continue; } - const std::string description_file = - std::filesystem::path{exported_crash_file} - .replace_extension("desc") - .string(); - std::string description; - const absl::Status read_description_status = - RemoteFileGetContents(description_file, description); - if (!read_description_status.ok()) { + const std::string metadata_file = std::filesystem::path{exported_crash_file} + .replace_extension("metadata") + .string(); + std::string metadata; + const absl::Status read_metadata_status = + RemoteFileGetContents(metadata_file, metadata); + if (!read_metadata_status.ok()) { absl::FPrintF( GetStderr(), - "[!] Got error while reading the description for crash id %s: %s\n", - crash_id, absl::StrCat(read_description_status)); + "[!] Got error while reading the metadata for crash id %s: %s\n", + crash_id, absl::StrCat(read_metadata_status)); continue; } std::string reproducer_path = WriteDataToDir(reproducer, output.dir_path); @@ -783,8 +782,8 @@ continue; } absl::FPrintF(GetStderr(), - "[.] Saved reproducer with ID %s and crash description %s\n", - Basename(reproducer_path), description); + "[.] Saved reproducer with ID %s and crash metadata %s\n", + Basename(reproducer_path), metadata); if (!single_reproducer_path.has_value()) { single_reproducer_path = reproducer_path; } else {