Split crash metadata into description and signature.

Signature will be used for deduplication while the description is human-readable for reporting.

For runner, the signature will be the same as the description, which is compatible with the current behavior.

PiperOrigin-RevId: 770274129
diff --git a/centipede/centipede.cc b/centipede/centipede.cc
index 84490e4..8510af8 100644
--- a/centipede/centipede.cc
+++ b/centipede/centipede.cc
@@ -206,9 +206,8 @@
 absl::Status Centipede::CrashesToFiles(const Environment &env,
                                        std::string_view dir) {
   std::vector<std::string> reproducer_dirs;
-  const auto wd = WorkDir{env};
   auto reproducer_match_status = RemoteGlobMatch(
-      wd.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs);
+      WorkDir{env}.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs);
   if (!reproducer_match_status.ok() &&
       !absl::IsNotFound(reproducer_match_status)) {
     return reproducer_match_status;
@@ -226,18 +225,27 @@
       RETURN_IF_NOT_OK(RemoteFileCopy(
           reproducer_path,
           (std::filesystem::path{dir} / absl::StrCat(id, ".data")).string()));
-      const auto shard_index = wd.CrashReproducerDirPaths().GetShardIndex(
-          std::filesystem::path{reproducer_path}.parent_path().string());
-      CHECK(shard_index.has_value());
-      const auto metadata_dir = wd.CrashMetadataDirPaths().Shard(*shard_index);
-      const auto description_filename = absl::StrCat(id, ".desc");
-      const auto signature_filename = absl::StrCat(id, ".sig");
+    }
+  }
+  std::vector<std::string> metadata_dirs;
+  auto metadata_match_status = RemoteGlobMatch(
+      WorkDir{env}.CrashMetadataDirPaths().AllShardsGlob(), metadata_dirs);
+  if (!metadata_match_status.ok() && !absl::IsNotFound(metadata_match_status)) {
+    return metadata_match_status;
+  }
+  for (const auto &metadata_dir : metadata_dirs) {
+    ASSIGN_OR_RETURN_IF_NOT_OK(
+        std::vector<std::string> metadata_paths,
+        RemoteListFiles(metadata_dir, /*recursively=*/false));
+    for (const auto &metadata_path : metadata_paths) {
+      std::string id = std::filesystem::path{metadata_path}.filename();
+      if (crash_ids.erase(id) == 0) {
+        continue;
+      }
       RETURN_IF_NOT_OK(RemoteFileCopy(
-          (std::filesystem::path{metadata_dir} / description_filename).string(),
-          (std::filesystem::path{dir} / description_filename).string()));
-      RETURN_IF_NOT_OK(RemoteFileCopy(
-          (std::filesystem::path{metadata_dir} / signature_filename).string(),
-          (std::filesystem::path{dir} / signature_filename).string()));
+          metadata_path,
+          (std::filesystem::path{dir} / absl::StrCat(id, ".metadata"))
+              .string()));
     }
   }
   return absl::OkStatus();
@@ -880,9 +888,6 @@
               << "\nExit code            : " << batch_result.exit_code()
               << "\nFailure              : "
               << batch_result.failure_description()
-              << "\nSignature            : "
-              << AsPrintableString(AsByteSpan(batch_result.failure_signature()),
-                                   /*max_len=*/32)
               << "\nNumber of inputs     : " << input_vec.size()
               << "\nNumber of inputs read: " << batch_result.num_outputs_read()
               << (batch_result.IsSetupFailure()
@@ -971,7 +976,7 @@
       std::string input_file_path = std::filesystem::path(crash_dir) / hash;
       auto crash_metadata_dir = wd_.CrashMetadataDirPaths().MyShard();
       CHECK_OK(RemoteMkdir(crash_metadata_dir));
-      std::string crash_metadata_path_prefix =
+      std::string crash_metadata_file_path =
           std::filesystem::path(crash_metadata_dir) / hash;
       LOG(INFO) << log_prefix << "Detected crash-reproducing input:"
                 << "\nInput index    : " << input_idx << "\nInput bytes    : "
@@ -979,20 +984,13 @@
                 << "\nExit code      : " << one_input_batch_result.exit_code()
                 << "\nFailure        : "
                 << one_input_batch_result.failure_description()
-                << "\nSignature      : "
-                << AsPrintableString(
-                       AsByteSpan(one_input_batch_result.failure_signature()),
-                       /*max_len=*/32)
                 << "\nSaving input to: " << input_file_path
                 << "\nSaving crash"  //
-                << "\nmetadata to    : " << crash_metadata_path_prefix << ".*";
+                << "\nmetadata to    : " << crash_metadata_file_path;
       CHECK_OK(RemoteFileSetContents(input_file_path, one_input));
-      CHECK_OK(RemoteFileSetContents(
-          absl::StrCat(crash_metadata_path_prefix, ".desc"),
-          one_input_batch_result.failure_description()));
-      CHECK_OK(RemoteFileSetContents(
-          absl::StrCat(crash_metadata_path_prefix, ".sig"),
-          one_input_batch_result.failure_signature()));
+      CHECK_OK(
+          RemoteFileSetContents(crash_metadata_file_path,
+                                one_input_batch_result.failure_description()));
       return;
     }
   }
diff --git a/centipede/centipede_callbacks.cc b/centipede/centipede_callbacks.cc
index 8a2ec0c..0a94e84 100644
--- a/centipede/centipede_callbacks.cc
+++ b/centipede/centipede_callbacks.cc
@@ -157,7 +157,7 @@
       absl::StrCat(":shmem:test=", env_.test_name, ":arg1=",
                    inputs_blobseq_.path(), ":arg2=", outputs_blobseq_.path(),
                    ":failure_description_path=", failure_description_path_,
-                   ":failure_signature_path=", failure_signature_path_, ":"),
+                   ":"),
       disable_coverage)};
 
   if (env_.clang_coverage_binary == binary)
@@ -243,18 +243,9 @@
     ReadFromLocalFile(execute_log_path_, batch_result.log());
     ReadFromLocalFile(failure_description_path_,
                       batch_result.failure_description());
-    if (std::filesystem::exists(failure_signature_path_)) {
-      ReadFromLocalFile(failure_signature_path_,
-                        batch_result.failure_signature());
-    } else {
-      // TODO(xinhaoyuan): Refactor runner to use dispatcher so this branch can
-      // be removed.
-      batch_result.failure_signature() = batch_result.failure_description();
-    }
-    // Remove the failure description and signature files here so that they do
-    // not stay until another failed execution.
+    // Remove failure_description_ here so that it doesn't stay until another
+    // failed execution.
     std::filesystem::remove(failure_description_path_);
-    std::filesystem::remove(failure_signature_path_);
   }
   VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - start_time);
   return retval;
diff --git a/centipede/centipede_callbacks.h b/centipede/centipede_callbacks.h
index 57b9aab..792608d 100644
--- a/centipede/centipede_callbacks.h
+++ b/centipede/centipede_callbacks.h
@@ -174,8 +174,6 @@
       std::filesystem::path(temp_dir_).append("log");
   std::string failure_description_path_ =
       std::filesystem::path(temp_dir_).append("failure_description");
-  std::string failure_signature_path_ =
-      std::filesystem::path(temp_dir_).append("failure_signature");
   const std::string shmem_name1_ = ProcessAndThreadUniqueID("/ctpd-shm1-");
   const std::string shmem_name2_ = ProcessAndThreadUniqueID("/ctpd-shm2-");
 
diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc
index fd331ac..66e1c19 100644
--- a/centipede/centipede_flags.inc
+++ b/centipede/centipede_flags.inc
@@ -295,10 +295,11 @@
 CENTIPEDE_FLAG(
     std::string, crashes_to_files, "",
     "When set to a directory path, save the crashing reproducers and "
-    "metadata from the workdir to the given path: Each crash with `ID`"
-    "will be saved with file `ID.data` for the reproducer, `ID.desc` the "
-    "description, `ID.sig` the signature. If multiple crashes with the same ID "
-    "exist, only one crash will be saved.")
+    "metadata from the workdir to the given path. Each crash with `ID`"
+    "will be saved with file `ID.data` for the reproducer and "
+    "`ID.metadata` the metadata, which currently contains the failure "
+    "description. If multiple crashes with the same ID exist, only one "
+    "crash will be saved.")
 CENTIPEDE_FLAG(
     std::string, corpus_from_files, "",
     "Export a corpus from a local directory with one file per input into "
diff --git a/centipede/centipede_interface.cc b/centipede/centipede_interface.cc
index ca4c856..504ccf2 100644
--- a/centipede/centipede_interface.cc
+++ b/centipede/centipede_interface.cc
@@ -288,9 +288,9 @@
   return test_shard;
 }
 
-// Prunes non-reproducible and duplicate crashes and returns the crash
-// signatures of the remaining crashes.
-absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashSignatures(
+// Prunes non-reproducible and duplicate crashes and returns the crash metadata
+// of the remaining crashes.
+absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata(
     const std::filesystem::path &crashing_dir, const Environment &env,
     CentipedeCallbacksFactory &callbacks_factory) {
   const std::vector<std::string> crashing_input_files =
@@ -299,7 +299,7 @@
       ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false));
   ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
   BatchResult batch_result;
-  absl::flat_hash_set<std::string> remaining_crash_signatures;
+  absl::flat_hash_set<std::string> remaining_crash_metadata;
 
   for (const std::string &crashing_input_file : crashing_input_files) {
     ByteArray crashing_input;
@@ -308,8 +308,7 @@
         env.binary, {crashing_input}, batch_result);
     const bool is_duplicate =
         is_reproducible && !batch_result.IsSetupFailure() &&
-        !batch_result.failure_signature().empty() &&
-        !remaining_crash_signatures.insert(batch_result.failure_signature())
+        !remaining_crash_metadata.insert(batch_result.failure_description())
              .second;
     if (!is_reproducible || batch_result.IsSetupFailure() || is_duplicate) {
       CHECK_OK(RemotePathDelete(crashing_input_file, /*recursively=*/false));
@@ -317,13 +316,13 @@
       CHECK_OK(RemotePathTouchExistingFile(crashing_input_file));
     }
   }
-  return remaining_crash_signatures;
+  return remaining_crash_metadata;
 }
 
 // TODO(b/405382531): Add unit tests once the function is unit-testable.
 void DeduplicateAndStoreNewCrashes(
     const std::filesystem::path &crashing_dir, const WorkDir &workdir,
-    size_t total_shards, absl::flat_hash_set<std::string> crash_signatures) {
+    size_t total_shards, absl::flat_hash_set<std::string> crash_metadata) {
   for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) {
     const std::vector<std::string> new_crashing_input_files =
         // The crash reproducer directory may contain subdirectories with
@@ -339,20 +338,19 @@
     for (const std::string &crashing_input_file : new_crashing_input_files) {
       const std::string crashing_input_file_name =
           std::filesystem::path(crashing_input_file).filename();
-      const std::string crash_signature_path =
-          crash_metadata_dir / absl::StrCat(crashing_input_file_name, ".sig");
-      std::string new_crash_signature;
+      const std::string crash_metadata_file =
+          crash_metadata_dir / crashing_input_file_name;
+      std::string new_crash_metadata;
       const absl::Status status =
-          RemoteFileGetContents(crash_signature_path, new_crash_signature);
+          RemoteFileGetContents(crash_metadata_file, new_crash_metadata);
       if (!status.ok()) {
         LOG(WARNING) << "Ignoring crashing input " << crashing_input_file_name
-                     << " due to failure to read the crash signature: "
+                     << " due to failure to read the crash metadata file: "
                      << status;
         continue;
       }
       const bool is_duplicate =
-          !new_crash_signature.empty() &&
-          !crash_signatures.insert(new_crash_signature).second;
+          !crash_metadata.insert(new_crash_metadata).second;
       if (is_duplicate) continue;
       CHECK_OK(
           RemoteFileRename(crashing_input_file,
@@ -669,11 +667,11 @@
 
     // Deduplicate and update the crashing inputs.
     const std::filesystem::path crashing_dir = fuzztest_db_path / "crashing";
-    absl::flat_hash_set<std::string> crash_signatures =
-        PruneOldCrashesAndGetRemainingCrashSignatures(crashing_dir, env,
-                                                      callbacks_factory);
+    absl::flat_hash_set<std::string> crash_metadata =
+        PruneOldCrashesAndGetRemainingCrashMetadata(crashing_dir, env,
+                                                    callbacks_factory);
     DeduplicateAndStoreNewCrashes(crashing_dir, workdir, env.total_shards,
-                                  std::move(crash_signatures));
+                                  std::move(crash_metadata));
   }
 
   return EXIT_SUCCESS;
diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc
index c66e7ab..3254b5e 100644
--- a/centipede/centipede_test.cc
+++ b/centipede/centipede_test.cc
@@ -685,7 +685,6 @@
   std::string binary;
   unsigned char input = 0;
   std::string description;
-  std::string signature;
 };
 
 // A mock for ExtraBinaries test.
@@ -704,7 +703,6 @@
       for (const Crash &crash : crashes_) {
         if (binary == crash.binary && input[0] == crash.input) {
           batch_result.failure_description() = crash.description;
-          batch_result.failure_signature() = crash.signature;
           res = false;
         }
       }
@@ -768,9 +766,9 @@
   env.binary = "b1";
   env.extra_binaries = {"b2", "b3", "b4"};
   env.require_pc_table = false;  // No PC table here.
-  ExtraBinariesMock mock(env, {Crash{"b1", 10, "b1-crash", "b1-sig"},
-                               Crash{"b2", 30, "b2-crash", "b2-sig"},
-                               Crash{"b3", 50, "b3-crash", "b3-sig"}});
+  ExtraBinariesMock mock(
+      env, {Crash{"b1", 10, "b1-crash"}, Crash{"b2", 30, "b2-crash"},
+            Crash{"b3", 50, "b3-crash"}});
   MockFactory factory(mock);
   CentipedeMain(env, factory);
 
@@ -791,15 +789,11 @@
   auto crash_metadata_dir_path = WorkDir{env}.CrashMetadataDirPaths().MyShard();
   ASSERT_TRUE(std::filesystem::exists(crash_metadata_dir_path))
       << VV(crash_metadata_dir_path);
-  EXPECT_THAT(
-      crash_metadata_dir_path,
-      HasFilesWithContents(testing::UnorderedElementsAre(
-          FileAndContents{absl::StrCat(Hash({10}), ".desc"), "b1-crash"},
-          FileAndContents{absl::StrCat(Hash({10}), ".sig"), "b1-sig"},
-          FileAndContents{absl::StrCat(Hash({30}), ".desc"), "b2-crash"},
-          FileAndContents{absl::StrCat(Hash({30}), ".sig"), "b2-sig"},
-          FileAndContents{absl::StrCat(Hash({50}), ".desc"), "b3-crash"},
-          FileAndContents{absl::StrCat(Hash({50}), ".sig"), "b3-sig"})));
+  EXPECT_THAT(crash_metadata_dir_path,
+              HasFilesWithContents(testing::UnorderedElementsAre(
+                  FileAndContents{Hash({10}), "b1-crash"},
+                  FileAndContents{Hash({30}), "b2-crash"},
+                  FileAndContents{Hash({50}), "b3-crash"})));
 }
 
 // A mock for UndetectedCrashingInput test.
diff --git a/centipede/runner_result.h b/centipede/runner_result.h
index 17ecd57..fd4e9f7 100644
--- a/centipede/runner_result.h
+++ b/centipede/runner_result.h
@@ -162,8 +162,6 @@
   const std::string& failure_description() const {
     return failure_description_;
   }
-  std::string& failure_signature() { return failure_signature_; }
-  const std::string& failure_signature() const { return failure_signature_; }
 
  private:
   friend class MultiInputMock;
@@ -171,13 +169,9 @@
   std::vector<ExecutionResult> results_;
   std::string log_;  // log_ is populated optionally, e.g. if there was a crash.
   int exit_code_ = EXIT_SUCCESS;  // Process exit code.
-  // If the batch execution fails, this may optionally contain a human-readable
-  // failure description, e.g., the crash type, stack trace...
+  // If the batch execution fails, this may optionally contain a failure
+  // description, e.g., the crash type, stack trace...
   std::string failure_description_;
-  // A signature uniquely identifying the failure, which does not need to be
-  // human-readable. Specially, failures with empty signatures are always
-  // considered unique.
-  std::string failure_signature_;
   size_t num_outputs_read_ = 0;
 };
 
diff --git a/centipede/workdir.cc b/centipede/workdir.cc
index 59b2e0f..3e96506 100644
--- a/centipede/workdir.cc
+++ b/centipede/workdir.cc
@@ -16,7 +16,6 @@
 
 #include <cstddef>
 #include <filesystem>  // NOLINT
-#include <optional>
 #include <string>
 #include <string_view>
 #include <system_error>  // NOLINT
@@ -29,7 +28,6 @@
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
-#include "absl/strings/strip.h"
 #include "./centipede/environment.h"
 #include "./common/logging.h"
 
@@ -82,15 +80,6 @@
   return absl::StartsWith(path, prefix_);
 }
 
-std::optional<size_t> WorkDir::PathShards::GetShardIndex(
-    std::string_view path) const {
-  if (!absl::ConsumePrefix(&path, prefix_)) return std::nullopt;
-  if (path.size() != kDigitsInShardIndex) return std::nullopt;
-  size_t shard = 0;
-  if (!absl::SimpleAtoi(path, &shard)) return std::nullopt;
-  return shard;
-}
-
 //------------------------------------------------------------------------------
 //                                 WorkDir
 
diff --git a/centipede/workdir.h b/centipede/workdir.h
index 5192b93..9aefffd 100644
--- a/centipede/workdir.h
+++ b/centipede/workdir.h
@@ -16,7 +16,6 @@
 #define THIRD_PARTY_CENTIPEDE_WORKDIR_MGR_H_
 
 #include <cstddef>
-#include <optional>
 #include <ostream>
 #include <string>
 #include <string_view>
@@ -48,9 +47,6 @@
     // but `path` must have the exact `base_dir`/`rel_prefix` prefix,
     // including any relative "." and ".." path elements.
     bool IsShard(std::string_view path) const;
-    // Returns the shard index of `path` if it is a shard parth, `nullopt`
-    // otherwise.
-    std::optional<size_t> GetShardIndex(std::string_view path) const;
 
    private:
     friend class WorkDir;
diff --git a/fuzztest/internal/centipede_adaptor.cc b/fuzztest/internal/centipede_adaptor.cc
index 0e31571..eff5706 100644
--- a/fuzztest/internal/centipede_adaptor.cc
+++ b/fuzztest/internal/centipede_adaptor.cc
@@ -760,18 +760,17 @@
                     absl::StrCat(read_reproducer_status));
       continue;
     }
-    const std::string description_file =
-        std::filesystem::path{exported_crash_file}
-            .replace_extension("desc")
-            .string();
-    std::string description;
-    const absl::Status read_description_status =
-        RemoteFileGetContents(description_file, description);
-    if (!read_description_status.ok()) {
+    const std::string metadata_file = std::filesystem::path{exported_crash_file}
+                                          .replace_extension("metadata")
+                                          .string();
+    std::string metadata;
+    const absl::Status read_metadata_status =
+        RemoteFileGetContents(metadata_file, metadata);
+    if (!read_metadata_status.ok()) {
       absl::FPrintF(
           GetStderr(),
-          "[!] Got error while reading the description for crash id %s: %s\n",
-          crash_id, absl::StrCat(read_description_status));
+          "[!] Got error while reading the metadata for crash id %s: %s\n",
+          crash_id, absl::StrCat(read_metadata_status));
       continue;
     }
     std::string reproducer_path = WriteDataToDir(reproducer, output.dir_path);
@@ -783,8 +782,8 @@
       continue;
     }
     absl::FPrintF(GetStderr(),
-                  "[.] Saved reproducer with ID %s and crash description %s\n",
-                  Basename(reproducer_path), description);
+                  "[.] Saved reproducer with ID %s and crash metadata %s\n",
+                  Basename(reproducer_path), metadata);
     if (!single_reproducer_path.has_value()) {
       single_reproducer_path = reproducer_path;
     } else {