// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <iterator>
#include <limits>
#include <set>
#include <string>
#include <string_view>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_set.h"
#include "./centipede/blob_file.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/centipede_interface.h"
#include "./centipede/corpus.h"
#include "./centipede/defs.h"
#include "./centipede/environment.h"
#include "./centipede/execution_result.h"
#include "./centipede/feature.h"
#include "./centipede/logging.h"
#include "./centipede/shard_reader.h"
#include "./centipede/test_util.h"
#include "./centipede/util.h"
namespace centipede {
namespace {
// A mock for CentipedeCallbacks.
class CentipedeMock : public CentipedeCallbacks {
public:
CentipedeMock(const Environment &env) : CentipedeCallbacks(env) {}
// Doesn't execute anything.
// Sets `batch_result.results()` based on the values of `inputs`.
// Collects various stats about the inputs, to be checked in tests.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
batch_result.results().clear();
// For every input, we create a 256-element array `counters`, where the
// i-th element is the number of bytes with the value 'i' in the input.
// `counters` is converted to a FeatureVec and added to
// `batch_result.results()`.
for (auto &input : inputs) {
ByteArray counters(256);
for (uint8_t byte : input) {
counters[byte]++;
}
FeatureVec features;
for (size_t i = 0; i < counters.size(); ++i) {
if (counters[i] == 0) continue;
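// Map the (byte value, counter value) pair into the 8-bit-counters feature
// domain, so that different counts of the same byte yield distinct features.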
features.push_back(feature_domains::k8bitCounters.ConvertToMe(
Convert8bitCounterToNumber(i, counters[i])));
}
batch_result.results().emplace_back(ExecutionResult{features});
if (input.size() == 1) {
observed_1byte_inputs_.insert(input[0]);
} else {
EXPECT_EQ(input.size(), 2);
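// Pack the 2-byte input into a single 16-bit key for the observed set.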
uint16_t input2bytes = (input[0] << 8) | input[1];
observed_2byte_inputs_.insert(input2bytes);
}
num_inputs_++;
}
num_executions_++;
max_batch_size_ = std::max(max_batch_size_, inputs.size());
min_batch_size_ = std::min(min_batch_size_, inputs.size());
return true;
}
// Makes predictable mutants:
// The first 255 mutations are the 1-byte sequences {1} ... {255}
// (the value {0} is produced by DummyValidInput()).
// The next 65536 mutations are the 2-byte sequences {0,0} ... {255,255}.
// After that, the 2-byte sequences repeat.
void Mutate(const std::vector<MutationInputRef> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
mutants.resize(num_mutants);
for (auto &mutant : mutants) {
num_mutations_++;
if (num_mutations_ < 256) {
mutant = {static_cast<uint8_t>(num_mutations_)};
continue;
}
uint8_t byte0 = (num_mutations_ - 256) / 256;
uint8_t byte1 = (num_mutations_ - 256) % 256;
mutant = {byte0, byte1};
}
}
absl::flat_hash_set<uint8_t> observed_1byte_inputs_;
absl::flat_hash_set<uint16_t> observed_2byte_inputs_;
size_t num_executions_ = 0;
size_t num_inputs_ = 0;
size_t num_mutations_ = 0;
size_t max_batch_size_ = 0;
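// -1 wraps around to the largest size_t value, so the first observed batch
// size becomes the minimum.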
size_t min_batch_size_ = -1;
};
// Returns the same CentipedeCallbacks object every time, never destroys it.
class MockFactory : public CentipedeCallbacksFactory {
public:
explicit MockFactory(CentipedeCallbacks &cb) : cb_(cb) {}
CentipedeCallbacks *create(const Environment &env) override { return &cb_; }
void destroy(CentipedeCallbacks *cb) override { EXPECT_EQ(cb, &cb_); }
private:
CentipedeCallbacks &cb_;
};
} // namespace
TEST(Centipede, MockTest) {
TempCorpusDir tmp_dir{test_info_->name()};
Environment env; // Reads the flags. We override some members below.
env.log_level = 0; // Disable most of the logging in the test.
env.workdir = tmp_dir.path();
env.num_runs = 100000; // Enough to run through all 1- and 2-byte inputs.
env.batch_size = 7; // Just some small number.
env.require_pc_table = false; // No PC table here.
CentipedeMock mock(env);
MockFactory factory(mock);
CentipedeMain(env, factory); // Run fuzzing with num_runs inputs.
EXPECT_EQ(mock.num_inputs_, env.num_runs + 1); // num_runs and one dummy.
EXPECT_EQ(mock.num_mutations_, env.num_runs);
EXPECT_EQ(mock.max_batch_size_, env.batch_size);
EXPECT_EQ(mock.min_batch_size_, 1); // 1 for dummy.
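// 512 corpus elements are expected: 256 one-byte inputs (one new counter
// feature each) plus the 256 {x,x} inputs, whose repeated byte produces a
// counter value of 2 and hence another new feature.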
EXPECT_EQ(tmp_dir.CountElementsInCorpusFile(0), 512);
EXPECT_EQ(mock.observed_1byte_inputs_.size(), 256); // all 1-byte seqs.
EXPECT_EQ(mock.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs.
}
static size_t CountFilesInDir(std::string_view dir_path) {
const std::filesystem::directory_iterator dir_iter{dir_path};
return std::distance(std::filesystem::begin(dir_iter),
std::filesystem::end(dir_iter));
}
// Tests fuzzing and distilling in multiple shards.
TEST(Centipede, ShardsAndDistillTest) {
TempCorpusDir tmp_dir{test_info_->name()};
Environment env; // Reads the flags. We override some members below.
env.workdir = tmp_dir.path();
env.log_level = 0; // Disable most of the logging in the test.
size_t combined_num_runs = 100000; // Enough to run through all inputs.
env.total_shards = 20;
env.num_runs = combined_num_runs / env.total_shards;
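// I.e. 100000 / 20 == 5000 runs per shard.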
env.require_pc_table = false; // No PC table here.
// Create two empty dirs and add them to corpus_dir.
env.corpus_dir.push_back(tmp_dir.CreateSubdir("cd1"));
env.corpus_dir.push_back(tmp_dir.CreateSubdir("cd2"));
CentipedeMock mock(env);
// First round of runs: do the actual fuzzing, compute the features.
size_t max_shard_size = 0;
for (size_t shard_index = 0; shard_index < env.total_shards; shard_index++) {
env.my_shard_index = shard_index;
MockFactory factory(mock);
CentipedeMain(env, factory); // Run fuzzing in shard `shard_index`.
auto corpus_size = tmp_dir.CountElementsInCorpusFile(shard_index);
// Every byte should be present at least once.
// With 2-byte inputs, we get at least 128 inputs covering 256 features.
EXPECT_GT(corpus_size, 128);
max_shard_size = std::max(max_shard_size, corpus_size);
}
EXPECT_EQ(mock.observed_1byte_inputs_.size(), 256); // all 1-byte seqs.
EXPECT_EQ(mock.observed_2byte_inputs_.size(), 65536); // all 2-byte seqs.
EXPECT_GT(CountFilesInDir(env.corpus_dir[0]), 128);
EXPECT_EQ(CountFilesInDir(env.corpus_dir[1]), 0);
// Second round of runs. Don't fuzz, only distill.
// Don't distill in the last one to test the flag behaviour.
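// Only shards with index < `distill_shards` produce a distilled corpus file.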
env.distill_shards = env.total_shards - 1;
env.num_runs = 0; // No fuzzing.
for (size_t shard_index = 0; shard_index < env.total_shards; shard_index++) {
env.my_shard_index = shard_index;
// Empty the corpus_dir[0]
std::filesystem::remove_all(env.corpus_dir[0]);
std::filesystem::create_directory(env.corpus_dir[0]);
MockFactory factory(mock);
CentipedeMain(env, factory); // Run distilling in shard `shard_index`.
auto distilled_size =
tmp_dir.CountElementsInCorpusFile(shard_index, "distilled-.");
if (shard_index == env.total_shards - 1) {
EXPECT_EQ(distilled_size, 0); // Didn't distill in the last shard.
EXPECT_EQ(CountFilesInDir(env.corpus_dir[0]), 0);
} else {
// Distillation is expected to find more inputs than any individual shard.
EXPECT_GT(distilled_size, max_shard_size);
// And since we are expecting 512 features, with 2-byte inputs,
// we get at least 512/2 corpus elements after distillation.
EXPECT_GT(distilled_size, 256);
EXPECT_GT(CountFilesInDir(env.corpus_dir[0]), 256);
}
}
}
// Tests --input_filter. test_input_filter filters out inputs with 'b' in them.
TEST(Centipede, InputFilter) {
TempCorpusDir tmp_dir{test_info_->name()};
Environment env; // Reads the flags. We override some members below.
env.workdir = tmp_dir.path();
env.num_runs = 256; // Enough to run through all 1-byte inputs.
env.log_level = 0; // Disable most of the logging in the test.
env.require_pc_table = false; // No PC table here.
// Add %f so that test_input_filter doesn't need to be linked with forkserver.
env.input_filter = "%f" + std::string{GetDataDependencyFilepath(
"centipede/testing/test_input_filter")};
CentipedeMock mock(env);
MockFactory factory(mock);
CentipedeMain(env, factory); // Run fuzzing.
auto corpus = tmp_dir.GetCorpus(0);
std::set<ByteArray> corpus_set(corpus.begin(), corpus.end());
EXPECT_FALSE(corpus_set.count({'b'}));
EXPECT_TRUE(corpus_set.count({'a'}));
EXPECT_TRUE(corpus_set.count({'c'}));
}
// Callbacks for MutateViaExternalBinary test.
class MutateCallbacks : public CentipedeCallbacks {
public:
explicit MutateCallbacks(const Environment &env) : CentipedeCallbacks(env) {}
// Will not be called.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
CHECK(false);
return false;
}
// Will not be called.
void Mutate(const std::vector<MutationInputRef> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
CHECK(false);
}
// Redeclare a protected member function as public so the tests can call it.
using CentipedeCallbacks::MutateViaExternalBinary;
};
TEST(Centipede, MutateViaExternalBinary) {
// This binary contains a test-friendly custom mutator.
const std::string binary_with_custom_mutator =
GetDataDependencyFilepath("centipede/testing/test_fuzz_target");
// This binary does not contain a custom mutator.
const std::string binary_without_custom_mutator =
GetDataDependencyFilepath("centipede/testing/abort_fuzz_target");
// Mutate a couple of different inputs.
std::vector<ByteArray> inputs = {{0, 1, 2}, {3, 4}};
// The custom mutator in the test binary will reverse the order of bytes
// and sometimes append a number in [100, 107) at the end.
// Periodically, the custom mutator will fall back to LLVMFuzzerMutate,
// which in turn will sometimes shrink the inputs.
std::vector<ByteArray> some_of_expected_mutants = {
// Reversed inputs, sometimes with an extra byte at the end.
{2, 1, 0},
{2, 1, 0, 100},
{2, 1, 0, 101},
{2, 1, 0, 102},
{4, 3},
{4, 3, 103},
{4, 3, 104},
{4, 3, 105},
// Shrunk inputs.
{0, 1},
{4}};
std::vector<ByteArray> expected_crossover_mutants = {
// Crossed-over mutants.
{0, 1, 2, 42, 3, 4},
{3, 4, 42, 0, 1, 2},
};
auto all_expected_mutants = some_of_expected_mutants;
all_expected_mutants.insert(all_expected_mutants.end(),
expected_crossover_mutants.begin(),
expected_crossover_mutants.end());
std::vector<ByteArray> mutants;
// Test with crossover enabled (default).
{
Environment env;
MutateCallbacks callbacks(env);
// Expect to fail on the binary w/o a custom mutator.
mutants.resize(1);
EXPECT_FALSE(callbacks.MutateViaExternalBinary(
binary_without_custom_mutator,
GetMutationInputRefsFromDataInputs(inputs), mutants));
// Expect to succeed on the binary with a custom mutator.
mutants.resize(10000);
EXPECT_TRUE(callbacks.MutateViaExternalBinary(
binary_with_custom_mutator, GetMutationInputRefsFromDataInputs(inputs),
mutants));
// Check that we see all expected mutants, and that they are non-empty.
for (auto &mutant : mutants) {
EXPECT_FALSE(mutant.empty());
}
EXPECT_THAT(mutants, testing::IsSupersetOf(all_expected_mutants));
}
// Test with crossover disabled.
{
Environment env_no_crossover;
env_no_crossover.crossover_level = 0;
MutateCallbacks callbacks_no_crossover(env_no_crossover);
mutants.resize(10000);
EXPECT_TRUE(callbacks_no_crossover.MutateViaExternalBinary(
binary_with_custom_mutator, GetMutationInputRefsFromDataInputs(inputs),
mutants));
// Must contain normal mutants, but not the ones from crossover.
EXPECT_THAT(mutants, testing::IsSupersetOf(some_of_expected_mutants));
for (const auto &crossover_mutant : expected_crossover_mutants) {
EXPECT_THAT(mutants, testing::Contains(crossover_mutant).Times(0));
}
}
}
// A mock for MergeFromOtherCorpus test.
class MergeMock : public CentipedeCallbacks {
public:
explicit MergeMock(const Environment &env) : CentipedeCallbacks(env) {}
// Doesn't execute anything.
// All inputs are 1-byte long.
// For an input {X}, the feature output is {X}.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
batch_result.results().resize(inputs.size());
for (size_t i = 0, n = inputs.size(); i < n; ++i) {
CHECK_EQ(inputs[i].size(), 1);
batch_result.results()[i].mutable_features() = {inputs[i][0]};
}
return true;
}
// Each successive mutant is the single byte {number_of_mutations_}.
void Mutate(const std::vector<MutationInputRef> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
mutants.resize(num_mutants);
for (auto &mutant : mutants) {
mutant.resize(1);
mutant[0] = number_of_mutations_++; // first mutation is {0}.
}
}
void Reset() { number_of_mutations_ = 0; }
private:
size_t number_of_mutations_ = 0;
};
TEST(Centipede, MergeFromOtherCorpus) {
using Corpus = std::vector<ByteArray>;
// Set up the workdir, create a 2-shard corpus with 3 inputs each.
TempCorpusDir work_tmp_dir{test_info_->name(), "workdir"};
Environment env;
env.workdir = work_tmp_dir.path();
env.num_runs = 3; // Just a few runs.
env.require_pc_table = false; // No PC table here.
MergeMock mock(env);
MockFactory factory(mock);
for (env.my_shard_index = 0; env.my_shard_index < 2; ++env.my_shard_index) {
CentipedeMain(env, factory);
}
EXPECT_EQ(work_tmp_dir.GetCorpus(0), Corpus({{0}, {1}, {2}}));
EXPECT_EQ(work_tmp_dir.GetCorpus(1), Corpus({{3}, {4}, {5}}));
// Set up another workdir, create a 2-shard corpus there, with 4 inputs each.
TempCorpusDir merge_tmp_dir(test_info_->name(), "merge_from");
Environment merge_env;
merge_env.workdir = merge_tmp_dir.path();
merge_env.num_runs = 4;
merge_env.require_pc_table = false; // No PC table here.
mock.Reset();
for (merge_env.my_shard_index = 0; merge_env.my_shard_index < 2;
++merge_env.my_shard_index) {
CentipedeMain(merge_env, factory);
}
EXPECT_EQ(merge_tmp_dir.GetCorpus(0), Corpus({{0}, {1}, {2}, {3}}));
EXPECT_EQ(merge_tmp_dir.GetCorpus(1), Corpus({{4}, {5}, {6}, {7}}));
// Merge shards of `merge_env` into shards of `env`.
// Shard 0 will receive one extra input: {3}
// Shard 1 will receive two extra inputs: {6}, {7}
env.merge_from = merge_tmp_dir.path();
env.num_runs = 0;
for (env.my_shard_index = 0; env.my_shard_index < 2; ++env.my_shard_index) {
CentipedeMain(env, factory);
}
EXPECT_EQ(work_tmp_dir.GetCorpus(0), Corpus({{0}, {1}, {2}, {3}}));
EXPECT_EQ(work_tmp_dir.GetCorpus(1), Corpus({{3}, {4}, {5}, {6}, {7}}));
}
// A mock for FunctionFilter test.
class FunctionFilterMock : public CentipedeCallbacks {
public:
explicit FunctionFilterMock(const Environment &env)
: CentipedeCallbacks(env) {}
// Executes the target in the normal way.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
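// Note: this runs `env_.binary` (set by the test), not the `binary` argument.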
return ExecuteCentipedeSancovBinaryWithShmem(env_.binary, inputs,
batch_result) == EXIT_SUCCESS;
}
// Records the observed inputs and sets each mutant to one of 3 pre-defined
// values.
void Mutate(const std::vector<MutationInputRef> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
mutants.resize(num_mutants);
for (auto &input : inputs) {
if (input.data != DummyValidInput()) {
observed_inputs_.insert(input.data);
}
}
for (auto &mutant : mutants) {
mutant = GetMutant(++number_of_mutations_);
}
}
// Returns one of 3 pre-defined values that trigger different code paths in
// the test target.
static ByteArray GetMutant(size_t idx) {
const char *mutants[3] = {"func1", "func2-A", "foo"};
const char *mutant = mutants[idx % 3];
return {mutant, mutant + strlen(mutant)};
}
// Set of inputs observed by Mutate(), except for DummyValidInput().
absl::flat_hash_set<ByteArray> observed_inputs_;
private:
size_t number_of_mutations_ = 0;
};
// Runs a short fuzzing session with the provided `function_filter`.
// Returns a sorted array of observed inputs.
static std::vector<ByteArray> RunWithFunctionFilter(
std::string_view function_filter, const TempDir &tmp_dir) {
Environment env;
env.workdir = tmp_dir.path();
env.seed = 1; // make the runs predictable.
env.num_runs = 100;
env.batch_size = 10;
env.binary = GetDataDependencyFilepath("centipede/testing/test_fuzz_target");
env.coverage_binary = env.binary;
// Must symbolize in order for the filter to work.
CHECK_EQ(system("which llvm-symbolizer"), EXIT_SUCCESS)
<< "llvm-symbolizer should be installed and findable via PATH";
CHECK_EQ(system("which objdump"), EXIT_SUCCESS)
<< "odjdump should be installed and findable via PATH";
env.objdump_path = "objdump";
env.log_level = 0;
env.function_filter = function_filter;
FunctionFilterMock mock(env);
MockFactory factory(mock);
CentipedeMain(env, factory);
LOG(INFO) << mock.observed_inputs_.size();
std::vector<ByteArray> res(mock.observed_inputs_.begin(),
mock.observed_inputs_.end());
std::sort(res.begin(), res.end());
return res;
}
// Tests --function_filter.
TEST(Centipede, FunctionFilter) {
// Run with empty function filter.
{
TempDir tmp_dir{test_info_->name(), "none"};
auto observed_empty = RunWithFunctionFilter("", tmp_dir);
ASSERT_EQ(observed_empty.size(), 3);
}
// Run with a one-function filter.
{
TempDir tmp_dir{test_info_->name(), "single"};
auto observed_single = RunWithFunctionFilter("SingleEdgeFunc", tmp_dir);
ASSERT_EQ(observed_single.size(), 1);
EXPECT_EQ(observed_single[0], FunctionFilterMock::GetMutant(0));
}
// Run with a two-function filter.
{
TempDir tmp_dir{test_info_->name(), "single_multi"};
auto observed_both =
RunWithFunctionFilter("SingleEdgeFunc,MultiEdgeFunc", tmp_dir);
ASSERT_EQ(observed_both.size(), 2);
EXPECT_EQ(observed_both[0], FunctionFilterMock::GetMutant(0));
EXPECT_EQ(observed_both[1], FunctionFilterMock::GetMutant(1));
}
}
namespace {
// A mock for ExtraBinaries test.
class ExtraBinariesMock : public CentipedeCallbacks {
public:
explicit ExtraBinariesMock(const Environment &env)
: CentipedeCallbacks(env) {}
// Doesn't execute anything.
// Returns false for certain {binary, input} combinations.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
bool res = true;
for (const auto &input : inputs) {
if (input.size() != 1) continue;
if (binary == "b1" && input[0] == 10) res = false;
if (binary == "b2" && input[0] == 30) res = false;
if (binary == "b3" && input[0] == 50) res = false;
}
batch_result.results().resize(inputs.size());
return res;
}
// Sets the mutants to different 1-byte values.
void Mutate(const std::vector<MutationInputRef> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
mutants.resize(num_mutants);
for (auto &mutant : mutants) {
mutant.resize(1);
mutant[0] = ++number_of_mutations_;
}
}
private:
size_t number_of_mutations_ = 0;
};
} // namespace
// Tests --extra_binaries.
// Executes one main binary (--binary) and 3 extra ones (--extra_binaries).
// Expects the main binary and two extra ones to generate one crash each.
TEST(Centipede, ExtraBinaries) {
TempDir tmp_dir{test_info_->name()};
Environment env;
env.workdir = tmp_dir.path();
env.num_runs = 100;
env.batch_size = 10;
env.log_level = 1;
env.binary = "b1";
env.extra_binaries = {"b2", "b3", "b4"};
env.require_pc_table = false; // No PC table here.
ExtraBinariesMock mock(env);
MockFactory factory(mock);
CentipedeMain(env, factory);
// Verify that we see the expected crashes.
// The "crashes" dir must contain 3 crashy inputs, one for each binary.
// We simply match their file names, because they are hashes of the contents.
std::vector<std::string> found_crash_file_names;
auto crashes_dir_path = env.MakeCrashReproducerDirPath();
ASSERT_TRUE(std::filesystem::exists(crashes_dir_path))
<< VV(crashes_dir_path);
for (const auto &dir_ent :
std::filesystem::directory_iterator(crashes_dir_path)) {
found_crash_file_names.push_back(dir_ent.path().filename());
}
EXPECT_THAT(found_crash_file_names, testing::UnorderedElementsAre(
Hash({10}), Hash({30}), Hash({50})));
}
namespace {
// A mock for UndetectedCrashingInput test.
class UndetectedCrashingInputMock : public CentipedeCallbacks {
public:
explicit UndetectedCrashingInputMock(const Environment &env,
size_t crashing_input_idx)
: CentipedeCallbacks{env}, crashing_input_idx_{crashing_input_idx} {
CHECK_LE(crashing_input_idx_, std::numeric_limits<uint8_t>::max());
}
// Doesn't execute anything.
// Returns false (i.e. reports a "crash") when an input's single byte equals
// `crashing_input_idx_`, but only on the first pass.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
batch_result.ClearAndResize(inputs.size());
bool res = true;
for (const auto &input : inputs) {
CHECK_EQ(input.size(), 1); // By construction in `Mutate()`.
// The single byte of each input is its sequential number, assigned in
// `Mutate()`.
if (input[0] == crashing_input_idx_) {
if (first_pass_) {
first_pass_ = false;
crashing_input_ = input;
// TODO(b/274705740): `num_outputs_read()` is the number of outputs
// that Centipede engine *expects* to have been read from *the
// current BatchResult* by the *particular* implementation of
// `CentipedeCallbacks` (and `DefaultCentipedeCallbacks` fits the
// bill). `Centipede::ReportCrash()` then uses this value as a hint
// for the crashing input's index, and in our case saves the batch's
// inputs from 0 up to and including the crasher to a subdir. See the
// bug for details. All of this is horribly convoluted and misplaced
// here. Implement a cleaner solution.
batch_result.num_outputs_read() =
crashing_input_idx_ % env_.batch_size;
res = false;
}
}
}
return res;
}
// Sets the mutants to different 1-byte values.
void Mutate(const std::vector<MutationInputRef> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
mutants.resize(num_mutants);
for (auto &mutant : mutants) {
// The contents of each mutant are simply its sequential number.
mutant = {static_cast<uint8_t>(curr_input_idx_++)};
}
}
// Gets the input that triggered the crash.
ByteArray crashing_input() const { return crashing_input_; }
private:
const size_t crashing_input_idx_;
size_t curr_input_idx_ = 0;
ByteArray crashing_input_ = {};
bool first_pass_ = true;
};
} // namespace
// Test for preserving a crashing batch when 1-by-1 exec fails to reproduce.
// Executes one main binary (--binary).
// Expects the binary to crash once and 1-by-1 reproduction to fail.
TEST(Centipede, UndetectedCrashingInput) {
constexpr size_t kNumBatches = 7;
constexpr size_t kBatchSize = 11;
constexpr size_t kCrashingInputIdxInBatch = kBatchSize / 2;
constexpr size_t kCrashingInputIdx =
(kNumBatches / 2) * kBatchSize + kCrashingInputIdxInBatch;
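// This places the crashing input roughly in the middle of the middle batch.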
LOG(INFO) << VV(kNumBatches) << VV(kBatchSize)
<< VV(kCrashingInputIdxInBatch) << VV(kCrashingInputIdx);
TempDir temp_dir{test_info_->name()};
Environment env;
env.workdir = temp_dir.path();
env.num_runs = kBatchSize * kNumBatches;
env.batch_size = kBatchSize;
// No real binary: prevent attempts by Centipede to read a PCtable from it.
env.require_pc_table = false;
UndetectedCrashingInputMock mock(env, kCrashingInputIdx);
MockFactory factory(mock);
CentipedeMain(env, factory);
// Verify that we see the expected inputs from the batch.
// The "crashes/unreliable_batch-<HASH>" dir must contain all inputs from the
// batch that were executing during the session.
// We simply verify the number of saved inputs matches the number of executed
// inputs.
const auto crashing_input_hash = Hash(mock.crashing_input());
const auto crashes_dir_path = std::filesystem::path(temp_dir.path())
.append("crashes")
.append("crashing_batch-")
.concat(crashing_input_hash);
ASSERT_TRUE(std::filesystem::exists(crashes_dir_path)) << crashes_dir_path;
std::vector<std::string> found_crash_file_names;
for (auto const &dir_ent :
std::filesystem::directory_iterator(crashes_dir_path)) {
found_crash_file_names.push_back(dir_ent.path().filename());
}
// TODO(ussuri): Verify exact names/contents of the files, not just count.
ASSERT_EQ(found_crash_file_names.size(), kCrashingInputIdxInBatch + 1);
}
static void WriteBlobsToFile(const std::vector<ByteArray> &blobs,
const std::string_view path) {
auto appender = DefaultBlobFileWriterFactory();
CHECK_OK(appender->Open(path, "a"));
for (const auto &blob : blobs) {
CHECK_OK(appender->Write(blob));
}
}
TEST(Centipede, ShardReader) {
ByteArray data1 = {1, 2, 3};
ByteArray data2 = {3, 4, 5, 6};
ByteArray data3 = {7, 8, 9, 10, 11};
ByteArray data4 = {12, 13, 14};
ByteArray data5 = {15, 16};
FeatureVec fv1 = {100, 200, 300};
FeatureVec fv2 = {300, 400, 500, 600};
FeatureVec fv3 = {700, 800, 900, 1000, 1100};
FeatureVec fv4 = {}; // empty.
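// Note: data4 gets an (empty) feature record, while data5 gets no feature
// record at all; the expectations below check that the reader reports these
// two cases differently.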
std::vector<ByteArray> corpus_blobs;
corpus_blobs.push_back(data1);
corpus_blobs.push_back(data2);
corpus_blobs.push_back(data3);
corpus_blobs.push_back(data4);
corpus_blobs.push_back(data5);
std::vector<ByteArray> features_blobs;
features_blobs.push_back(PackFeaturesAndHash(data1, fv1));
features_blobs.push_back(PackFeaturesAndHash(data2, fv2));
features_blobs.push_back(PackFeaturesAndHash(data3, fv3));
features_blobs.push_back(PackFeaturesAndHash(data4, fv4));
TempDir tmp_dir{test_info_->name()};
std::string corpus_path = tmp_dir.GetFilePath("corpus");
std::string features_path = tmp_dir.GetFilePath("features");
WriteBlobsToFile(corpus_blobs, corpus_path);
WriteBlobsToFile(features_blobs, features_path);
std::vector<CorpusRecord> res;
ReadShard(corpus_path, features_path,
[&res](const ByteArray &input, const FeatureVec &features) {
res.push_back(CorpusRecord{input, features});
});
EXPECT_EQ(res.size(), 5UL);
EXPECT_EQ(res[0].data, data1);
EXPECT_EQ(res[1].data, data2);
EXPECT_EQ(res[2].data, data3);
EXPECT_EQ(res[3].data, data4);
EXPECT_EQ(res[4].data, data5);
EXPECT_EQ(res[0].features, fv1);
EXPECT_EQ(res[1].features, fv2);
EXPECT_EQ(res[2].features, fv3);
EXPECT_EQ(res[3].features, FeatureVec{feature_domains::kNoFeature});
EXPECT_EQ(res[4].features, FeatureVec());
}
} // namespace centipede