// centipede/stats.h - third_party/github/google/fuzztest - Git at Google

 // Copyright 2022 The Centipede Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #ifndef THIRD_PARTY_CENTIPEDE_STATS_H_
 #define THIRD_PARTY_CENTIPEDE_STATS_H_

 #include <atomic>
 #include <cstdint>
 #include <cstdlib>
 #include <initializer_list>
 #include <memory>
 #include <ostream>
 #include <sstream>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>

 #include "absl/base/nullability.h"
 #include "absl/container/btree_map.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "./centipede/environment.h"
 #include "./common/remote_file.h"

 namespace fuzztest::internal {

 // A set of statistics about the fuzzing progress.
 // - Each worker thread has its own `std::atomic<Stats>` object and updates it
 //   periodically. Another special thread reads all such objects periodically
 //   and concurrently to report the current stats to log and/or files on disk.
 // - The updates must not be frequent for performance reasons.
 // - These objects may also be accessed after all worker threads have joined.

 // Metadata attached to a stats sample.
 struct StatsMeta {
   // Timestamp of the sample. Judging by the member name, the unit is
   // microseconds of UNIX time.
   uint64_t timestamp_unix_micros = 0;

   // Metadata has no generally meaningful ordering, so equality is the only
   // comparison provided.
   friend bool operator==(const StatsMeta &a, const StatsMeta &b) {
     const bool same_timestamp =
         a.timestamp_unix_micros == b.timestamp_unix_micros;
     return same_timestamp;
   }
 };

 // Counters describing the execution progress.
 struct ExecStats {
   // Fuzzing time, in seconds.
   uint64_t fuzz_time_sec = 0;
   // Number of executions.
   uint64_t num_executions = 0;
   // Number of target crashes.
   uint64_t num_target_crashes = 0;

   // Memberwise equality: bails out on the first counter that differs.
   friend bool operator==(const ExecStats &a, const ExecStats &b) {
     if (a.fuzz_time_sec != b.fuzz_time_sec) return false;
     if (a.num_executions != b.num_executions) return false;
     return a.num_target_crashes == b.num_target_crashes;
   }
 };

 // Counters describing the coverage accumulated so far.
 struct CovStats {
   // Number of covered PCs.
   uint64_t num_covered_pcs = 0;

   // Feature counters; the feature kind is spelled out in each member's name.
   uint64_t num_8bit_counter_features = 0;
   uint64_t num_data_flow_features = 0;
   uint64_t num_cmp_features = 0;
   uint64_t num_call_stack_features = 0;
   uint64_t num_bounded_path_features = 0;
   uint64_t num_pc_pair_features = 0;

   // Total number of user features, followed by its breakdown into the
   // individual user feature types 0..15.
   uint64_t num_user_features = 0;
   uint64_t num_user0_features = 0;
   uint64_t num_user1_features = 0;
   uint64_t num_user2_features = 0;
   uint64_t num_user3_features = 0;
   uint64_t num_user4_features = 0;
   uint64_t num_user5_features = 0;
   uint64_t num_user6_features = 0;
   uint64_t num_user7_features = 0;
   uint64_t num_user8_features = 0;
   uint64_t num_user9_features = 0;
   uint64_t num_user10_features = 0;
   uint64_t num_user11_features = 0;
   uint64_t num_user12_features = 0;
   uint64_t num_user13_features = 0;
   uint64_t num_user14_features = 0;
   uint64_t num_user15_features = 0;

   uint64_t num_unknown_features = 0;
   uint64_t num_funcs_in_frontier = 0;

   // Memberwise equality: true iff every counter above matches.
   friend bool operator==(const CovStats &lhs, const CovStats &rhs) {
     // All counters share one type, so they are walked through a table of
     // pointers-to-members. Every member of the struct must be listed here.
     constexpr uint64_t CovStats::*kCounters[] = {
         &CovStats::num_covered_pcs,
         &CovStats::num_8bit_counter_features,
         &CovStats::num_data_flow_features,
         &CovStats::num_cmp_features,
         &CovStats::num_call_stack_features,
         &CovStats::num_bounded_path_features,
         &CovStats::num_pc_pair_features,
         &CovStats::num_user_features,
         &CovStats::num_user0_features,
         &CovStats::num_user1_features,
         &CovStats::num_user2_features,
         &CovStats::num_user3_features,
         &CovStats::num_user4_features,
         &CovStats::num_user5_features,
         &CovStats::num_user6_features,
         &CovStats::num_user7_features,
         &CovStats::num_user8_features,
         &CovStats::num_user9_features,
         &CovStats::num_user10_features,
         &CovStats::num_user11_features,
         &CovStats::num_user12_features,
         &CovStats::num_user13_features,
         &CovStats::num_user14_features,
         &CovStats::num_user15_features,
         &CovStats::num_unknown_features,
         &CovStats::num_funcs_in_frontier,
     };
     for (const auto counter : kCounters) {
       if (lhs.*counter != rhs.*counter) return false;
     }
     return true;
   }
 };

 // Counters describing the corpus.
 struct CorpusStats {
   // Size of the active corpus.
   uint64_t active_corpus_size = 0;
   // Size of the total corpus.
   uint64_t total_corpus_size = 0;
   // Maximum and average sizes of a corpus element.
   uint64_t max_corpus_element_size = 0;
   uint64_t avg_corpus_element_size = 0;

   // Memberwise equality, phrased as "no counter differs".
   friend bool operator==(const CorpusStats &a, const CorpusStats &b) {
     const bool any_difference =
         a.active_corpus_size != b.active_corpus_size ||
         a.total_corpus_size != b.total_corpus_size ||
         a.max_corpus_element_size != b.max_corpus_element_size ||
         a.avg_corpus_element_size != b.avg_corpus_element_size;
     return !any_difference;
   }
 };

 // Resource usage counters of the engine.
 struct RusageStats {
   // Average CPU usage; the member name indicates millicores.
   uint64_t engine_rusage_avg_millicores = 0;
   // CPU usage, in percent.
   uint64_t engine_rusage_cpu_percent = 0;
   // RSS and virtual memory sizes, in MB.
   uint64_t engine_rusage_rss_mb = 0;
   uint64_t engine_rusage_vsize_mb = 0;

   // Memberwise equality: CPU-related and memory-related counters are checked
   // as two named groups.
   friend bool operator==(const RusageStats &a, const RusageStats &b) {
     const bool same_cpu =
         a.engine_rusage_avg_millicores == b.engine_rusage_avg_millicores &&
         a.engine_rusage_cpu_percent == b.engine_rusage_cpu_percent;
     const bool same_memory =
         a.engine_rusage_rss_mb == b.engine_rusage_rss_mb &&
         a.engine_rusage_vsize_mb == b.engine_rusage_vsize_mb;
     return same_cpu && same_memory;
   }
 };

 // The full set of stats: the members of all the per-category structs it
 // inherits from, plus `kFieldInfos`, a table that describes how each of those
 // members is named, described, classified and aggregated when reported.
 struct Stats : StatsMeta, ExecStats, CovStats, CorpusStats, RusageStats {
   // A bitmask of `TraitBits` values.
   using Traits = uint32_t;
   enum TraitBits : Traits {
     // The kind of the stat.
     kTimestamp = 1UL << 0,
     kFuzzStat = 1UL << 1,
     kRUsageStat = 1UL << 2,

     // The aggregate value(s) to report for the stat.
     kMin = 1UL << 8,
     kMax = 1UL << 9,
     kAvg = 1UL << 10,
     kSum = 1UL << 11,
   };

   // Ascribes some properties to each stat. Used in `StatsReporter` &
   // subclasses.
   struct FieldInfo {
     // Pointer to the `Stats` member that holds the value of the stat.
     uint64_t Stats::*field;
     // The machine-readable name of the field. Used in the CSV header.
     std::string_view name;
     // The human-readable description of the field. Used in logging.
     std::string_view description;
     // Bitwise OR of `TraitBits`: the kind of the stat and the aggregate
     // value(s) to report for it.
     Traits traits;
   };

   // WARNING!!! Before reordering these or changing the aggregation types,
   // consider the backward compatibility implications for historical CSVs out
   // there: if some end-user has a CSV post-processing step that relies on the
   // old order or the aggregation type of the CSV fields, that step will break
   // if either of those things change; if the post-processing step relies on the
   // field names in the CSV header, that might break if those names change; etc.
   // In other words: do not change the names or the order of the old fields
   // without a very good reason.
   static constexpr std::initializer_list<FieldInfo> kFieldInfos = {
       // Coverage 1.
       {
           &Stats::num_covered_pcs,
           "NumCoveredPcs",
           "Coverage",
           kFuzzStat | kMin | kMax | kAvg,
       },

       // Execution.
       {
           &Stats::num_executions,
           "NumExecs",
           "Number of executions",
           kFuzzStat | kMin | kMax | kAvg,
       },

       // Corpus.
       {
           &Stats::active_corpus_size,
           "ActiveCorpusSize",
           "Active corpus size",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::max_corpus_element_size,
           "MaxEltSize",
           "Max element size",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::avg_corpus_element_size,
           "AvgEltSize",
           "Avg element size",
           kFuzzStat | kMin | kMax | kAvg,
       },

       // Metadata.
       {
           &Stats::timestamp_unix_micros,
           "UnixMicros",
           "Timestamp",
           kTimestamp | kMin | kMax,
       },

       // Execution 2.
       {
           &Stats::fuzz_time_sec,
           "FuzzTimeSec",
           "Fuzz time (sec)",
           kFuzzStat | kMin | kMax | kAvg,
       },

       // Coverage 2.
       // NOTE: The first two entries of this section are members of `ExecStats`
       // and `CorpusStats`, respectively, rather than of `CovStats`; per the
       // WARNING above, their position in the list must not change. Also note
       // that the name and the description of the first entry ("proxy crashes")
       // differ from the name of the member (`num_target_crashes`).
       {
           &Stats::num_target_crashes,
           "NumProxyCrashes",
           "Num proxy crashes",
           kFuzzStat | kMin | kMax | kSum,
       },
       {
           &Stats::total_corpus_size,
           "TotalCorpusSize",
           "Total corpus size",
           kFuzzStat | kMin | kMax | kSum,
       },
       {
           &Stats::num_8bit_counter_features,
           "Num8BitCounterFts",
           "Num 8-bit counter features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_data_flow_features,
           "NumDataFlowFts",
           "Num data flow features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_cmp_features,
           "NumCmpFts",
           "Num cmp features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_call_stack_features,
           "NumCallStackFts",
           "Num call stack features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_bounded_path_features,
           "NumBoundedPathFts",
           "Num bounded path features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_pc_pair_features,
           "NumPcPairFts",
           "Num PC pair features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user_features,
           "NumUserFts",
           "Num user features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_unknown_features,
           "NumUnknownFts",
           "Num unknown features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_funcs_in_frontier,
           "NumFuncsInFrontier",
           "Num funcs in frontier",
           kFuzzStat | kMin | kMax | kAvg,
       },

       // Rusage. Each shard of a run is a thread of the same process, but it
       // measures the following metrics for the whole process. That means that
       // all the shards should return more or less the same number for the same
       // thing, sampling jitter and noise notwithstanding. Therefore, for the
       // aggregate stat we use the upper bound of the samples.
       // TODO(ussuri): Revise aggregation for CPU metrics once/if we start
       // measuring them per-thread.
       // NOTE(review): The first entry is named and described in "cores", while
       // the member it points to is named in "millicores" - confirm which unit
       // is actually reported.
       {
           &Stats::engine_rusage_avg_millicores,
           "EngineRusageAvgCores",
           "Engine rusage avg cores",
           kRUsageStat | kMax,
       },
       {
           &Stats::engine_rusage_cpu_percent,
           "EngineRusageCpuPct",
           "Engine rusage CPU %",
           kRUsageStat | kMax,
       },
       {
           &Stats::engine_rusage_rss_mb,
           "EngineRusageRssMb",
           "Engine rusage RSS (MB)",
           kRUsageStat | kMax,
       },
       {
           &Stats::engine_rusage_vsize_mb,
           "EngineRusageVSizeMb",
           "Engine rusage VSize (MB)",
           kRUsageStat | kMax,
       },

       // Coverage 3. A breakdown of the total in `Stats::num_user_features` by
       // individual feature types.
       {
           &Stats::num_user0_features,
           "NumUser0Fts",
           "Num user0 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user1_features,
           "NumUser1Fts",
           "Num user1 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user2_features,
           "NumUser2Fts",
           "Num user2 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user3_features,
           "NumUser3Fts",
           "Num user3 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user4_features,
           "NumUser4Fts",
           "Num user4 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user5_features,
           "NumUser5Fts",
           "Num user5 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user6_features,
           "NumUser6Fts",
           "Num user6 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user7_features,
           "NumUser7Fts",
           "Num user7 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user8_features,
           "NumUser8Fts",
           "Num user8 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user9_features,
           "NumUser9Fts",
           "Num user9 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user10_features,
           "NumUser10Fts",
           "Num user10 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user11_features,
           "NumUser11Fts",
           "Num user11 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user12_features,
           "NumUser12Fts",
           "Num user12 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user13_features,
           "NumUser13Fts",
           "Num user13 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user14_features,
           "NumUser14Fts",
           "Num user14 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
       {
           &Stats::num_user15_features,
           "NumUser15Fts",
           "Num user15 features",
           kFuzzStat | kMin | kMax | kAvg,
       },
   };
 };

 // An abstract stats reporter. Observes an external set of `Stats` objects and a
 // matching set of `Environment` objects, assumed to be updated regularly by the
 // owning scope to reflect the current execution numbers. Reports these current
 // numbers to an abstract report sink whenever the owning scope invokes
 // `ReportCurrStats()`. Concrete report sinks are implemented by inheriting
 // classes by overriding the virtual API.
 //
 // NOTE: The object stores references to `stats_vec` and `env_vec` (see the
 // private members), so both vectors must outlive the reporter.
 class StatsReporter {
  public:
   // `stats_vec` and `env_vec` are the externally owned stats and the
   // environments to observe; they are kept by reference, not copied.
   StatsReporter(const std::vector<std::atomic<Stats>> &stats_vec,
                 const std::vector<Environment> &env_vec);

   StatsReporter(const StatsReporter &) = default;
   // Declared here and defaulted out-of-line after the class definition.
   StatsReporter(StatsReporter &&) noexcept;

   virtual ~StatsReporter() = default;

   // Reports the current sample of stats values as updated in the `stats_vec_`
   // externally by the caller. Implements the Template Method pattern by
   // invoking the private virtual APIs below in the right order and with the
   // right data to create a complete sample report.
   void ReportCurrStats();

  protected:
   // Group name -> indices of the shards that belong to the group.
   using GroupToIndices =  //
       absl::btree_map<std::string /*group_name*/,
                       std::vector<size_t> /*indices*/>;
   // Group name -> stringified flags of the group.
   using GroupToFlags =
       absl::btree_map<std::string /*group_name*/, std::string /*flags*/>;

   // Substeps of the Template Method pattern, which is implemented in
   // `ReportCurrStats()`, that subclasses need to override to implement their
   // stats reporting.

   // Should this field be reported or skipped for the particular type of
   // reporting that the subclass does. Can use `field.traits` to determine that.
   // The default implementation reports every field.
   virtual bool ShouldReportThisField(const Stats::FieldInfo &field) {
     return true;
   }
   // Gives a chance to subclasses to learn ahead of time the fields for which
   // samples are going to be reported, in this order. Is called once.
   virtual void PreAnnounceFields(
       std::initializer_list<Stats::FieldInfo> fields) = 0;
   // Selects the group for the next batch of `ReportCurrFieldSample()` calls.
   virtual void SetCurrGroup(const Environment &master_env) = 0;
   // Selects the field for the next batch of `ReportCurrFieldSample()` calls.
   // Each of those calls will follow a unique combination of `SetCurrGroup()`
   // and `SetCurrField()`.
   virtual void SetCurrField(const Stats::FieldInfo &field_info) = 0;
   // Reports the values for the current group/field selected via the above two
   // calls.
   virtual void ReportCurrFieldSample(std::vector<uint64_t> &&values) = 0;
   // Wraps up the current field sample batch.
   virtual void DoneFieldSamplesBatch() = 0;
   // Gives subclasses an option to report the flags associated with each shard
   // group (e.g. experiments).
   virtual void ReportFlags(const GroupToFlags &group_to_flags) = 0;

  private:
   // Cached external sets of stats and environments to observe.
   const std::vector<std::atomic<Stats>> &stats_vec_;
   const std::vector<Environment> &env_vec_;

   // Maps group names to indices in `env_vec_` / `stats_vec_`. If there is
   // just a single run (no groups), it will be stored in a single "" key.
   // NOTE: `absl::btree_map` is an ordered map, so the groups are ordered
   // lexicographically by name.
   GroupToIndices group_to_indices_;
   // Maps group names to their distinct flags (stringified). If there is
   // just a single run (no groups), it will be stored in a single "" key.
   // NOTE: `absl::btree_map` is an ordered map, so the groups are ordered
   // lexicographically by name.
   GroupToFlags group_to_flags_;
 };

 // The move constructor is defaulted here rather than on its first declaration
 // inside the class.
 // NOTE(review): Presumably this is done so that the `noexcept` specification
 // holds even if moving some member is not itself declared `noexcept` - confirm.
 inline StatsReporter::StatsReporter(StatsReporter &&) noexcept = default;

 // Takes a set of `Stats` objects and a corresponding set of `Environment`
 // objects and logs the current `Stats` values to FUZZTEST_LOG(INFO) on each
 // invocation of `ReportCurrStats()`. If the environments indicate the use of
 // the --experiment flag, the stats for each of the experiments are juxtaposed
 // for easy visual comparison.
 class StatsLogger : public StatsReporter {
  public:
   // Constructed the same way as the base `StatsReporter`.
   using StatsReporter::StatsReporter;
   ~StatsLogger() override = default;

   StatsLogger(StatsLogger &&) = default;

  private:
   // Overrides of the `StatsReporter` reporting substeps; see the base class
   // for the contract of each.
   bool ShouldReportThisField(const Stats::FieldInfo &field) override;
   void PreAnnounceFields(
       std::initializer_list<Stats::FieldInfo> fields) override;
   void SetCurrGroup(const Environment &master_env) override;
   void SetCurrField(const Stats::FieldInfo &field_info) override;
   void ReportCurrFieldSample(std::vector<uint64_t> &&values) override;
   void DoneFieldSamplesBatch() override;
   void ReportFlags(const GroupToFlags &group_to_flags) override;

   // NOTE(review): Presumably accumulates the text of the report before it is
   // logged - confirm against the implementation.
   std::stringstream os_;
   // NOTE(review): Presumably the experiment name of the group selected by the
   // last `SetCurrGroup()` call - confirm against the implementation.
   std::string curr_experiment_name_;
   // NOTE(review): Presumably the field selected by the last `SetCurrField()`
   // call - confirm against the implementation.
   Stats::FieldInfo curr_field_info_;
 };

 // A `StatsReporter` that writes to CSV files. Given a set of `Stats` objects
 // and a matching set of `Environment` objects `env_vec`, every call to
 // `ReportCurrStats()` appends the aggregate metrics of the current `Stats`
 // values to a CSV file. If the environments indicate the use of the
 // --experiment flag, each experiment gets its own, correspondingly named CSV
 // file. The names of the output fields are written to the file(s) as a CSV
 // header.
 //
 // If the file already exists (e.g. Centipede runs in a previously populated
 // workdir), then:
 // - When the current CSV header is the same as the one in the file, new CSV
 //   lines are appended to the file.
 // - When the current CSV header differs from the one in the file (e.g. the
 //   Centipede version changed and the set of CSV fields changed with it), the
 //   existing file is renamed to `GetBackupFilename(filename)`, and a new file
 //   is created from scratch.
 class StatsCsvFileAppender : public StatsReporter {
  public:
   using StatsReporter::StatsReporter;
   ~StatsCsvFileAppender() override;

   // Move-only.
   StatsCsvFileAppender(StatsCsvFileAppender &&) noexcept = default;

  private:
   // A remote file handle paired with a string buffer.
   struct BufferedRemoteFile {
     RemoteFile *file = nullptr;
     std::string buffer;
   };

   // A thin wrapper around a `BufferedRemoteFile` pointer whose only job is to
   // reset the source to `nullptr` when move-constructed from it. Thanks to
   // that, `StatsCsvFileAppender` can keep its defaulted move constructor
   // instead of needing a hand-written one just to null out the pointer.
   class BufferedRemoteFilePtr {
    public:
     // Intentionally implicit, so that a raw pointer (or `nullptr`) can be used
     // to initialize the wrapper.
     BufferedRemoteFilePtr(BufferedRemoteFile *absl_nullable file)
         : file_(file) {}
     // Takes over the pointer and leaves `other` holding `nullptr`.
     BufferedRemoteFilePtr(BufferedRemoteFilePtr &&other) noexcept
         : file_(other.file_) {
       other.file_ = nullptr;
     }
     // Re-points the wrapper at `file`.
     BufferedRemoteFilePtr &operator=(BufferedRemoteFile *absl_nullable file) {
       file_ = file;
       return *this;
     }
     // Comparisons against a raw pointer (including `nullptr`).
     bool operator==(BufferedRemoteFile *absl_nullable file) const {
       return file_ == file;
     }
     bool operator!=(BufferedRemoteFile *absl_nullable file) const {
       return !(file_ == file);
     }
     // Member access through the wrapped pointer, which may be null.
     BufferedRemoteFile *absl_nullable operator->() const { return file_; }

    private:
     BufferedRemoteFile *absl_nullable file_ = nullptr;
   };

   using BufferedRemoteFilesMap =
       absl::flat_hash_map<std::string /*group_name*/, BufferedRemoteFile>;

   // Overrides of the `StatsReporter` reporting substeps.
   void PreAnnounceFields(
       std::initializer_list<Stats::FieldInfo> fields) override;
   void SetCurrGroup(const Environment &master_env) override;
   void SetCurrField(const Stats::FieldInfo &field_info) override;
   void ReportCurrFieldSample(std::vector<uint64_t> &&values) override;
   void DoneFieldSamplesBatch() override;
   void ReportFlags(const GroupToFlags &group_to_flags) override;

   // Returns the name to which the existing file `filename` should be backed
   // up. The default version appends the current timestamp as UNIX seconds.
   // Virtual for the benefit of tests.
   virtual std::string GetBackupFilename(const std::string &filename) const;

   // The CSV header.
   std::string csv_header_;
   // The buffered files, keyed by group name.
   std::unique_ptr<BufferedRemoteFilesMap> files_ =
       std::make_unique<BufferedRemoteFilesMap>();
   // NOTE(review): Presumably points at the entry of `files_` for the group
   // selected by the last `SetCurrGroup()` call - confirm against the
   // implementation.
   BufferedRemoteFilePtr curr_file_ = nullptr;
   // NOTE(review): Presumably the field selected by the last `SetCurrField()`
   // call - confirm against the implementation.
   Stats::FieldInfo curr_field_info_;
 };

 // Takes a span of `Stats` objects `stats_vec` and prints a summary of the
 // results to `os`, such that it can be ingested as a reward function by an ML
 // system. To be used with knobs.
 //
 // `stats_vec`: the stats to summarize; only read, never modified.
 // `os`: the output stream that receives the summary.
 void PrintRewardValues(absl::Span<const std::atomic<Stats>> stats_vec,
                        std::ostream &os);

 }  // namespace fuzztest::internal

 #endif  // THIRD_PARTY_CENTIPEDE_STATS_H_
	// Copyright 2022 The Centipede Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#ifndef THIRD_PARTY_CENTIPEDE_STATS_H_
	#define THIRD_PARTY_CENTIPEDE_STATS_H_

	#include <atomic>
	#include <cstdint>
	#include <cstdlib>
	#include <initializer_list>
	#include <memory>
	#include <ostream>
	#include <sstream>
	#include <string>
	#include <string_view>
	#include <utility>
	#include <vector>

	#include "absl/base/nullability.h"
	#include "absl/container/btree_map.h"
	#include "absl/container/flat_hash_map.h"
	#include "absl/types/span.h"
	#include "./centipede/environment.h"
	#include "./common/remote_file.h"

	namespace fuzztest::internal {

	// A set of statistics about the fuzzing progress.
	// - Each worker thread has its own `std::atomic<Stats>` object and updates it
	// periodically. Another special thread reads all such objects periodically
	// and concurrently to report the current stats to log and/or files on disk.
	// - The updates must not be frequent for performance reasons.
	// - These objects may also be accessed after all worker threads have joined.

	// Metadata attached to a stats sample.
	struct StatsMeta {
	  // Timestamp of the sample. Judging by the member name, the unit is
	  // microseconds of UNIX time.
	  uint64_t timestamp_unix_micros = 0;

	  // Metadata has no generally meaningful ordering, so equality is the only
	  // comparison provided.
	  friend bool operator==(const StatsMeta &a, const StatsMeta &b) {
	    const bool same_timestamp =
	        a.timestamp_unix_micros == b.timestamp_unix_micros;
	    return same_timestamp;
	  }
	};

	// Counters describing the execution progress.
	struct ExecStats {
	  // Fuzzing time, in seconds.
	  uint64_t fuzz_time_sec = 0;
	  // Number of executions.
	  uint64_t num_executions = 0;
	  // Number of target crashes.
	  uint64_t num_target_crashes = 0;

	  // Memberwise equality: bails out on the first counter that differs.
	  friend bool operator==(const ExecStats &a, const ExecStats &b) {
	    if (a.fuzz_time_sec != b.fuzz_time_sec) return false;
	    if (a.num_executions != b.num_executions) return false;
	    return a.num_target_crashes == b.num_target_crashes;
	  }
	};

	// Counters describing the coverage accumulated so far.
	struct CovStats {
	  // Number of covered PCs.
	  uint64_t num_covered_pcs = 0;

	  // Feature counters; the feature kind is spelled out in each member's name.
	  uint64_t num_8bit_counter_features = 0;
	  uint64_t num_data_flow_features = 0;
	  uint64_t num_cmp_features = 0;
	  uint64_t num_call_stack_features = 0;
	  uint64_t num_bounded_path_features = 0;
	  uint64_t num_pc_pair_features = 0;

	  // Total number of user features, followed by its breakdown into the
	  // individual user feature types 0..15.
	  uint64_t num_user_features = 0;
	  uint64_t num_user0_features = 0;
	  uint64_t num_user1_features = 0;
	  uint64_t num_user2_features = 0;
	  uint64_t num_user3_features = 0;
	  uint64_t num_user4_features = 0;
	  uint64_t num_user5_features = 0;
	  uint64_t num_user6_features = 0;
	  uint64_t num_user7_features = 0;
	  uint64_t num_user8_features = 0;
	  uint64_t num_user9_features = 0;
	  uint64_t num_user10_features = 0;
	  uint64_t num_user11_features = 0;
	  uint64_t num_user12_features = 0;
	  uint64_t num_user13_features = 0;
	  uint64_t num_user14_features = 0;
	  uint64_t num_user15_features = 0;

	  uint64_t num_unknown_features = 0;
	  uint64_t num_funcs_in_frontier = 0;

	  // Memberwise equality: true iff every counter above matches.
	  friend bool operator==(const CovStats &lhs, const CovStats &rhs) {
	    // All counters share one type, so they are walked through a table of
	    // pointers-to-members. Every member of the struct must be listed here.
	    constexpr uint64_t CovStats::*kCounters[] = {
	        &CovStats::num_covered_pcs,
	        &CovStats::num_8bit_counter_features,
	        &CovStats::num_data_flow_features,
	        &CovStats::num_cmp_features,
	        &CovStats::num_call_stack_features,
	        &CovStats::num_bounded_path_features,
	        &CovStats::num_pc_pair_features,
	        &CovStats::num_user_features,
	        &CovStats::num_user0_features,
	        &CovStats::num_user1_features,
	        &CovStats::num_user2_features,
	        &CovStats::num_user3_features,
	        &CovStats::num_user4_features,
	        &CovStats::num_user5_features,
	        &CovStats::num_user6_features,
	        &CovStats::num_user7_features,
	        &CovStats::num_user8_features,
	        &CovStats::num_user9_features,
	        &CovStats::num_user10_features,
	        &CovStats::num_user11_features,
	        &CovStats::num_user12_features,
	        &CovStats::num_user13_features,
	        &CovStats::num_user14_features,
	        &CovStats::num_user15_features,
	        &CovStats::num_unknown_features,
	        &CovStats::num_funcs_in_frontier,
	    };
	    for (const auto counter : kCounters) {
	      if (lhs.*counter != rhs.*counter) return false;
	    }
	    return true;
	  }
	};

	// Counters describing the corpus.
	struct CorpusStats {
	  // Size of the active corpus.
	  uint64_t active_corpus_size = 0;
	  // Size of the total corpus.
	  uint64_t total_corpus_size = 0;
	  // Maximum and average sizes of a corpus element.
	  uint64_t max_corpus_element_size = 0;
	  uint64_t avg_corpus_element_size = 0;

	  // Memberwise equality, phrased as "no counter differs".
	  friend bool operator==(const CorpusStats &a, const CorpusStats &b) {
	    const bool any_difference =
	        a.active_corpus_size != b.active_corpus_size ||
	        a.total_corpus_size != b.total_corpus_size ||
	        a.max_corpus_element_size != b.max_corpus_element_size ||
	        a.avg_corpus_element_size != b.avg_corpus_element_size;
	    return !any_difference;
	  }
	};

	// Resource usage counters of the engine.
	struct RusageStats {
	  // Average CPU usage; the member name indicates millicores.
	  uint64_t engine_rusage_avg_millicores = 0;
	  // CPU usage, in percent.
	  uint64_t engine_rusage_cpu_percent = 0;
	  // RSS and virtual memory sizes, in MB.
	  uint64_t engine_rusage_rss_mb = 0;
	  uint64_t engine_rusage_vsize_mb = 0;

	  // Memberwise equality: CPU-related and memory-related counters are checked
	  // as two named groups.
	  friend bool operator==(const RusageStats &a, const RusageStats &b) {
	    const bool same_cpu =
	        a.engine_rusage_avg_millicores == b.engine_rusage_avg_millicores &&
	        a.engine_rusage_cpu_percent == b.engine_rusage_cpu_percent;
	    const bool same_memory =
	        a.engine_rusage_rss_mb == b.engine_rusage_rss_mb &&
	        a.engine_rusage_vsize_mb == b.engine_rusage_vsize_mb;
	    return same_cpu && same_memory;
	  }
	};

	struct Stats : StatsMeta, ExecStats, CovStats, CorpusStats, RusageStats {
	using Traits = uint32_t;
	enum TraitBits : Traits {
	// The kind of the stat.
	kTimestamp = 1UL << 0,
	kFuzzStat = 1UL << 1,
	kRUsageStat = 1UL << 2,

	// The aggregate value(s) to report for the stat.
	kMin = 1UL << 8,
	kMax = 1UL << 9,
	kAvg = 1UL << 10,
	kSum = 1UL << 11,
	};

	// Ascribes some properties to each stat. Used in `StatReporter` & subclasses.
	struct FieldInfo {
	uint64_t Stats::*field;
	// The machine-readable name of the field. Used in the CSV header.
	std::string_view name;
	// The human-readable description of the field. Used in logging.
	std::string_view description;
	Traits traits;
	};

	// WARNING!!! Before reordering these or changing the aggregation types,
	// consider the backward compatibility implications for historical CSVs out
	// there: if some end-user has a CSV post-processing step that relies on the
	// old order or the aggregation type of the CSV fields, that step will break
	// if either of those things change; if the post-processing step relies on the
	// field names in the CSV header, than might break if those names change; etc.
	// In other words: do not change the names or the order of the old fields
	// without a very good reason.
	static constexpr std::initializer_list<FieldInfo> kFieldInfos = {
	// Coverage 1.
	{
	&Stats::num_covered_pcs,
	"NumCoveredPcs",
	"Coverage",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},

	// Execution.
	{
	&Stats::num_executions,
	"NumExecs",
	"Number of executions",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},

	// Corpus.
	{
	&Stats::active_corpus_size,
	"ActiveCorpusSize",
	"Active corpus size",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::max_corpus_element_size,
	"MaxEltSize",
	"Max element size",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::avg_corpus_element_size,
	"AvgEltSize",
	"Avg element size",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},

	// Metadata.
	{
	&Stats::timestamp_unix_micros,
	"UnixMicros",
	"Timestamp",
	kTimestamp \| kMin \| kMax,
	},

	// Execution 2.
	{
	&Stats::fuzz_time_sec,
	"FuzzTimeSec",
	"Fuzz time (sec)",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},

	// Coverage 2.
	{
	&Stats::num_target_crashes,
	"NumProxyCrashes",
	"Num proxy crashes",
	kFuzzStat \| kMin \| kMax \| kSum,
	},
	{
	&Stats::total_corpus_size,
	"TotalCorpusSize",
	"Total corpus size",
	kFuzzStat \| kMin \| kMax \| kSum,
	},
	{
	&Stats::num_8bit_counter_features,
	"Num8BitCounterFts",
	"Num 8-bit counter features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_data_flow_features,
	"NumDataFlowFts",
	"Num data flow features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_cmp_features,
	"NumCmpFts",
	"Num cmp features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_call_stack_features,
	"NumCallStackFts",
	"Num call stack features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_bounded_path_features,
	"NumBoundedPathFts",
	"Num bounded path features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_pc_pair_features,
	"NumPcPairFts",
	"Num PC pair features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user_features,
	"NumUserFts",
	"Num user features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_unknown_features,
	"NumUnknownFts",
	"Num unknown features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_funcs_in_frontier,
	"NumFuncsInFrontier",
	"Num funcs in frontier",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},

	// Rusage. Each shard of a run is a thread of the same process, but it
	// measures the following metrics for the whole process. That means that
	// all the shards should return more or less the same number for the same
	// thing, sampling jitter and noise notwithstanding. Therefore, for the
	// aggregate stat we use the upper bound of the samples.
	// TODO(ussuri): Revise aggregation for CPU metrics once/if we start
	// measuring them per-thread.
	{
	&Stats::engine_rusage_avg_millicores,
	"EngineRusageAvgCores",
	"Engine rusage avg cores",
	kRUsageStat \| kMax,
	},
	{
	&Stats::engine_rusage_cpu_percent,
	"EngineRusageCpuPct",
	"Engine rusage CPU %",
	kRUsageStat \| kMax,
	},
	{
	&Stats::engine_rusage_rss_mb,
	"EngineRusageRssMb",
	"Engine rusage RSS (MB)",
	kRUsageStat \| kMax,
	},
	{
	&Stats::engine_rusage_vsize_mb,
	"EngineRusageVSizeMb",
	"Engine rusage VSize (MB)",
	kRUsageStat \| kMax,
	},

	// Coverage 3. A breakdown of the total in `Stats::num_user_features` by
	// individual feature types.
	{
	&Stats::num_user0_features,
	"NumUser0Fts",
	"Num user0 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user1_features,
	"NumUser1Fts",
	"Num user1 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user2_features,
	"NumUser2Fts",
	"Num user2 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user3_features,
	"NumUser3Fts",
	"Num user3 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user4_features,
	"NumUser4Fts",
	"Num user4 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user5_features,
	"NumUser5Fts",
	"Num user5 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user6_features,
	"NumUser6Fts",
	"Num user6 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user7_features,
	"NumUser7Fts",
	"Num user7 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user8_features,
	"NumUser8Fts",
	"Num user8 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user9_features,
	"NumUser9Fts",
	"Num user9 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user10_features,
	"NumUser10Fts",
	"Num user10 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user11_features,
	"NumUser11Fts",
	"Num user11 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user12_features,
	"NumUser12Fts",
	"Num user12 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user13_features,
	"NumUser13Fts",
	"Num user13 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user14_features,
	"NumUser14Fts",
	"Num user14 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	{
	&Stats::num_user15_features,
	"NumUser15Fts",
	"Num user15 features",
	kFuzzStat \| kMin \| kMax \| kAvg,
	},
	};
	};

	// An abstract stats reporter. Observes an external set of `Stats` objects and a
	// matching set of `Environment` objects, assumed to be updated regularly by the
	// owning scope to reflect the current execution numbers. Reports these current
	// numbers to an abstract report sink whenever the owning scope invokes
	// `ReportCurrStats()`. Concrete report sinks are implemented by inheriting
	// classes by overriding the protected virtual API.
	//
	// NOTE: The observed vectors are held by reference (see `stats_vec_` and
	// `env_vec_`), not copied.
	class StatsReporter {
	 public:
	  // `stats_vec` and `env_vec` are the external sets to observe. Group indices
	  // (see `GroupToIndices`) refer to positions in both of them.
	  StatsReporter(const std::vector<std::atomic<Stats>> &stats_vec,
	                const std::vector<Environment> &env_vec);

	  // Copy- and move-constructible. Assignment is not available, because the
	  // observed vectors are stored as reference members.
	  StatsReporter(const StatsReporter &) = default;
	  StatsReporter(StatsReporter &&) noexcept;

	  virtual ~StatsReporter() = default;

	  // Reports the current sample of stats values as updated in the `stats_vec_`
	  // externally by the caller. Implements the Template Method pattern by
	  // invoking the protected virtual APIs below in the right order and with the
	  // right data to create a complete sample report.
	  void ReportCurrStats();

	 protected:
	  // Ordered map: group name -> indices of the shards that belong to the group.
	  using GroupToIndices =
	      absl::btree_map<std::string /*group_name*/,
	                      std::vector<size_t> /*indices*/>;
	  // Ordered map: group name -> stringified flags of the group.
	  using GroupToFlags =
	      absl::btree_map<std::string /*group_name*/, std::string /*flags*/>;

	  // Substeps of the Template Method pattern, which is implemented in
	  // `ReportCurrStats()`, that subclasses need to override to implement their
	  // stats reporting.

	  // Should this field be reported or skipped for the particular type of
	  // reporting that the subclass does. Can use `field.traits` to determine that.
	  // The default implementation reports every field.
	  virtual bool ShouldReportThisField(const Stats::FieldInfo &field) {
	    return true;
	  }
	  // Gives a chance to subclasses to learn ahead of time the fields for which
	  // samples are going to be reported, in this order. Is called once.
	  virtual void PreAnnounceFields(
	      std::initializer_list<Stats::FieldInfo> fields) = 0;
	  // Selects the group for the next batch of `ReportCurrFieldSample()` calls.
	  virtual void SetCurrGroup(const Environment &master_env) = 0;
	  // Selects the field for the next batch of `ReportCurrFieldSample()` calls.
	  // Each of those calls will follow a unique combination of `SetCurrGroup()`
	  // and `SetCurrField()`.
	  virtual void SetCurrField(const Stats::FieldInfo &field_info) = 0;
	  // Reports the values for the current group/field selected via the above two
	  // calls.
	  virtual void ReportCurrFieldSample(std::vector<uint64_t> &&values) = 0;
	  // Wraps up the current field sample batch.
	  virtual void DoneFieldSamplesBatch() = 0;
	  // Gives subclasses an option to report the flags associated with each shard
	  // group (e.g. experiments).
	  virtual void ReportFlags(const GroupToFlags &group_to_flags) = 0;

	 private:
	  // Cached external sets of stats and environments to observe.
	  const std::vector<std::atomic<Stats>> &stats_vec_;
	  const std::vector<Environment> &env_vec_;

	  // Maps group names to indices in `env_vec_` / `stats_vec_`. If there is
	  // just a single run (no groups), it will be stored in a single "" key.
	  // NOTE: An ordered map (`absl::btree_map`) is used to order groups
	  // lexicographically.
	  GroupToIndices group_to_indices_;
	  // Maps group names to their distinct flags (stringified). If there is
	  // just a single run (no groups), it will be stored in a single "" key.
	  // NOTE: An ordered map (`absl::btree_map`) is used to order groups
	  // lexicographically.
	  GroupToFlags group_to_flags_;
	};

	// Move constructor of `StatsReporter`, explicitly defaulted outside of the
	// class body: it performs the compiler-generated member-wise move, but counts
	// as user-provided because it is not defaulted on its first declaration.
	// NOTE(review): presumably defaulted here rather than in-class so that the
	// declared `noexcept` applies regardless of the members' own move
	// constructors - confirm.
	inline StatsReporter::StatsReporter(StatsReporter &&) noexcept = default;

	// A `StatsReporter` whose report sink is the log. Given a set of `Stats`
	// objects and a corresponding set of `Environment` objects, logs the current
	// `Stats` values to FUZZTEST_LOG(INFO) on each invocation of
	// `ReportCurrStats()`. If the environments indicate the use of the
	// --experiment flag, the stats for each of the experiments are juxtaposed for
	// easy visual comparison.
	class StatsLogger : public StatsReporter {
	 public:
	  // Constructed exactly like the base class.
	  using StatsReporter::StatsReporter;

	  // Move-constructible.
	  StatsLogger(StatsLogger &&) = default;

	  ~StatsLogger() override = default;

	 private:
	  // Overrides of the `StatsReporter` reporting steps.
	  bool ShouldReportThisField(const Stats::FieldInfo &field) override;
	  void PreAnnounceFields(
	      std::initializer_list<Stats::FieldInfo> fields) override;
	  void SetCurrGroup(const Environment &master_env) override;
	  void SetCurrField(const Stats::FieldInfo &field_info) override;
	  void ReportCurrFieldSample(std::vector<uint64_t> &&values) override;
	  void DoneFieldSamplesBatch() override;
	  void ReportFlags(const GroupToFlags &group_to_flags) override;

	  // State kept between the reporting steps above.
	  // NOTE(review): presumably `os_` accumulates the text that eventually gets
	  // logged, and the other two remember the latest `SetCurrGroup()` /
	  // `SetCurrField()` selections - confirm against the implementation.
	  std::stringstream os_;
	  std::string curr_experiment_name_;
	  Stats::FieldInfo curr_field_info_;
	};

	// Takes a set of `Stats` objects and a corresponding set of `Environment`
	// objects `env_vec` and appends aggregate metrics of the current `Stats` values
	// to a CSV file on each invocation of `ReportCurrStats()`. If the environments
	// indicate the use of the --experiment flag, the stats for each of the
	// experiments are written to a separate correspondingly named CSV file. The
	// names of each output field are written to the file(s) as a CSV header.
	//
	// When the file already exists (e.g. Centipede runs in a previously populated
	// workdir):
	// - If the current CSV header matches the one in the file, then new CSV lines
	//   will be appended to the file.
	// - If the current CSV header doesn't match the one in the file (e.g. the
	//   Centipede version changed and the set of CSV fields changed with it), then
	//   the existing file will be renamed to `GetBackupFilename(filename)`, and a
	//   new file will be created from scratch.
	class StatsCsvFileAppender : public StatsReporter {
	 public:
	  using StatsReporter::StatsReporter;
	  ~StatsCsvFileAppender() override;

	  // Move-only.
	  StatsCsvFileAppender(StatsCsvFileAppender &&) noexcept = default;

	 private:
	  // A remote file handle paired with a string buffer.
	  // NOTE(review): presumably `buffer` holds output pending for `file` -
	  // confirm against the implementation.
	  struct BufferedRemoteFile {
	    RemoteFile *file = nullptr;
	    std::string buffer;
	  };

	  // Auxiliary class that holds a pointer to a `BufferedRemoteFile` and resets
	  // the moved-from instance to `nullptr` when move-constructed. This is to
	  // avoid having to define an explicit move constructor for
	  // `StatsCsvFileAppender` solely to set the pointer to `nullptr`.
	  class BufferedRemoteFilePtr {
	   public:
	    // Intentionally implicit, so that a raw pointer or `nullptr` can be used
	    // directly as an initializer (see `curr_file_`).
	    BufferedRemoteFilePtr(BufferedRemoteFile *absl_nullable file)
	        : file_(file) {}
	    // Takes over the pointer and leaves `other` holding `nullptr`.
	    BufferedRemoteFilePtr(BufferedRemoteFilePtr &&other) noexcept
	        : file_(std::exchange(other.file_, nullptr)) {}
	    // Re-points this object at `file`.
	    BufferedRemoteFilePtr &operator=(BufferedRemoteFile *absl_nullable file) {
	      file_ = file;
	      return *this;
	    }
	    // Comparisons against a raw pointer (including `nullptr`).
	    bool operator==(BufferedRemoteFile *absl_nullable file) const {
	      return file_ == file;
	    }
	    bool operator!=(BufferedRemoteFile *absl_nullable file) const {
	      return file_ != file;
	    }
	    // Member access to the pointee. Returns the raw pointer, which may be
	    // null.
	    BufferedRemoteFile *absl_nullable operator->() const { return file_; }

	   private:
	    BufferedRemoteFile *absl_nullable file_ = nullptr;
	  };

	  using BufferedRemoteFilesMap =
	      absl::flat_hash_map<std::string /*group_name*/, BufferedRemoteFile>;

	  // Overrides of the `StatsReporter` reporting steps.
	  void PreAnnounceFields(
	      std::initializer_list<Stats::FieldInfo> fields) override;
	  void SetCurrGroup(const Environment &master_env) override;
	  void SetCurrField(const Stats::FieldInfo &field_info) override;
	  void ReportCurrFieldSample(std::vector<uint64_t> &&values) override;
	  void DoneFieldSamplesBatch() override;
	  void ReportFlags(const GroupToFlags &group_to_flags) override;

	  // Given a filename, should return a backup file filename for it. The default
	  // version appends the current timestamp as UNIX seconds. Virtual so that
	  // tests can override it.
	  virtual std::string GetBackupFilename(const std::string &filename) const;

	  std::string csv_header_;
	  // Heap-allocated map from a group name to that group's buffered file.
	  std::unique_ptr<BufferedRemoteFilesMap> files_ =
	      std::make_unique<BufferedRemoteFilesMap>();
	  // NOTE(review): presumably points at the `files_` entry selected by the
	  // latest `SetCurrGroup()` call - confirm against the implementation.
	  BufferedRemoteFilePtr curr_file_ = nullptr;
	  Stats::FieldInfo curr_field_info_;
	};

	// Takes a span of `Stats` objects `stats_vec` and prints a summary of the
	// results to `os`, such that it can be ingested as a reward function by an ML
	// system. To be used with knobs.
	//
	// `stats_vec` is a read-only view of the stats to summarize; `os` is the
	// output stream that receives the summary.
	void PrintRewardValues(absl::Span<const std::atomic<Stats>> stats_vec,
	std::ostream &os);

	} // namespace fuzztest::internal

	#endif // THIRD_PARTY_CENTIPEDE_STATS_H_