blob: 635533d605e9728494a9ed328402bd8fb9f63c9e [file] [log] [blame] [edit]
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_STATS_H_
#define THIRD_PARTY_CENTIPEDE_STATS_H_
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <initializer_list>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/base/nullability.h"
#include "absl/container/btree_map.h"
#include "absl/container/flat_hash_map.h"
#include "absl/types/span.h"
#include "./centipede/environment.h"
#include "./common/remote_file.h"
namespace fuzztest::internal {
// A set of statistics about the fuzzing progress.
// - Each worker thread has its own `std::atomic<Stats>` object and updates it
// periodically. Another special thread reads all such objects periodically
// and concurrently to report the current stats to log and/or files on disk.
// - The updates must not be frequent for performance reasons.
// - These objects may also be accessed after all worker threads have joined.
struct StatsMeta {
uint64_t timestamp_unix_micros = 0;
// NOTE: Ordering in general won't be applicable to metadata, so define
// equality only.
friend bool operator==(const StatsMeta &lhs, const StatsMeta &rhs) {
return lhs.timestamp_unix_micros == rhs.timestamp_unix_micros;
}
};
struct ExecStats {
uint64_t fuzz_time_sec = 0;
uint64_t num_executions = 0;
uint64_t num_target_crashes = 0;
friend bool operator==(const ExecStats &lhs, const ExecStats &rhs) {
return lhs.fuzz_time_sec == rhs.fuzz_time_sec &&
lhs.num_executions == rhs.num_executions &&
lhs.num_target_crashes == rhs.num_target_crashes;
}
};
struct CovStats {
uint64_t num_covered_pcs = 0;
uint64_t num_8bit_counter_features = 0;
uint64_t num_data_flow_features = 0;
uint64_t num_cmp_features = 0;
uint64_t num_call_stack_features = 0;
uint64_t num_bounded_path_features = 0;
uint64_t num_pc_pair_features = 0;
uint64_t num_user_features = 0;
uint64_t num_user0_features = 0;
uint64_t num_user1_features = 0;
uint64_t num_user2_features = 0;
uint64_t num_user3_features = 0;
uint64_t num_user4_features = 0;
uint64_t num_user5_features = 0;
uint64_t num_user6_features = 0;
uint64_t num_user7_features = 0;
uint64_t num_user8_features = 0;
uint64_t num_user9_features = 0;
uint64_t num_user10_features = 0;
uint64_t num_user11_features = 0;
uint64_t num_user12_features = 0;
uint64_t num_user13_features = 0;
uint64_t num_user14_features = 0;
uint64_t num_user15_features = 0;
uint64_t num_unknown_features = 0;
uint64_t num_funcs_in_frontier = 0;
friend bool operator==(const CovStats &lhs, const CovStats &rhs) {
return lhs.num_covered_pcs == rhs.num_covered_pcs &&
lhs.num_8bit_counter_features == rhs.num_8bit_counter_features &&
lhs.num_data_flow_features == rhs.num_data_flow_features &&
lhs.num_cmp_features == rhs.num_cmp_features &&
lhs.num_call_stack_features == rhs.num_call_stack_features &&
lhs.num_bounded_path_features == rhs.num_bounded_path_features &&
lhs.num_pc_pair_features == rhs.num_pc_pair_features &&
lhs.num_user_features == rhs.num_user_features &&
lhs.num_user0_features == rhs.num_user0_features &&
lhs.num_user1_features == rhs.num_user1_features &&
lhs.num_user2_features == rhs.num_user2_features &&
lhs.num_user3_features == rhs.num_user3_features &&
lhs.num_user4_features == rhs.num_user4_features &&
lhs.num_user5_features == rhs.num_user5_features &&
lhs.num_user6_features == rhs.num_user6_features &&
lhs.num_user7_features == rhs.num_user7_features &&
lhs.num_user8_features == rhs.num_user8_features &&
lhs.num_user9_features == rhs.num_user9_features &&
lhs.num_user10_features == rhs.num_user10_features &&
lhs.num_user11_features == rhs.num_user11_features &&
lhs.num_user12_features == rhs.num_user12_features &&
lhs.num_user13_features == rhs.num_user13_features &&
lhs.num_user14_features == rhs.num_user14_features &&
lhs.num_user15_features == rhs.num_user15_features &&
lhs.num_unknown_features == rhs.num_unknown_features &&
lhs.num_funcs_in_frontier == rhs.num_funcs_in_frontier;
}
};
struct CorpusStats {
uint64_t active_corpus_size = 0;
uint64_t total_corpus_size = 0;
uint64_t max_corpus_element_size = 0;
uint64_t avg_corpus_element_size = 0;
friend bool operator==(const CorpusStats &lhs, const CorpusStats &rhs) {
return lhs.active_corpus_size == rhs.active_corpus_size &&
lhs.total_corpus_size == rhs.total_corpus_size &&
lhs.max_corpus_element_size == rhs.max_corpus_element_size &&
lhs.avg_corpus_element_size == rhs.avg_corpus_element_size;
}
};
struct RusageStats {
uint64_t engine_rusage_avg_millicores = 0;
uint64_t engine_rusage_cpu_percent = 0;
uint64_t engine_rusage_rss_mb = 0;
uint64_t engine_rusage_vsize_mb = 0;
friend bool operator==(const RusageStats &lhs, const RusageStats &rhs) {
return lhs.engine_rusage_avg_millicores ==
rhs.engine_rusage_avg_millicores &&
lhs.engine_rusage_cpu_percent == rhs.engine_rusage_cpu_percent &&
lhs.engine_rusage_rss_mb == rhs.engine_rusage_rss_mb &&
lhs.engine_rusage_vsize_mb == rhs.engine_rusage_vsize_mb;
}
};
struct Stats : StatsMeta, ExecStats, CovStats, CorpusStats, RusageStats {
using Traits = uint32_t;
enum TraitBits : Traits {
// The kind of the stat.
kTimestamp = 1UL << 0,
kFuzzStat = 1UL << 1,
kRUsageStat = 1UL << 2,
// The aggregate value(s) to report for the stat.
kMin = 1UL << 8,
kMax = 1UL << 9,
kAvg = 1UL << 10,
kSum = 1UL << 11,
};
// Ascribes some properties to each stat. Used in `StatReporter` & subclasses.
struct FieldInfo {
uint64_t Stats::*field;
// The machine-readable name of the field. Used in the CSV header.
std::string_view name;
// The human-readable description of the field. Used in logging.
std::string_view description;
Traits traits;
};
// WARNING!!! Before reordering these or changing the aggregation types,
// consider the backward compatibility implications for historical CSVs out
// there: if some end-user has a CSV post-processing step that relies on the
// old order or the aggregation type of the CSV fields, that step will break
// if either of those things change; if the post-processing step relies on the
// field names in the CSV header, than might break if those names change; etc.
// In other words: do not change the names or the order of the old fields
// without a very good reason.
static constexpr std::initializer_list<FieldInfo> kFieldInfos = {
// Coverage 1.
{
&Stats::num_covered_pcs,
"NumCoveredPcs",
"Coverage",
kFuzzStat | kMin | kMax | kAvg,
},
// Execution.
{
&Stats::num_executions,
"NumExecs",
"Number of executions",
kFuzzStat | kMin | kMax | kAvg,
},
// Corpus.
{
&Stats::active_corpus_size,
"ActiveCorpusSize",
"Active corpus size",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::max_corpus_element_size,
"MaxEltSize",
"Max element size",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::avg_corpus_element_size,
"AvgEltSize",
"Avg element size",
kFuzzStat | kMin | kMax | kAvg,
},
// Metadata.
{
&Stats::timestamp_unix_micros,
"UnixMicros",
"Timestamp",
kTimestamp | kMin | kMax,
},
// Execution 2.
{
&Stats::fuzz_time_sec,
"FuzzTimeSec",
"Fuzz time (sec)",
kFuzzStat | kMin | kMax | kAvg,
},
// Coverage 2.
{
&Stats::num_target_crashes,
"NumProxyCrashes",
"Num proxy crashes",
kFuzzStat | kMin | kMax | kSum,
},
{
&Stats::total_corpus_size,
"TotalCorpusSize",
"Total corpus size",
kFuzzStat | kMin | kMax | kSum,
},
{
&Stats::num_8bit_counter_features,
"Num8BitCounterFts",
"Num 8-bit counter features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_data_flow_features,
"NumDataFlowFts",
"Num data flow features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_cmp_features,
"NumCmpFts",
"Num cmp features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_call_stack_features,
"NumCallStackFts",
"Num call stack features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_bounded_path_features,
"NumBoundedPathFts",
"Num bounded path features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_pc_pair_features,
"NumPcPairFts",
"Num PC pair features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user_features,
"NumUserFts",
"Num user features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_unknown_features,
"NumUnknownFts",
"Num unknown features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_funcs_in_frontier,
"NumFuncsInFrontier",
"Num funcs in frontier",
kFuzzStat | kMin | kMax | kAvg,
},
// Rusage. Each shard of a run is a thread of the same process, but it
// measures the following metrics for the whole process. That means that
// all the shards should return more or less the same number for the same
// thing, sampling jitter and noise notwithstanding. Therefore, for the
// aggregate stat we use the upper bound of the samples.
// TODO(ussuri): Revise aggregation for CPU metrics once/if we start
// measuring them per-thread.
{
&Stats::engine_rusage_avg_millicores,
"EngineRusageAvgCores",
"Engine rusage avg cores",
kRUsageStat | kMax,
},
{
&Stats::engine_rusage_cpu_percent,
"EngineRusageCpuPct",
"Engine rusage CPU %",
kRUsageStat | kMax,
},
{
&Stats::engine_rusage_rss_mb,
"EngineRusageRssMb",
"Engine rusage RSS (MB)",
kRUsageStat | kMax,
},
{
&Stats::engine_rusage_vsize_mb,
"EngineRusageVSizeMb",
"Engine rusage VSize (MB)",
kRUsageStat | kMax,
},
// Coverage 3. A breakdown of the total in `Stats::num_user_features` by
// individual feature types.
{
&Stats::num_user0_features,
"NumUser0Fts",
"Num user0 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user1_features,
"NumUser1Fts",
"Num user1 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user2_features,
"NumUser2Fts",
"Num user2 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user3_features,
"NumUser3Fts",
"Num user3 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user4_features,
"NumUser4Fts",
"Num user4 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user5_features,
"NumUser5Fts",
"Num user5 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user6_features,
"NumUser6Fts",
"Num user6 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user7_features,
"NumUser7Fts",
"Num user7 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user8_features,
"NumUser8Fts",
"Num user8 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user9_features,
"NumUser9Fts",
"Num user9 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user10_features,
"NumUser10Fts",
"Num user10 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user11_features,
"NumUser11Fts",
"Num user11 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user12_features,
"NumUser12Fts",
"Num user12 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user13_features,
"NumUser13Fts",
"Num user13 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user14_features,
"NumUser14Fts",
"Num user14 features",
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_user15_features,
"NumUser15Fts",
"Num user15 features",
kFuzzStat | kMin | kMax | kAvg,
},
};
};
// An abstract stats reporter. Observes an external set of `Stats` objects and a
// matching set of `Environment` objects, assumed to be updated regularly by the
// owning scope to reflect the current execution numbers. Reports these current
// numbers to an abstract report sink whenever the owning scope invokes
// `ReportCurrStats()`. Concrete report sinks are implemented by inheriting
// classes by overriding the virtual API.
class StatsReporter {
public:
StatsReporter(const std::vector<std::atomic<Stats>> &stats_vec,
const std::vector<Environment> &env_vec);
StatsReporter(const StatsReporter &) = default;
StatsReporter(StatsReporter &&) noexcept;
virtual ~StatsReporter() = default;
// Reports the current sample of stats values as updated in the `stats_vec_`
// externally by the caller. Implements the Template Method pattern by
// invoking the private virtual APIs below in the right order and with the
// right data to create a complete sample report.
void ReportCurrStats();
protected:
using GroupToIndices = //
absl::btree_map<std::string /*group_name*/,
std::vector<size_t> /*indices*/>;
using GroupToFlags =
absl::btree_map<std::string /*group_name*/, std::string /*flags*/>;
// Substeps of the Template Method pattern, which is implemented in
// `ReportCurrStats()`, that subclasses need to override to implement their
// stats reporting.
// Should this field be reported or skipped for the particular type of
// reporting that the subclass does. Can use `field.traits` to determine that.
virtual bool ShouldReportThisField(const Stats::FieldInfo &field) {
return true;
}
// Gives a chance to subclasses to learn ahead of time the fields for which
// samples are going to be reported, in this order. Is called once.
virtual void PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) = 0;
// Selects the group for the next batch of `ReportCurrFieldSample()` calls.
virtual void SetCurrGroup(const Environment &master_env) = 0;
// Selects the field for the next batch of `ReportCurrFieldSample()` calls.
// Each of those calls will follow a unique combination of `SetCurrGroup()`
// and `SetCurrField()`.
virtual void SetCurrField(const Stats::FieldInfo &field_info) = 0;
// Reports the values for the current group/field selected via the above two
// calls.
virtual void ReportCurrFieldSample(std::vector<uint64_t> &&values) = 0;
// Wraps up the current field sample batch.
virtual void DoneFieldSamplesBatch() = 0;
// Gives subclasses an option to report the flags associated with each shard
// group (e.g. experiments).
virtual void ReportFlags(const GroupToFlags &group_to_flags) = 0;
private:
// Cached external sets of stats and environments to observe.
const std::vector<std::atomic<Stats>> &stats_vec_;
const std::vector<Environment> &env_vec_;
// Maps group names to indices in `env_vec_` / `stats_vec_`. If there is
// just a single run (no groups), it will be stored in a single "" key.
// NOTE: Use std::map to order groups lexicographically.
GroupToIndices group_to_indices_;
// Maps group names to their distinct flags (stringified). If there is
// just a single run (no groups), it will be stored in a single "" key.
// NOTE: Use std::map to order groups lexicographically.
GroupToFlags group_to_flags_;
};
inline StatsReporter::StatsReporter(StatsReporter &&) noexcept = default;
// Takes a set of `Stats` objects and a corresponding set of `Environment`
// objects and logs the current `Stats` values to FUZZTEST_LOG(INFO) on each
// invocation of `ReportCurrStats()`. If the environments indicate the use of
// the
// --experiment flag, the stats for each of the experiment are juxtaposed for
// easy visual comparison.
class StatsLogger : public StatsReporter {
public:
using StatsReporter::StatsReporter;
~StatsLogger() override = default;
StatsLogger(StatsLogger &&) = default;
private:
bool ShouldReportThisField(const Stats::FieldInfo &field) override;
void PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) override;
void SetCurrGroup(const Environment &master_env) override;
void SetCurrField(const Stats::FieldInfo &field_info) override;
void ReportCurrFieldSample(std::vector<uint64_t> &&values) override;
void DoneFieldSamplesBatch() override;
void ReportFlags(const GroupToFlags &group_to_flags) override;
std::stringstream os_;
std::string curr_experiment_name_;
Stats::FieldInfo curr_field_info_;
};
// Takes a set of `Stats` objects and a corresponding set of `Environment`
// objects `env_vec` and appends aggregate metrics of the current `Stats` values
// to a CSV file on each invocation of `ReportCurrStats()`. If the environments
// indicate the use of the --experiment flag, the stats for each of the
// experiments are written to a separate correspondingly named CSV file. The
// names of each output field are written to the file(s) as a CSV header.
//
// When the file already exists (e.g. Centipede runs in a previously populated
// workdir):
// - If the current CSV header matches the one in the file, then new CSV lines
// will be appended to the file.
// - If the current CSV header doesn't match the one in the file (e.g. the
// Centipede version changed and the set of CSV fields changed with it), then
// the existing file will be renamed to `GetBackupFilename(filename)`, and a
// new file will be created from scratch.
class StatsCsvFileAppender : public StatsReporter {
public:
using StatsReporter::StatsReporter;
~StatsCsvFileAppender() override;
// Move-only.
StatsCsvFileAppender(StatsCsvFileAppender &&) noexcept = default;
private:
struct BufferedRemoteFile {
RemoteFile *file = nullptr;
std::string buffer;
};
// Auxiliary struct that holds a pointer to a `BufferedRemoteFile` and sets
// itself to `nullptr` when moved. This is to avoid having to define an
// explicit move constructor for `StatsCsvFileAppender` solely to set the
// pointer to `nullptr`.
class BufferedRemoteFilePtr {
public:
BufferedRemoteFilePtr(BufferedRemoteFile *absl_nullable file)
: file_(file) {}
BufferedRemoteFilePtr(BufferedRemoteFilePtr &&other) noexcept
: file_(std::exchange(other.file_, nullptr)) {}
BufferedRemoteFilePtr &operator=(BufferedRemoteFile *absl_nullable file) {
file_ = file;
return *this;
}
bool operator==(BufferedRemoteFile *absl_nullable file) const {
return file_ == file;
}
bool operator!=(BufferedRemoteFile *absl_nullable file) const {
return file_ != file;
}
BufferedRemoteFile *absl_nullable operator->() const { return file_; }
private:
BufferedRemoteFile *absl_nullable file_ = nullptr;
};
using BufferedRemoteFilesMap =
absl::flat_hash_map<std::string /*group_name*/, BufferedRemoteFile>;
void PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) override;
void SetCurrGroup(const Environment &master_env) override;
void SetCurrField(const Stats::FieldInfo &field_info) override;
void ReportCurrFieldSample(std::vector<uint64_t> &&values) override;
void DoneFieldSamplesBatch() override;
void ReportFlags(const GroupToFlags &group_to_flags) override;
// Given a filename, should return a backup file filename for it. The default
// version appends the current timestamp as UNIX seconds. Intended for tests.
virtual std::string GetBackupFilename(const std::string &filename) const;
std::string csv_header_;
std::unique_ptr<BufferedRemoteFilesMap> files_ =
std::make_unique<BufferedRemoteFilesMap>();
BufferedRemoteFilePtr curr_file_ = nullptr;
Stats::FieldInfo curr_field_info_;
};
// Takes a span of Stats objects `stats_vec` and prints a summary of the results
// to `os`, such that it can be ingested as a reward function by an ML system.
// To be used with knobs.
void PrintRewardValues(absl::Span<const std::atomic<Stats>> stats_vec,
std::ostream &os);
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_STATS_H_