blob: 50225ed3f4ef9bc736ef3a665bb03e908fc42956 [file] [log] [blame] [edit]
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./centipede/stats.h"
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem> // NOLINT: C++17
#include <initializer_list>
#include <iomanip>
#include <ios>
#include <iosfwd>
#include <limits>
#include <numeric>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./centipede/environment.h"
#include "./centipede/workdir.h"
#include "./common/logging.h"
#include "./common/remote_file.h"
namespace fuzztest::internal {
// Shorthand for the standard filesystem library; used below for path
// manipulation when deriving backup file names.
namespace fs = std::filesystem;
// Shorthand for the trait constants (e.g. `kMin`, `kMax`, `kFuzzStat`) that
// are tested against `Stats::FieldInfo::traits` throughout this file.
using TraitBits = Stats::TraitBits;
// -----------------------------------------------------------------------------
// StatsReporter
// Builds the reporter over parallel vectors of stats and environments:
// element `i` of `stats_vec` is reported using element `i` of `env_vec`.
// Both vectors must have the same size (checked). Indices are grouped by
// `Environment::experiment_name`, and each group's flags are remembered.
StatsReporter::StatsReporter(const std::vector<std::atomic<Stats>> &stats_vec,
                             const std::vector<Environment> &env_vec)
    : stats_vec_{stats_vec}, env_vec_{env_vec} {
  FUZZTEST_CHECK_EQ(stats_vec.size(), env_vec.size());
  size_t index = 0;
  for (const auto &group_env : env_vec) {
    const auto &group_key = group_env.experiment_name;
    group_to_indices_[group_key].push_back(index);
    // NOTE: Every member of a group re-assigns the group's flags here. That is
    // harmless: the value is the same for all of them by construction in
    // environment.cc.
    group_to_flags_[group_key] = group_env.experiment_flags;
    ++index;
  }
}
void StatsReporter::ReportCurrStats() {
// Collect snapshots of the current elements of `stats_vec_`: the elements
// are `std::atomic`s; snapshotting them is required, and also provides
// temporal consistency between the fields of each `Stats` object, even
// as it is being modified by a different thread.
std::vector<Stats> stats_snapshots;
stats_snapshots.reserve(stats_vec_.size());
for (const auto &stats : stats_vec_) {
stats_snapshots.push_back(stats.load());
}
PreAnnounceFields(Stats::kFieldInfos);
for (const Stats::FieldInfo &field_info : Stats::kFieldInfos) {
if (!ShouldReportThisField(field_info)) continue;
SetCurrField(field_info);
for (const auto &[group_name, group_indices] : group_to_indices_) {
SetCurrGroup(env_vec_[group_indices.at(0)]);
// Get the required stat fields into a vector `stat_values`.
std::vector<uint64_t> stat_values;
stat_values.reserve(group_indices.size());
for (const auto idx : group_indices) {
stat_values.push_back(stats_snapshots.at(idx).*(field_info.field));
}
ReportCurrFieldSample(std::move(stat_values));
}
}
ReportFlags(group_to_flags_);
DoneFieldSamplesBatch();
}
// -----------------------------------------------------------------------------
// StatsLogger
// Returns true only for fields carrying the `kFuzzStat` trait.
bool StatsLogger::ShouldReportThisField(const Stats::FieldInfo &field) {
  // Timestamps and rusage stats are deliberately left out of the log.
  // Timestamps are only useful in the CSVs. Rusage is (at least currently)
  // measured for the whole process rather than per shard or experiment, so
  // printing nearly identical numbers would be useless and confusing.
  const auto fuzz_stat_bits = field.traits & TraitBits::kFuzzStat;
  return fuzz_stat_bits != 0;
}
// Called by `StatsReporter::ReportCurrStats()` before any field is reported.
// The logger needs no preparation, so `fields` is intentionally unused.
void StatsLogger::PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) {
// Nothing to do: field names are logged together with every sample's values.
}
// Remembers the experiment name of `master_env`; it is printed as a prefix by
// subsequent `ReportCurrFieldSample()` calls when it is non-empty.
void StatsLogger::SetCurrGroup(const Environment &master_env) {
  const auto &group_name = master_env.experiment_name;
  curr_experiment_name_ = group_name;
}
// Makes `field_info` the current field and starts its section in the
// accumulated output with a "<description>:" heading line.
void StatsLogger::SetCurrField(const Stats::FieldInfo &field_info) {
  curr_field_info_ = field_info;
  os_ << curr_field_info_.description;
  os_ << ":\n";
}
// Appends one line for the current field and group to the accumulated output.
//
// The line holds the aggregates selected by the current field's traits (min,
// max, avg, sum), then "--", then all of `values` in ascending order. When a
// non-empty experiment name is set, the line is prefixed with it.
//
// `values` is sorted in place. An empty `values` is tolerated: all aggregates
// are then reported as 0 and no individual values are printed.
void StatsLogger::ReportCurrFieldSample(std::vector<uint64_t> &&values) {
  if (!curr_experiment_name_.empty())
    os_ << " " << curr_experiment_name_ << ": ";
  // Print the requested aggregate stats as well as the full sorted contents of
  // `values`.
  std::sort(values.begin(), values.end());
  // `front()`/`back()` are undefined on an empty vector, so guard them the
  // same way the average is guarded.
  const bool has_values = !values.empty();
  const uint64_t min = has_values ? values.front() : 0;
  const uint64_t max = has_values ? values.back() : 0;
  // Accumulate in `uint64_t`: a floating-point accumulator silently loses
  // precision once the running total exceeds 2^53, and converting an
  // out-of-range double back to `uint64_t` is undefined.
  const uint64_t sum =
      std::accumulate(values.begin(), values.end(), uint64_t{0});
  const double avg = has_values ? (1.0 * sum / values.size()) : 0;
  // These formatting flags stay set on `os_`; they only affect how the
  // floating-point average is rendered.
  os_ << std::fixed << std::setprecision(1);
  if (curr_field_info_.traits & TraitBits::kMin) os_ << "min:\t" << min << "\t";
  if (curr_field_info_.traits & TraitBits::kMax) os_ << "max:\t" << max << "\t";
  if (curr_field_info_.traits & TraitBits::kAvg) os_ << "avg:\t" << avg << "\t";
  if (curr_field_info_.traits & TraitBits::kSum) os_ << "sum:\t" << sum << "\t";
  os_ << "--";
  for (const auto value : values) {
    os_ << "\t" << value;
  }
  os_ << "\n";
}
// Appends a "Flags:" section listing each group's flags to the accumulated
// output. Groups whose name and flags are both empty are skipped, and the
// section is omitted altogether when nothing was written for any group.
void StatsLogger::ReportFlags(const GroupToFlags &group_to_flags) {
  std::stringstream flag_lines;
  for (const auto &[name, flags] : group_to_flags) {
    if (name.empty() && flags.empty()) continue;
    flag_lines << " " << name << ": " << flags << "\n";
  }
  // Only emit the heading if at least one line was produced.
  if (flag_lines.tellp() == std::streampos{0}) return;
  os_ << "Flags:\n" << flag_lines.rdbuf();
}
// Flushes the batch: logs everything accumulated in `os_` (with surrounding
// whitespace stripped) as a single INFO message, then empties `os_`.
void StatsLogger::DoneFieldSamplesBatch() {
  const std::string accumulated = os_.str();
  FUZZTEST_LOG(INFO) << "Current stats:\n"
                     << absl::StripAsciiWhitespace(accumulated);
  // Start the next round of logging from a clean stream.
  os_.str("");
}
// -----------------------------------------------------------------------------
// StatsCsvFileAppender
// Closes every CSV file that was opened by `SetCurrGroup()`; a failure to
// close is fatal. Does nothing if the file map was never created.
StatsCsvFileAppender::~StatsCsvFileAppender() {
  if (files_ != nullptr) {
    for (const auto &[group_name, group_file] : *files_) {
      FUZZTEST_CHECK_OK(RemoteFileClose(group_file.file));
    }
  }
}
// Builds the CSV header once, on the first call; later calls are no-ops.
// Each field contributes one "<name>_Min,", "<name>_Max,", "<name>_Avg,"
// and/or "<name>_Sum," column, in that order, according to its traits. The
// header is terminated with a newline.
void StatsCsvFileAppender::PreAnnounceFields(
    std::initializer_list<Stats::FieldInfo> fields) {
  if (!csv_header_.empty()) return;
  for (const auto &field : fields) {
    // Appends the column "<field name><suffix>" iff the field has `trait`.
    const auto add_column_if = [&](const auto trait, const char *suffix) {
      if (field.traits & trait) {
        absl::StrAppend(&csv_header_, field.name, suffix);
      }
    };
    add_column_if(TraitBits::kMin, "_Min,");
    add_column_if(TraitBits::kMax, "_Max,");
    add_column_if(TraitBits::kAvg, "_Avg,");
    add_column_if(TraitBits::kSum, "_Sum,");
  }
  absl::StrAppend(&csv_header_, "\n");
}
void StatsCsvFileAppender::SetCurrGroup(const Environment &master_env) {
FUZZTEST_CHECK(files_ != nullptr);
BufferedRemoteFile &file = (*files_)[master_env.experiment_name];
if (file.file == nullptr) {
const std::string filename =
WorkDir{master_env}.FuzzingStatsPath(master_env.experiment_name);
// If a non-empty file already exists and has the same CVS header, then
// keep appending new CSV lines to the file. If the file exists, but has a
// different CSV header (ostensibly because it was created by a different
// version of Centipede), then make a backup copy of the file and start a
// a new one from scratch.
bool append = false;
if (RemotePathExists(filename)) {
std::string contents;
FUZZTEST_CHECK_OK(RemoteFileGetContents(filename, contents));
// NOTE: `csv_header_` ends with '\n', so the match is exact.
if (absl::StartsWith(contents, csv_header_)) {
append = true;
} else {
append = false;
FUZZTEST_CHECK_OK(
RemoteFileSetContents(GetBackupFilename(filename), contents));
}
}
file.file = *RemoteFileOpen(filename, append ? "a" : "w");
FUZZTEST_CHECK(file.file != nullptr) << VV(filename);
if (!append) {
FUZZTEST_CHECK_OK(RemoteFileAppend(file.file, csv_header_));
FUZZTEST_CHECK_OK(RemoteFileFlush(file.file));
}
}
// This is OK even though hash maps provide no pointer stability because the
// field is always updated immediately after the map is modified.
curr_file_ = &file;
}
// Remembers `field_info` as the current field. Its traits decide which
// aggregates `ReportCurrFieldSample()` appends to the CSV line.
void StatsCsvFileAppender::SetCurrField(const Stats::FieldInfo &field_info) {
curr_field_info_ = field_info;
}
// Appends the aggregates of `values` selected by the current field's traits
// (min, max, avg, sum, each followed by a comma) to the current file's
// pending line buffer. A current file must have been set (checked).
//
// For an empty `values`, min is reported as the largest `uint64_t`, and max,
// avg and sum as 0.
void StatsCsvFileAppender::ReportCurrFieldSample(
    std::vector<uint64_t> &&values) {
  FUZZTEST_CHECK(curr_file_ != nullptr);

  // Unsigned addition: wraps around on overflow.
  const uint64_t total =
      std::accumulate(values.begin(), values.end(), uint64_t{0});
  uint64_t lowest = std::numeric_limits<uint64_t>::max();
  uint64_t highest = std::numeric_limits<uint64_t>::min();
  if (!values.empty()) {
    const auto [lowest_it, highest_it] =
        std::minmax_element(values.begin(), values.end());
    lowest = *lowest_it;
    highest = *highest_it;
  }
  const double mean = values.empty() ? 0 : (1.0 * total / values.size());

  std::string &line = curr_file_->buffer;
  const auto traits = curr_field_info_.traits;
  if (traits & TraitBits::kMin) {
    absl::StrAppendFormat(&line, "%" PRIu64 ",", lowest);
  }
  if (traits & TraitBits::kMax) {
    absl::StrAppendFormat(&line, "%" PRIu64 ",", highest);
  }
  if (traits & TraitBits::kAvg) {
    absl::StrAppendFormat(&line, "%.1lf,", mean);
  }
  if (traits & TraitBits::kSum) {
    absl::StrAppendFormat(&line, "%" PRIu64 ",", total);
  }
}
// Intentionally a no-op for the CSV appender; `group_to_flags` is unused.
void StatsCsvFileAppender::ReportFlags(const GroupToFlags &group_to_flags) {
// Do nothing: can't write to CSV, as it has no concept of comments.
// TODO(ussuri): Consider writing to a sidecar file.
}
// Finishes the batch: for every open CSV file, writes its pending line buffer
// followed by a newline, flushes the file, and clears the buffer. Write and
// flush failures are fatal.
void StatsCsvFileAppender::DoneFieldSamplesBatch() {
  FUZZTEST_CHECK(files_ != nullptr);
  for (auto &[group_name, group_file] : *files_) {
    const std::string csv_line = absl::StrCat(group_file.buffer, "\n");
    FUZZTEST_CHECK_OK(RemoteFileAppend(group_file.file, csv_line));
    FUZZTEST_CHECK_OK(RemoteFileFlush(group_file.file));
    group_file.buffer.clear();
  }
}
std::string StatsCsvFileAppender::GetBackupFilename(
const std::string &filename) const {
fs::path path{filename};
const auto timestamp = absl::ToUnixSeconds(absl::Now());
const auto new_extension =
absl::StrCat(path.extension().string(), ".", timestamp);
path.replace_extension(new_extension);
return path.string();
}
// -----------------------------------------------------------------------------
// Prints summary "reward" values computed from the `num_covered_pcs` field of
// every element of `stats_vec` to `os`, one "REWARD_<KIND> <value>" per line:
// the maximum, second maximum, minimum, median (the element at index n/2 of
// the sorted values) and average.
//
// `stats_vec` must be non-empty (checked). With a single element there is no
// distinct runner-up, so REWARD_SECOND_MAX repeats that only value.
void PrintRewardValues(absl::Span<const std::atomic<Stats>> stats_vec,
                       std::ostream &os) {
  size_t n = stats_vec.size();
  FUZZTEST_CHECK_GT(n, 0);
  std::vector<size_t> num_covered_pcs(n);
  for (size_t i = 0; i < n; ++i) {
    num_covered_pcs[i] = stats_vec[i].load().num_covered_pcs;
  }
  std::sort(num_covered_pcs.begin(), num_covered_pcs.end());
  // For n == 1 the only valid index is 0; `n - 2` would wrap around and index
  // 1 would read past the end of the vector.
  const size_t second_max_idx = (n == 1) ? 0 : n - 2;
  os << "REWARD_MAX " << num_covered_pcs.back() << "\n";
  os << "REWARD_SECOND_MAX " << num_covered_pcs[second_max_idx] << "\n";
  os << "REWARD_MIN " << num_covered_pcs.front() << "\n";
  os << "REWARD_MEDIAN " << num_covered_pcs[n / 2] << "\n";
  // The `0.` seed makes the accumulation (and thus the average) a double.
  os << "REWARD_AVERAGE "
     << (std::accumulate(num_covered_pcs.begin(), num_covered_pcs.end(), 0.) /
         n)
     << "\n";
}
} // namespace fuzztest::internal