// Source blob: 42ae8860282adca620b601d024108ecc1b93e835
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_
#define THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
#include "./centipede/blob_file.h"
#include "./centipede/centipede_callbacks.h"
#include "./centipede/command.h"
#include "./centipede/control_flow.h"
#include "./centipede/corpus.h"
#include "./centipede/coverage.h"
#include "./centipede/defs.h"
#include "./centipede/environment.h"
#include "./centipede/execution_result.h"
#include "./centipede/rusage_profiler.h"
#include "./centipede/stats.h"
#include "./centipede/symbol_table.h"
namespace centipede {
// The main fuzzing class.
//
// Owns the per-instance fuzzing state declared at the bottom of the class
// (feature set, corpus, coverage frontier, RNG, counters) and keeps reference
// members to an `Environment`, `CentipedeCallbacks`, `BinaryInfo`,
// `CoverageLogger` and `Stats`.
// NOTE(review): the reference members are presumably bound to the constructor
// arguments, in which case those objects must outlive this instance - confirm
// in centipede.cc.
class Centipede {
 public:
  // Constructs the fuzzing object. All arguments are taken by reference.
  Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
            const BinaryInfo &binary_info, CoverageLogger &coverage_logger,
            Stats &stats);
  virtual ~Centipede() = default;

  // Non-copyable and non-movable.
  Centipede(const Centipede &) = delete;
  Centipede(Centipede &&) noexcept = delete;
  Centipede &operator=(const Centipede &) = delete;
  Centipede &operator=(Centipede &&) noexcept = delete;

  // Main loop.
  void FuzzingLoop();

  // Saves the sharded corpus into `dir`, one file per input.
  static void SaveCorpusToLocalDir(const Environment &env,
                                   std::string_view dir);

  // Exports the corpus from `dir` (one file per input) into the sharded corpus.
  // * Reads `dir` recursively.
  // * Ignores inputs that already exist in the shard they need to be added to.
  // * Sharding is stable and depends only on `env.total_shards` and the file
  //   name.
  static void ExportCorpusFromLocalDir(const Environment &env,
                                       std::string_view dir);

 private:
  // Executes inputs from `input_vec`.
  // For every input, its pruned features are written to
  // `unconditional_features_file` (if that's non-null).
  // For every input that caused new features to be observed:
  //   * the input is added to the corpus (`corpus_` and `fs_` are updated).
  //   * the input is written to `corpus_file` (if that's non-null).
  //   * its features are written to `features_file` (if that's non-null).
  // Returns true if new features were observed.
  // Post-condition: `batch_result.results.size()` == `input_vec.size()`.
  // NOTE(review): `batch_result` is not a parameter of this function;
  // presumably it is the result object filled while executing `input_vec`.
  // The comment on ReportCrash() spells the accessor `results()` - confirm
  // which spelling is correct.
  bool RunBatch(const std::vector<ByteArray> &input_vec,
                BlobFileWriter *corpus_file, BlobFileWriter *features_file,
                BlobFileWriter *unconditional_features_file);

  // Loads a shard `shard_index` from `load_env.workdir`.
  // Note: `load_env` may be different from `env_`.
  // If `rerun` is true, then also re-runs any inputs
  // for which the features are not found in `load_env.workdir`.
  void LoadShard(const Environment &load_env, size_t shard_index, bool rerun);

  // Loads all the shards from corpus files in `load_env.workdir` in random
  // order. If `rerun_my_shard` is true, then also re-runs any inputs found in
  // `load_env.my_shard_index`th shard.
  // Note: `load_env` may be different from `env_`.
  void LoadAllShardsInRandomOrder(const Environment &load_env,
                                  bool rerun_my_shard);

  // Runs all inputs from `to_rerun`, adds their features to the features file
  // of `env_.my_shard_index`, adds interesting inputs to the corpus.
  // `to_rerun` is taken by non-const reference.
  // NOTE(review): presumably it is consumed/modified here - confirm in
  // centipede.cc.
  void Rerun(std::vector<ByteArray> &to_rerun);

  // Prints one logging line with `log_type` in it
  // if `min_log_level` is not greater than `env_.log_level`.
  void UpdateAndMaybeLogStats(std::string_view log_type, size_t min_log_level);

  // For every feature in `fv`, translates the feature into code coverage
  // (PCIndex), then prints one logging line for every
  // FUNC/EDGE observed for the first time.
  // If symbolization failed, prints a simpler logging line.
  // Uses `coverage_logger_`.
  void LogFeaturesAsSymbols(const FeatureVec &fv);

  // Generates a coverage report file in workdir.
  void GenerateCoverageReport(std::string_view filename_annotation,
                              std::string_view description);

  // Generates a corpus stats file in workdir.
  void GenerateCorpusStats(std::string_view filename_annotation,
                           std::string_view description);

  // Generates the clang source-based coverage report in workdir.
  void GenerateSourceBasedCoverageReport(std::string_view filename_annotation,
                                         std::string_view description);

  // Generates a performance report file in workdir.
  void GenerateRUsageReport(std::string_view filename_annotation,
                            std::string_view description);

  // Generates all the report and stats files in workdir if this shard is
  // assigned to do that.
  void MaybeGenerateTelemetry(std::string_view filename_annotation,
                              std::string_view description);

  // Generates all the report and stats files in workdir if this shard is
  // assigned to do that and if `batch_index` satisfies the telemetry frequency
  // criteria set via the flags.
  void MaybeGenerateTelemetryAfterBatch(std::string_view filename_annotation,
                                        size_t batch_index);

  // Returns true if `input` passes `env_.input_filter`.
  bool InputPassesFilter(const ByteArray &input);

  // Executes `binary` with `input_vec` and `batch_result` as input/output.
  // If the binary crashes, calls ReportCrash().
  // Returns true iff there were no crashes.
  bool ExecuteAndReportCrash(std::string_view binary,
                             const std::vector<ByteArray> &input_vec,
                             BatchResult &batch_result);

  // Reports a crash and saves the reproducer to workdir/crashes, if possible.
  // * `binary` is the binary causing the crash.
  // * `input_vec` is the batch of inputs that caused a crash.
  // * `batch_result` contains the features computed for `input_vec`
  //   (batch_result.results().size() == input_vec.size()). `batch_result` is
  //   used as a hint when choosing which input to try first.
  // Prints the first `env_.max_num_crash_reports` logs.
  // Stops early if `EarlyExitRequested()`.
  void ReportCrash(std::string_view binary,
                   const std::vector<ByteArray> &input_vec,
                   const BatchResult &batch_result);

  // Merges shard `shard_index_to_merge` of the corpus in `merge_from_dir`
  // into the current corpus.
  // Writes added inputs to the current shard.
  void MergeFromOtherCorpus(std::string_view merge_from_dir,
                            size_t shard_index_to_merge);

  // Reloads the entire corpus for all the shards from workdir (as if with
  // `env_.full_sync`) thus distilling it, and saves it to a single file with a
  // shard-hashed name in the workdir.
  void ReloadAllShardsAndWriteDistilledCorpus();

  // Collects all PCs from `fv`, then adds PC-pair features to `fv`.
  // Returns the number of added features.
  // See more comments in centipede.cc.
  size_t AddPcPairFeatures(FeatureVec &fv);

  // NOTE: the declaration order of the data members below is significant:
  // C++ initializes non-static members in declaration order, so do not
  // reorder them without checking the constructor in centipede.cc.

  const Environment &env_;
  CentipedeCallbacks &user_callbacks_;
  Rng rng_;

  // A timestamp set just before the actual fuzzing begins. Used to measure
  // the fuzzing performance. Holds absl::InfiniteFuture() until it is set.
  absl::Time fuzz_start_time_ = absl::InfiniteFuture();

  FeatureSet fs_;
  Corpus corpus_;
  CoverageFrontier coverage_frontier_;
  size_t num_runs_ = 0;  // counts executed inputs

  // Binary-related data, initialized at startup, once per process,
  // by calling the PopulateBinaryInfo callback.
  const BinaryInfo &binary_info_;
  const PCTable &pc_table_;     // same as binary_info_.pc_table.
  const SymbolTable &symbols_;  // same as binary_info_.symbols.

  // Derived from env_.function_filter. Currently, duplicated by every thread.
  // In future, threads may have different filters.
  const FunctionFilter function_filter_;

  // Ensures every coverage location is reported at most once.
  // This object is shared with other threads, it is thread-safe.
  CoverageLogger &coverage_logger_;

  // Statistics of the current run.
  Stats &stats_;

  // Counts the number of crashes reported so far.
  int num_crashes_ = 0;

  // Scratch object for AddPcPairFeatures.
  std::vector<size_t> add_pc_pair_scratch_;

  // Path and command for the input_filter.
  std::string input_filter_path_;
  Command input_filter_cmd_;

  // Resource usage stats collection & reporting.
  perf::RUsageProfiler rusage_profiler_;
};
} // namespace centipede
#endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_