| // Copyright 2022 The Centipede Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_ |
| #define THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_ |
| |
| #include <cstddef> |
| #include <string> |
| #include <string_view> |
| #include <vector> |
| |
| #include "./centipede/blob_file.h" |
| #include "./centipede/centipede_callbacks.h" |
| #include "./centipede/command.h" |
| #include "./centipede/control_flow.h" |
| #include "./centipede/corpus.h" |
| #include "./centipede/coverage.h" |
| #include "./centipede/defs.h" |
| #include "./centipede/environment.h" |
| #include "./centipede/execution_result.h" |
| #include "./centipede/rusage_profiler.h" |
| #include "./centipede/stats.h" |
| #include "./centipede/symbol_table.h" |
| |
| namespace centipede { |
| |
| // The main fuzzing class. The entry point is FuzzingLoop(). |
| class Centipede { |
| public: |
| Centipede(const Environment &env, CentipedeCallbacks &user_callbacks, |
| const BinaryInfo &binary_info, CoverageLogger &coverage_logger, |
| Stats &stats); |
| virtual ~Centipede() = default; |
| |
| // Non-copyable and non-movable. |
| Centipede(const Centipede &) = delete; |
| Centipede(Centipede &&) noexcept = delete; |
| Centipede &operator=(const Centipede &) = delete; |
| Centipede &operator=(Centipede &&) noexcept = delete; |
| |
| // The main fuzzing loop. |
| void FuzzingLoop(); |
| |
| // Saves the sharded corpus into `dir`, one file per input. |
| static void SaveCorpusToLocalDir(const Environment &env, |
| std::string_view dir); |
| // Exports the corpus from `dir` (one file per input) into the sharded corpus. |
| // Reads `dir` recursively. |
| // Ignores inputs that already exist in the shard they need to be added to. |
| // Sharding is stable and depends only on env.total_shards and the file name. |
| static void ExportCorpusFromLocalDir(const Environment &env, |
| std::string_view dir); |
| |
| private: |
| // Executes inputs from `input_vec`. For every input, its pruned features are |
| // written to `unconditional_features_file` (if that's non-null). |
| // For every input that caused new features to be observed: |
| // * the input is added to the corpus (corpus_ and fs_ are updated). |
| // * the input is written to `corpus_file` (if that's non-null). |
| // * its features are written to `features_file` (if that's non-null). |
| // Returns true if new features were observed. |
| // Post-condition: `batch_result.results.size()` == `input_vec.size()`. |
| // NOTE(review): `batch_result` is not a parameter of RunBatch - confirm. |
| bool RunBatch(const std::vector<ByteArray> &input_vec, |
| BlobFileWriter *corpus_file, BlobFileWriter *features_file, |
| BlobFileWriter *unconditional_features_file); |
| // Loads a shard `shard_index` from `load_env.workdir`. |
| // Note: `load_env` may be different from `env_`. |
| // If `rerun` is true, then also re-runs any inputs |
| // for which the features are not found in `load_env.workdir`. |
| void LoadShard(const Environment &load_env, size_t shard_index, bool rerun); |
| // Loads all the shards from corpus files in `load_env.workdir` in random |
| // order. If `rerun_my_shard` is true, then also re-runs any inputs found in |
| // `load_env.my_shard_index`th shard. Note: `load_env` may be different from |
| // `env_`. |
| void LoadAllShardsInRandomOrder(const Environment &load_env, |
| bool rerun_my_shard); |
| // Runs all inputs from `to_rerun`, adds their features to the features file |
| // of env_.my_shard_index, adds interesting inputs to the corpus. |
| void Rerun(std::vector<ByteArray> &to_rerun); |
| |
| // Prints one logging line with `log_type` in it |
| // if `min_log_level` is not greater than `env_.log_level`. |
| void UpdateAndMaybeLogStats(std::string_view log_type, size_t min_log_level); |
| // For every feature in `f`, translates the feature into code coverage |
| // (PCIndex), then prints one logging line for every |
| // FUNC/EDGE observed for the first time. |
| // If symbolization failed, prints a simpler logging line. |
| // Uses coverage_logger_. |
| void LogFeaturesAsSymbols(const FeatureVec &f); |
| |
| // Generates a coverage report file in workdir. |
| void GenerateCoverageReport(std::string_view filename_annotation, |
| std::string_view description); |
| // Generates a corpus stats file in workdir. |
| void GenerateCorpusStats(std::string_view filename_annotation, |
| std::string_view description); |
| // Generates the clang source-based coverage report in workdir. |
| void GenerateSourceBasedCoverageReport(std::string_view filename_annotation, |
| std::string_view description); |
| // Generates a performance report file in workdir. |
| void GenerateRUsageReport(std::string_view filename_annotation, |
| std::string_view description); |
| // Generates all the report and stats files in workdir if this shard is |
| // assigned to do that. |
| void MaybeGenerateTelemetry(std::string_view filename_annotation, |
| std::string_view description); |
| // Generates all the report and stats files in workdir if this shard is |
| // assigned to do that and if `batch_index` satisfies the telemetry frequency |
| // criteria set via the flags. |
| void MaybeGenerateTelemetryAfterBatch(std::string_view filename_annotation, |
| size_t batch_index); |
| |
| // Returns true if `input` passes env_.input_filter. |
| bool InputPassesFilter(const ByteArray &input); |
| // Executes `binary` with `input_vec` and `batch_result` as input/output. |
| // If the binary crashes, calls ReportCrash(). |
| // Returns true iff there were no crashes. |
| bool ExecuteAndReportCrash(std::string_view binary, |
| const std::vector<ByteArray> &input_vec, |
| BatchResult &batch_result); |
| // Reports a crash and saves the reproducer to workdir/crashes, if possible. |
| // `binary` is the binary causing the crash. |
| // Prints the first `env_.max_num_crash_reports` logs. |
| // `input_vec` is the batch of inputs that caused a crash. |
| // `batch_result` contains the features computed for `input_vec` |
| // (batch_result.results().size() == input_vec.size()). `batch_result` is used |
| // as a hint when choosing which input to try first. |
| // Stops early if `EarlyExitRequested()`. |
| void ReportCrash(std::string_view binary, |
| const std::vector<ByteArray> &input_vec, |
| const BatchResult &batch_result); |
| // Merges shard `shard_index_to_merge` of the corpus in `merge_from_dir` |
| // into the current corpus. |
| // Writes added inputs to the current shard. |
| void MergeFromOtherCorpus(std::string_view merge_from_dir, |
| size_t shard_index_to_merge); |
| // Reloads the entire corpus for all the shards from workdir (as if with |
| // `env_.full_sync`) thus distilling it, and saves it to a single file with a |
| // shard-hashed name in the workdir. |
| void ReloadAllShardsAndWriteDistilledCorpus(); |
| |
| // Collects all PCs from `fv`, then adds PC-pair features to `fv`. |
| // Returns the number of added features. |
| // See more comments in centipede.cc. |
| size_t AddPcPairFeatures(FeatureVec &fv); |
| |
| const Environment &env_; |
| CentipedeCallbacks &user_callbacks_; |
| Rng rng_; |
| |
| // A timestamp set just before the actual fuzzing begins. Used to measure |
| // fuzzing performance. NOTE(review): absl/time/time.h not #included here. |
| absl::Time fuzz_start_time_ = absl::InfiniteFuture(); |
| |
| FeatureSet fs_; |
| Corpus corpus_; |
| CoverageFrontier coverage_frontier_; |
| size_t num_runs_ = 0; // counts executed inputs |
| |
| // Binary-related data, initialized at startup, once per process, |
| // by calling the PopulateBinaryInfo callback. |
| const BinaryInfo &binary_info_; |
| const PCTable &pc_table_; // same as binary_info_.pc_table. |
| const SymbolTable &symbols_; // same as binary_info_.symbols. |
| |
| // Derived from env_.function_filter. Currently, duplicated by every thread. |
| // In future, threads may have different filters. |
| const FunctionFilter function_filter_; |
| |
| // Ensures every coverage location is reported at most once. |
| // This object is shared with other threads, it is thread-safe. |
| CoverageLogger &coverage_logger_; |
| |
| // Statistics of the current run. |
| Stats &stats_; |
| |
| // Counts the number of crashes reported so far. |
| int num_crashes_ = 0; |
| |
| // Scratch object for AddPcPairFeatures. |
| std::vector<size_t> add_pc_pair_scratch_; |
| |
| // Path and command for the input_filter. |
| std::string input_filter_path_; |
| Command input_filter_cmd_; |
| |
| // Resource usage stats collection & reporting. |
| perf::RUsageProfiler rusage_profiler_; |
| }; |
| |
| } // namespace centipede |
| |
| #endif // THIRD_PARTY_CENTIPEDE_CENTIPEDE_H_ |