Save --analyze coverage information to a proto PiperOrigin-RevId: 561133459
diff --git a/centipede/BUILD b/centipede/BUILD index 07db178..45b3010 100644 --- a/centipede/BUILD +++ b/centipede/BUILD
@@ -77,6 +77,18 @@
     deps = [":seed_corpus_config_proto"],
 )
 
+# Proto representation of coverage reports
+proto_library(
+    name = "coverage_proto",
+    srcs = ["coverage.proto"],
+)
+
+# C++ bindings generated from `coverage_proto`.
+cc_proto_library(
+    name = "coverage_cc_proto",
+    deps = [":coverage_proto"],
+)
+
 ################################################################################
 # C++ libraries
 ################################################################################
@@ -314,9 +326,12 @@
         ":binary_info",
         ":control_flow",
         ":corpus",
+        ":coverage_cc_proto",
         ":feature",
         ":logging",
         "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/centipede/analyze_corpora.cc b/centipede/analyze_corpora.cc index d4a4bbc..60680a7 100644 --- a/centipede/analyze_corpora.cc +++ b/centipede/analyze_corpora.cc
@@ -15,17 +15,76 @@
 #include "./centipede/analyze_corpora.h"
 
 #include <algorithm>
+#include <cstddef>
+#include <fstream>
+#include <ios>
+#include <string>
+#include <string_view>
+#include <vector>
 
 #include "absl/container/flat_hash_set.h"
+#include "absl/log/check.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
 #include "./centipede/control_flow.h"
 #include "./centipede/corpus.h"
+#include "./centipede/coverage.pb.h"
 #include "./centipede/feature.h"
 #include "./centipede/logging.h"
 
 namespace centipede {
+
+namespace {
+
+// Builds a `CoverageReport` with one `Edge` per entry of `pcs`, taking the
+// function name and source location of each PC from `symbols`.
+//
+// A location is expected to look like "file:line:column". The last two
+// ':'-separated fields are parsed as line and column, so a file name that
+// itself contains ':' stays intact. A location of any other shape does not
+// abort the analysis: it is stored verbatim as the file name, line and column
+// are left unset, and the number of such locations is logged once.
+// NOTE(review): presumably this is what unsymbolized PCs look like; confirm
+// against SymbolTable.
+CoverageReport ToCoverageReport(const std::vector<size_t> &pcs,
+                                const SymbolTable &symbols) {
+  CoverageReport result;
+  size_t num_unparsed_locations = 0;
+  for (const size_t pc : pcs) {
+    CoverageReport::Edge *edge = result.add_covered_edges();
+    edge->set_function_name(symbols.func(pc));
+
+    const std::string location = symbols.location(pc);
+    const std::vector<std::string> parts = absl::StrSplit(location, ':');
+    const size_t num_parts = parts.size();
+    int line = 0;
+    int column = 0;
+    if (num_parts >= 3 && absl::SimpleAtoi(parts[num_parts - 2], &line) &&
+        absl::SimpleAtoi(parts[num_parts - 1], &column)) {
+      // Everything before the trailing ":line:column" is the file name.
+      const size_t suffix_size =
+          parts[num_parts - 2].size() + parts[num_parts - 1].size() + 2;
+      edge->set_file_name(location.substr(0, location.size() - suffix_size));
+      edge->set_line(line);
+      edge->set_column(column);
+    } else {
+      edge->set_file_name(location);
+      ++num_unparsed_locations;
+    }
+  }
+  if (num_unparsed_locations > 0) {
+    LOG(WARNING) << "Source locations not in file:line:column form: "
+                 << num_unparsed_locations;
+  }
+  return result;
+}
+
+}  // namespace
+
 void AnalyzeCorpora(const BinaryInfo &binary_info,
                     const std::vector<CorpusRecord> &a,
-                    const std::vector<CorpusRecord> &b) {
+                    const std::vector<CorpusRecord> &b,
+                    std::string_view analyze_report_path) {
   // `a_pcs` will contain all PCs covered by `a`.
   absl::flat_hash_set<size_t> a_pcs;
   for (const auto &record : a) {
@@ -40,6 +99,7 @@
   // `b_unique_indices` are indices of inputs that have PCs from `b_only_pcs`.
   // `b_shared_indices` are indices of all other inputs from `b`.
   absl::flat_hash_set<size_t> b_only_pcs;
+  absl::flat_hash_set<size_t> b_pcs;
   std::vector<size_t> b_shared_indices, b_unique_indices;
   for (size_t i = 0; i < b.size(); ++i) {
     const auto &record = b[i];
@@ -47,6 +107,7 @@
     for (const auto &feature : record.features) {
       if (!feature_domains::kPCs.Contains(feature)) continue;
       auto pc = ConvertPCFeatureToPcIndex(feature);
+      b_pcs.insert(pc);
       if (a_pcs.contains(pc)) continue;
       b_only_pcs.insert(pc);
       has_b_only = true;
@@ -56,20 +117,38 @@
     else
       b_shared_indices.push_back(i);
   }
+
+  // `a_only_pcs` will contain PCs covered by `a` but not by `b`.
+  absl::flat_hash_set<size_t> a_only_pcs;
+  for (const auto &record : a) {
+    for (const auto &feature : record.features) {
+      if (!feature_domains::kPCs.Contains(feature)) continue;
+      auto pc = ConvertPCFeatureToPcIndex(feature);
+      if (b_pcs.contains(pc)) continue;
+      a_only_pcs.insert(pc);
+    }
+  }
 
   LOG(INFO) << VV(a.size()) << VV(b.size()) << VV(a_pcs.size())
-            << VV(b_only_pcs.size()) << VV(b_shared_indices.size())
-            << VV(b_unique_indices.size());
+            << VV(a_only_pcs.size()) << VV(b_only_pcs.size())
+            << VV(b_shared_indices.size()) << VV(b_unique_indices.size());
 
   const auto &pc_table = binary_info.pc_table;
   const auto &symbols = binary_info.symbols;
   CoverageLogger coverage_logger(pc_table, symbols);
 
-  CoverageFrontier frontier_a(binary_info);
-  frontier_a.Compute(a);
+  // TODO: these cause a CHECK-fail
+  // CoverageFrontier frontier_a(binary_info);
+  // frontier_a.Compute(a);
   // TODO(kcc): use frontier_a to show the most interesting b-only PCs.
 
-  // Sort b-only PCs to print them in the canonical order, as in pc_table.
+  // Sort PCs to print them in the canonical order, as in pc_table.
+  std::vector<size_t> a_pcs_vec{a_pcs.begin(), a_pcs.end()};
+  std::sort(a_pcs_vec.begin(), a_pcs_vec.end());
+  std::vector<size_t> b_pcs_vec{b_pcs.begin(), b_pcs.end()};
+  std::sort(b_pcs_vec.begin(), b_pcs_vec.end());
+  std::vector<size_t> a_only_pcs_vec{a_only_pcs.begin(), a_only_pcs.end()};
+  std::sort(a_only_pcs_vec.begin(), a_only_pcs_vec.end());
   std::vector<size_t> b_only_pcs_vec{b_only_pcs.begin(), b_only_pcs.end()};
   std::sort(b_only_pcs_vec.begin(), b_only_pcs_vec.end());
 
@@ -90,6 +169,31 @@
     auto str = coverage_logger.ObserveAndDescribeIfNew(pc);
     if (!str.empty()) LOG(INFO).NoPrefix() << str;
   }
+
+  // Optionally save the full and unique coverage of both corpora as a binary
+  // `AnalyzeReport` proto.
+  if (!analyze_report_path.empty()) {
+    AnalyzeReport analyze_report;
+    CoverageReport *a_coverage = analyze_report.mutable_a_coverage();
+    *a_coverage = ToCoverageReport(a_pcs_vec, symbols);
+    CoverageReport *b_coverage = analyze_report.mutable_b_coverage();
+    *b_coverage = ToCoverageReport(b_pcs_vec, symbols);
+    CoverageReport *a_only_coverage = analyze_report.mutable_a_only_coverage();
+    *a_only_coverage = ToCoverageReport(a_only_pcs_vec, symbols);
+    CoverageReport *b_only_coverage = analyze_report.mutable_b_only_coverage();
+    *b_only_coverage = ToCoverageReport(b_only_pcs_vec, symbols);
+
+    // Every I/O step is checked so that a missing or truncated report is never
+    // silently left behind.
+    std::fstream f(std::string{analyze_report_path},
+                   std::ios::binary | std::ios::out);
+    CHECK(f) << "Unable to open AnalyzeReport path: " << analyze_report_path;
+    CHECK(analyze_report.SerializeToOstream(&f))
+        << "Unable to serialize AnalyzeReport to: " << analyze_report_path;
+    f.close();
+    CHECK(!f.fail()) << "Unable to write AnalyzeReport to: "
+                     << analyze_report_path;
+  }
 }
 
 }  // namespace centipede
diff --git a/centipede/analyze_corpora.h b/centipede/analyze_corpora.h index 9843562..cef41a9 100644 --- a/centipede/analyze_corpora.h +++ b/centipede/analyze_corpora.h
@@ -23,7 +23,11 @@
 // Analyzes two corpora, `a` and `b`, reports the differences.
+// If `analyze_report_path` is non-empty, the coverage of `a`, the coverage of
+// `b`, and the coverage unique to each of them are also written to that path
+// as a binary `AnalyzeReport` proto.
 void AnalyzeCorpora(const BinaryInfo &binary_info,
                     const std::vector<CorpusRecord> &a,
-                    const std::vector<CorpusRecord> &b);
+                    const std::vector<CorpusRecord> &b,
+                    std::string_view analyze_report_path);
 
 }  // namespace centipede
 
diff --git a/centipede/centipede_interface.cc b/centipede/centipede_interface.cc index 9ea4a10..5dfe2e4 100644 --- a/centipede/centipede_interface.cc +++ b/centipede/centipede_interface.cc
@@ -178,7 +178,7 @@ LOG(INFO) << "corpus size " << corpus.size(); } CHECK_EQ(corpora.size(), 2); - AnalyzeCorpora(binary_info, corpora[0], corpora[1]); + AnalyzeCorpora(binary_info, corpora[0], corpora[1], env.analyze_report); return EXIT_SUCCESS; }
diff --git a/centipede/coverage.proto b/centipede/coverage.proto new file mode 100644 index 0000000..b27ea64 --- /dev/null +++ b/centipede/coverage.proto
@@ -0,0 +1,54 @@
+// Copyright 2023 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Proto representation of Centipede coverage reports.
+syntax = "proto3";
+
+package centipede;
+
+// Describes the covered edges and functions from, e.g., a fuzzing run.
+//
+// Next tag: 2
+message CoverageReport {
+  // Describes a particular location within a program.
+  //
+  // Next tag: 5
+  message Edge {
+    // The name of the function where the edge resides.
+    string function_name = 1;
+    // The name of the file where the edge resides.
+    string file_name = 2;
+    // The line in `file_name` where the edge resides.
+    int32 line = 3;
+    // The column in `file_name` where the edge resides.
+    int32 column = 4;
+  }
+  // The covered edges. `--analyze` adds one entry per covered PC.
+  repeated Edge covered_edges = 1;
+}
+
+// Describes the comparison of the coverage of two corpora A and B (--analyze).
+// Written as a binary proto to the path given by --analyze_report.
+//
+// Next tag: 5
+message AnalyzeReport {
+  // Coverage from corpus A.
+  CoverageReport a_coverage = 1;
+  // Coverage from corpus B.
+  CoverageReport b_coverage = 2;
+  // Coverage found in A but not in B.
+  CoverageReport a_only_coverage = 3;
+  // Coverage found in B but not in A.
+  CoverageReport b_only_coverage = 4;
+}
diff --git a/centipede/environment.cc b/centipede/environment.cc index 753c621..cccdb16 100644 --- a/centipede/environment.cc +++ b/centipede/environment.cc
@@ -337,6 +337,9 @@ " as argv and analyze differences between those corpora." " Used by the Centipede developers to improve the engine. " " TODO(kcc) implement. "); +ABSL_FLAG(std::string, analyze_report, "", + "If set, --analyze will output a binary proto to the provided path " + "containing the AnalyzeReport"); ABSL_FLAG(std::string, dictionary, "", "A comma-separated list of paths to dictionary files. The dictionary " "file is either in AFL/libFuzzer plain text format or in the binary " @@ -472,6 +475,7 @@ for_each_blob(absl::GetFlag(FLAGS_for_each_blob)), experiment(absl::GetFlag(FLAGS_experiment)), analyze(absl::GetFlag(FLAGS_analyze)), + analyze_report(absl::GetFlag(FLAGS_analyze_report)), exit_on_crash(absl::GetFlag(FLAGS_exit_on_crash)), max_num_crash_reports(absl::GetFlag(FLAGS_num_crash_reports)), minimize_crash_file_path(absl::GetFlag(FLAGS_minimize_crash)),
diff --git a/centipede/environment.h b/centipede/environment.h index 9d36950..84b97e2 100644 --- a/centipede/environment.h +++ b/centipede/environment.h
@@ -105,6 +105,7 @@ std::string for_each_blob; std::string experiment; bool analyze; + std::string analyze_report; bool exit_on_crash; size_t max_num_crash_reports; std::string minimize_crash_file_path;