No public description PiperOrigin-RevId: 588874409
diff --git a/centipede/BUILD b/centipede/BUILD index d8a553e..88ddb79 100644 --- a/centipede/BUILD +++ b/centipede/BUILD
@@ -129,12 +129,17 @@ ":control_flow", ":logging", ":pc_info", - ":util", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", + "@com_google_riegeli//riegeli/base:any_dependency", + "@com_google_riegeli//riegeli/bytes:fd_reader", + "@com_google_riegeli//riegeli/bytes:fd_writer", + "@com_google_riegeli//riegeli/bytes:reader", + "@com_google_riegeli//riegeli/bytes:writer", + "@com_google_riegeli//riegeli/lines:line_reading", + "@com_google_riegeli//riegeli/text:write_int", ], ) @@ -165,11 +170,7 @@ srcs = ["pc_info.cc"], hdrs = ["pc_info.h"], visibility = PUBLIC_API_VISIBILITY, - deps = [ - ":defs", - "@com_google_absl//absl/log:check", - "@com_google_absl//absl/types:span", - ], + deps = ["@com_google_absl//absl/log:check"], ) cc_library( @@ -234,7 +235,6 @@ deps = [ ":defs", ":feature", - ":logging", ":remote_file", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/log:check", @@ -242,6 +242,14 @@ "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", + "@com_google_riegeli//riegeli/base:closing_ptr", + "@com_google_riegeli//riegeli/bytes:copy_all", + "@com_google_riegeli//riegeli/bytes:fd_reader", + "@com_google_riegeli//riegeli/bytes:fd_writer", + "@com_google_riegeli//riegeli/bytes:read_all", + "@com_google_riegeli//riegeli/bytes:resizable_writer", + "@com_google_riegeli//riegeli/bytes:string_writer", + "@com_google_riegeli//riegeli/bytes:write", ], ) @@ -290,6 +298,9 @@ "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/bytes:fd_writer", + "@com_google_riegeli//riegeli/bytes:read_all", + "@com_google_riegeli//riegeli/bytes:write", ], ) @@ -352,6 +363,9 @@ "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", + "@com_google_riegeli//riegeli/bytes:writer", + "@com_google_riegeli//riegeli/csv:csv_record", + "@com_google_riegeli//riegeli/csv:csv_writer", ], ) @@ -413,6 +427,7 @@ "@com_google_absl//absl/status", "@com_google_absl//absl/strings:string_view", "@com_google_riegeli//riegeli/base:object", + "@com_google_riegeli//riegeli/bytes:read_all", "@com_google_riegeli//riegeli/bytes:reader", "@com_google_riegeli//riegeli/bytes:writer", "@com_google_riegeli//riegeli/records:record_reader", @@ -507,16 +522,18 @@ visibility = PUBLIC_API_VISIBILITY, deps = [ ":control_flow", - ":defs", ":feature", ":logging", + ":pc_info", ":symbol_table", - ":util", "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", + "@com_google_riegeli//riegeli/base:any_dependency", + "@com_google_riegeli//riegeli/bytes:string_writer", + "@com_google_riegeli//riegeli/bytes:writer", ], ) @@ -536,12 +553,16 @@ ":defs", ":logging", ":pc_info", - ":util", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/base:closing_ptr", + "@com_google_riegeli//riegeli/bytes:copy_all", + "@com_google_riegeli//riegeli/bytes:fd_reader", + "@com_google_riegeli//riegeli/bytes:resizable_writer", + "@com_google_riegeli//riegeli/lines:line_reading", ], ) @@ -569,7 +590,6 @@ srcs = ["remote_file.cc"], hdrs = ["remote_file.h"], deps = [ - ":defs", ":logging", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/log", @@ -613,6 +633,8 @@ "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/base:any_dependency", + "@com_google_riegeli//riegeli/bytes:writer", ], ) @@ -647,6 +669,7 @@ ":call_graph", ":command", ":control_flow", + ":logging", ":pc_info", ":remote_file", ":symbol_table", @@ -654,6 +677,8 @@ "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_riegeli//riegeli/bytes:reader_istream", + "@com_google_riegeli//riegeli/bytes:writer_ostream", ], ) @@ -777,6 +802,9 @@ "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", + "@com_google_riegeli//riegeli/bytes:read_all", + "@com_google_riegeli//riegeli/bytes:write", + "@com_google_riegeli//riegeli/bytes:writer", ], ) @@ -838,6 +866,7 @@ "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", + "@com_google_riegeli//riegeli/bytes:read_all", ], ) @@ -1060,6 +1089,7 @@ "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", "@com_google_protobuf//:protobuf", + "@com_google_riegeli//riegeli/bytes:read_all", ], ) @@ -1129,6 +1159,7 @@ ":symbol_table", ":test_util", "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:string_reader", ], ) @@ -1186,6 +1217,8 @@ deps = [ ":symbol_table", "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:string_reader", + "@com_google_riegeli//riegeli/bytes:string_writer", ], ) @@ -1301,6 +1334,7 @@ ":stats", ":test_util", ":util", + "@com_google_absl//absl/log", "@com_google_absl//absl/log:log_entry", "@com_google_absl//absl/log:log_sink", "@com_google_absl//absl/log:log_sink_registry", @@ -1482,12 +1516,16 @@ name = "corpus_test", srcs = ["corpus_test.cc"], deps = [ + ":binary_info", + ":call_graph", ":control_flow", ":corpus", ":defs", ":feature", ":feature_set", + ":pc_info", "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:string_writer", ], ) @@ -1635,20 +1673,23 @@ tags = ["not_run:arm"], deps = [ ":binary_info", - ":centipede_interface", + ":centipede_callbacks", ":control_flow", ":coverage", ":defs", ":environment", ":feature", - ":logging", + ":mutation_input", ":pc_info", ":runner_result", ":symbol_table", ":test_util", ":util", - "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", + "@com_google_riegeli//riegeli/bytes:string_reader", + "@com_google_riegeli//riegeli/bytes:string_writer", ], )
diff --git a/centipede/batch_fuzz_example/BUILD b/centipede/batch_fuzz_example/BUILD index 8f125a7..9a4e17a 100644 --- a/centipede/batch_fuzz_example/BUILD +++ b/centipede/batch_fuzz_example/BUILD
@@ -58,6 +58,7 @@ "@com_google_fuzztest//centipede:centipede_callbacks", "@com_google_fuzztest//centipede:centipede_interface", "@com_google_fuzztest//centipede:config_file", + "@com_google_fuzztest//centipede:environment", "@com_google_fuzztest//centipede:environment_flags", ], )
diff --git a/centipede/batch_fuzz_example/customized_centipede.cc b/centipede/batch_fuzz_example/customized_centipede.cc index 6bea212..34c5fb7 100644 --- a/centipede/batch_fuzz_example/customized_centipede.cc +++ b/centipede/batch_fuzz_example/customized_centipede.cc
@@ -39,7 +39,7 @@ ByteArray content; ReadFromLocalFile(output_file, content); if (content.empty()) { - LOG(WARNING) << "Skip updating batch result with an emtpy output file: " + LOG(WARNING) << "Skip updating batch result with an empty output file: " << output_file; return true; }
diff --git a/centipede/binary_info.cc b/centipede/binary_info.cc index 95a6b08..8af9bd3 100644 --- a/centipede/binary_info.cc +++ b/centipede/binary_info.cc
@@ -16,7 +16,6 @@ #include <cstdlib> #include <filesystem> // NOLINT -#include <sstream> #include <string> #include <string_view> #include <vector> @@ -27,9 +26,12 @@ #include "absl/strings/str_split.h" #include "./centipede/command.h" #include "./centipede/control_flow.h" +#include "./centipede/logging.h" #include "./centipede/pc_info.h" #include "./centipede/remote_file.h" #include "./centipede/util.h" +#include "riegeli/bytes/reader_istream.h" +#include "riegeli/bytes/writer_ostream.h" namespace centipede { @@ -121,31 +123,25 @@ } void BinaryInfo::Read(std::string_view dir) { - std::string symbol_table_contents; // TODO(b/295978603): move calculation of paths into WorkDir class. - RemoteFileGetContents(std::filesystem::path(dir).append(kSymbolTableFileName), - symbol_table_contents); - std::istringstream symbol_table_stream(symbol_table_contents); - symbols.ReadFromLLVMSymbolizer(symbol_table_stream); + symbols.ReadFromLLVMSymbolizer(CreateRiegeliFileReader( + std::filesystem::path(dir).append(kSymbolTableFileName).native())); - std::string pc_table_contents; - RemoteFileGetContents(std::filesystem::path(dir).append(kPCTableFileName), - pc_table_contents); - std::istringstream pc_table_stream(pc_table_contents); + riegeli::ReaderIStream pc_table_stream(CreateRiegeliFileReader( + std::filesystem::path(dir).append(kPCTableFileName).native())); pc_table = ReadPcTable(pc_table_stream); + CHECK(pc_table_stream.close()) << VV(pc_table_stream.status()); } void BinaryInfo::Write(std::string_view dir) { - std::ostringstream symbol_table_stream; - symbols.WriteToLLVMSymbolizer(symbol_table_stream); // TODO(b/295978603): move calculation of paths into WorkDir class. - RemoteFileSetContents(std::filesystem::path(dir).append(kSymbolTableFileName), - symbol_table_stream.str()); + symbols.WriteToLLVMSymbolizer(CreateRiegeliFileWriter( + std::filesystem::path(dir).append(kSymbolTableFileName).native())); - std::ostringstream pc_table_stream; + riegeli::WriterOStream pc_table_stream(CreateRiegeliFileWriter( + std::filesystem::path(dir).append(kPCTableFileName).native())); WritePcTable(pc_table, pc_table_stream); - RemoteFileSetContents(std::filesystem::path(dir).append(kPCTableFileName), - pc_table_stream.str()); + CHECK(pc_table_stream.close()) << VV(pc_table_stream.status()); } } // namespace centipede
diff --git a/centipede/binary_info_test.cc b/centipede/binary_info_test.cc index c0eba16..0ab6149 100644 --- a/centipede/binary_info_test.cc +++ b/centipede/binary_info_test.cc
@@ -14,20 +14,20 @@ #include "./centipede/binary_info.h" -#include <sstream> -#include <string> +#include <string_view> #include "gtest/gtest.h" #include "./centipede/pc_info.h" #include "./centipede/symbol_table.h" #include "./centipede/test_util.h" +#include "riegeli/bytes/string_reader.h" namespace centipede { namespace { TEST(BinaryInfoTest, SerializesAndDeserializesBinaryInfoSuccessfully) { PCTable input_pcs = {{.pc = 0, .flags = 1}, {.pc = 2, .flags = 3}}; - std::string input_symbols = + const std::string_view input_symbols = R"(FunctionOne source/location/one.cc:1:0 @@ -35,9 +35,8 @@ source/location/two.cc:2:0 )"; - std::istringstream input_stream(input_symbols); SymbolTable symbol_table; - symbol_table.ReadFromLLVMSymbolizer(input_stream); + symbol_table.ReadFromLLVMSymbolizer(riegeli::StringReader(input_symbols)); BinaryInfo input = {.pc_table = input_pcs, .symbols = symbol_table}; auto temp_dir = GetTestTempDir(test_info_->name());
diff --git a/centipede/blob_file.cc b/centipede/blob_file.cc index fd9bbc2..6d2542e 100644 --- a/centipede/blob_file.cc +++ b/centipede/blob_file.cc
@@ -16,7 +16,9 @@ #include <cstddef> #include <memory> +#include <string> #include <string_view> +#include <utility> #include <vector> #include "absl/log/check.h" @@ -28,6 +30,7 @@ #include "./centipede/remote_file.h" #include "./centipede/util.h" #include "riegeli/base/object.h" +#include "riegeli/bytes/read_all.h" #include "riegeli/bytes/reader.h" #include "riegeli/bytes/writer.h" #include "riegeli/records/record_reader.h" @@ -47,7 +50,7 @@ class SimpleBlobFileReader : public BlobFileReader { public: ~SimpleBlobFileReader() override { - if (file_ && !closed_) { + if (open_ && !closed_) { // Virtual resolution is off in dtors, so use a specific Close(). CHECK_OK(SimpleBlobFileReader::Close()); } @@ -55,22 +58,23 @@ absl::Status Open(std::string_view path) override { if (closed_) return absl::FailedPreconditionError("already closed"); - if (file_) return absl::FailedPreconditionError("already open"); - file_ = RemoteFileOpen(path, "r"); - if (file_ == nullptr) return absl::UnknownError("can't open file"); + if (open_) return absl::FailedPreconditionError("already open"); + auto reader = CreateRiegeliFileReader(path); + if (!reader->ok()) return reader->status(); + open_ = true; // Read the entire file at once. // It may be useful to read the file in chunks, but if we are going // to migrate to something else, it's not important here. - ByteArray raw_bytes; - RemoteFileRead(file_, raw_bytes); - RemoteFileClose(file_); // close the file here, we won't need it. - UnpackBytesFromAppendFile(raw_bytes, &unpacked_blobs_); + std::string raw_bytes; + // close the file here, we won't need it. + absl::Status status = riegeli::ReadAll(std::move(reader), raw_bytes); + UnpackBytesFromAppendFile(AsByteSpan(raw_bytes), &unpacked_blobs_); return absl::OkStatus(); } absl::Status Read(ByteSpan &blob) override { if (closed_) return absl::FailedPreconditionError("already closed"); - if (!file_) return absl::FailedPreconditionError("was not open"); + if (!open_) return absl::FailedPreconditionError("was not open"); if (next_to_read_blob_index_ == unpacked_blobs_.size()) return absl::OutOfRangeError("no more blobs"); if (next_to_read_blob_index_ != 0) // Clear the previous blob to save RAM. @@ -83,14 +87,14 @@ // Closes the file (it must be open). absl::Status Close() override { if (closed_) return absl::FailedPreconditionError("already closed"); - if (!file_) return absl::FailedPreconditionError("was not open"); + if (!open_) return absl::FailedPreconditionError("was not open"); closed_ = true; // Nothing to do here, we've already closed the underlying file (in Open()). return absl::OkStatus(); } private: - RemoteFile *file_ = nullptr; + bool open_ = false; bool closed_ = false; std::vector<ByteArray> unpacked_blobs_; size_t next_to_read_blob_index_ = 0; @@ -100,7 +104,7 @@ class SimpleBlobFileWriter : public BlobFileWriter { public: ~SimpleBlobFileWriter() override { - if (file_ && !closed_) { + if (writer_ && !closed_) { // Virtual resolution is off in dtors, so use a specific Close(). CHECK_OK(SimpleBlobFileWriter::Close()); } @@ -109,34 +113,31 @@ absl::Status Open(std::string_view path, std::string_view mode) override { CHECK(mode == "w" || mode == "a") << VV(mode); if (closed_) return absl::FailedPreconditionError("already closed"); - if (file_) return absl::FailedPreconditionError("already open"); - file_ = RemoteFileOpen(path, mode.data()); - if (file_ == nullptr) return absl::UnknownError("can't open file"); + if (writer_) return absl::FailedPreconditionError("already open"); + writer_ = CreateRiegeliFileWriter(path, mode == "a"); + if (!writer_->ok()) return std::exchange(writer_, nullptr)->status(); return absl::OkStatus(); } absl::Status Write(ByteSpan blob) override { if (closed_) return absl::FailedPreconditionError("already closed"); - if (!file_) return absl::FailedPreconditionError("was not open"); - // TODO(kcc): [as-needed] This copy from a span to vector is clumsy. Change - // RemoteFileAppend to accept a span. - ByteArray bytes(blob.begin(), blob.end()); - ByteArray packed = PackBytesForAppendFile(bytes); - RemoteFileAppend(file_, packed); - + if (!writer_) return absl::FailedPreconditionError("was not open"); + if (!writer_->Write(AsStringView(PackBytesForAppendFile(blob)))) { + return writer_->status(); + } return absl::OkStatus(); } absl::Status Close() override { if (closed_) return absl::FailedPreconditionError("already closed"); - if (!file_) return absl::FailedPreconditionError("was not open"); + if (!writer_) return absl::FailedPreconditionError("was not open"); closed_ = true; - RemoteFileClose(file_); + if (!writer_->Close()) return writer_->status(); return absl::OkStatus(); } private: - RemoteFile *file_ = nullptr; + std::unique_ptr<riegeli::Writer> writer_; bool closed_ = false; };
diff --git a/centipede/centipede.cc b/centipede/centipede.cc index 685e8c3..53943c3 100644 --- a/centipede/centipede.cc +++ b/centipede/centipede.cc
@@ -47,7 +47,7 @@ #include <cmath> #include <cstddef> #include <cstdlib> -#include <filesystem> +#include <filesystem> // NOLINT #include <functional> #include <iostream> #include <memory> @@ -55,6 +55,7 @@ #include <sstream> #include <string> #include <string_view> +#include <utility> #include <vector> #include "absl/base/attributes.h" @@ -91,6 +92,9 @@ #include "./centipede/stats.h" #include "./centipede/util.h" #include "./centipede/workdir.h" +#include "riegeli/bytes/read_all.h" +#include "riegeli/bytes/write.h" +#include "riegeli/bytes/writer.h" namespace centipede { @@ -178,19 +182,18 @@ auto appender = DefaultBlobFileWriterFactory(env.riegeli); CHECK_OK(appender->Open(corpus_path, "a")) << "Failed to open corpus file: " << corpus_path; - ByteArray shard_data; for (const auto &path : sharded_paths[shard]) { std::string input; - RemoteFileGetContents(path, input); + CHECK_OK(riegeli::ReadAll(CreateRiegeliFileReader(path), input)); if (input.empty() || existing_hashes.contains(Hash(input))) { ++inputs_ignored; continue; } - CHECK_OK(appender->Write(ByteArray{input.begin(), input.end()})); + CHECK_OK(appender->Write(AsByteSpan(input))); ++inputs_added; } LOG(INFO) << VV(shard) << VV(inputs_added) << VV(inputs_ignored) - << VV(num_shard_bytes) << VV(shard_data.size()); + << VV(num_shard_bytes); } CHECK_EQ(total_paths, inputs_added + inputs_ignored); } @@ -459,10 +462,9 @@ << VV(coverage_path); auto pci_vec = fs_.ToCoveragePCs(); Coverage coverage(pc_table_, pci_vec); - std::stringstream out; - out << "# " << description << ":\n\n"; - coverage.Print(symbols_, out); - RemoteFileSetContents(coverage_path, out.str()); + auto out = CreateRiegeliFileWriter(coverage_path); + out->Write("# ", description, ":\n\n"); + coverage.Print(symbols_, std::move(out)); } void Centipede::GenerateCorpusStats(std::string_view filename_annotation, @@ -470,10 +472,9 @@ auto stats_path = wd_.CorpusStatsPath(filename_annotation); LOG(INFO) << "Generate corpus stats: " << description << " " << VV(stats_path); - std::ostringstream os; - os << "# " << description << ":\n\n"; - corpus_.PrintStats(os, fs_); - RemoteFileSetContents(stats_path, os.str()); + auto out = CreateRiegeliFileWriter(stats_path); + out->Write("# ", description, ":\n\n"); + corpus_.PrintStats(fs_, std::move(out)); } // TODO(nedwill): add integration test once tests are refactored per b/255660879 @@ -525,19 +526,19 @@ class ReportDumper : public RUsageProfiler::ReportSink { public: explicit ReportDumper(std::string_view path) - : file_{RemoteFileOpen(path, "w")} { - CHECK(file_ != nullptr) << VV(path); + : out_(CreateRiegeliFileWriter(path)) { + CHECK(out_->ok()) << VV(out_->status()); } - ~ReportDumper() override { RemoteFileClose(file_); } + ~ReportDumper() override { CHECK(out_->Close()) << VV(out_->status()); } - ReportDumper &operator<<(const std::string &fragment) override { - RemoteFileAppend(file_, ByteArray{fragment.cbegin(), fragment.cend()}); + ReportDumper &operator<<(std::string_view fragment) override { + CHECK(out_->Write(fragment)) << VV(out_->status()); return *this; } private: - RemoteFile *file_; + std::unique_ptr<riegeli::Writer> out_; }; const auto &snapshot = rusage_profiler_.TakeSnapshot( @@ -818,10 +819,9 @@ << "\nFailure : " << one_input_batch_result.failure_description() << "\nSaving input to: " << file_path; - auto *file = RemoteFileOpen(file_path, "w"); // overwrites existing file. - CHECK(file != nullptr) << log_prefix << "Failed to open " << file_path; - RemoteFileAppend(file, one_input); - RemoteFileClose(file); + CHECK_OK(riegeli::Write(AsStringView(one_input), + CreateRiegeliFileWriter(file_path))) + << log_prefix; return; } } @@ -850,10 +850,9 @@ auto hash = Hash(one_input); std::string file_path = std::filesystem::path(save_dir).append( absl::StrFormat("input-%010d-%s", i, hash)); - auto *file = RemoteFileOpen(file_path, "w"); - CHECK(file != nullptr) << log_prefix << "Failed to open " << file_path; - RemoteFileAppend(file, one_input); - RemoteFileClose(file); + CHECK_OK(riegeli::Write(AsStringView(one_input), + CreateRiegeliFileWriter(file_path))) + << log_prefix; } }
diff --git a/centipede/centipede_interface.cc b/centipede/centipede_interface.cc index 7f073f0..2b0aad4 100644 --- a/centipede/centipede_interface.cc +++ b/centipede/centipede_interface.cc
@@ -19,7 +19,6 @@ #include <algorithm> #include <atomic> #include <csignal> -#include <cstdint> #include <cstdlib> #include <filesystem> // NOLINT #include <functional> @@ -114,12 +113,9 @@ } ByteSpan blob; while (blob_reader->Read(blob) == absl::OkStatus()) { - ByteArray bytes; - bytes.insert(bytes.begin(), blob.data(), blob.end()); - // TODO(kcc): [impl] add a variant of WriteToLocalFile that accepts Span. - WriteToLocalFile(tmpfile, bytes); + WriteToLocalFile(tmpfile, blob); std::string command_line = absl::StrReplaceAll( - env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(bytes)}}); + env.for_each_blob, {{"%P", tmpfile}, {"%H", Hash(blob)}}); Command cmd(command_line); // TODO(kcc): [as-needed] this creates one process per blob. // If this flag gets active use, we may want to define special cases, @@ -174,9 +170,10 @@ } void SavePCTableToFile(const PCTable &pc_table, std::string_view file_path) { - ByteSpan bytes = {reinterpret_cast<const uint8_t *>(pc_table.data()), - pc_table.size() * sizeof(pc_table[0])}; - WriteToLocalFile(file_path, bytes); + WriteToLocalFile( + file_path, + std::string_view(reinterpret_cast<const char *>(pc_table.data()), + pc_table.size() * sizeof(pc_table[0]))); } } // namespace
diff --git a/centipede/config_file.cc b/centipede/config_file.cc index 01fa635..58a5a33 100644 --- a/centipede/config_file.cc +++ b/centipede/config_file.cc
@@ -37,6 +37,9 @@ #include "./centipede/logging.h" #include "./centipede/remote_file.h" #include "./centipede/util.h" +#include "riegeli/bytes/fd_writer.h" +#include "riegeli/bytes/read_all.h" +#include "riegeli/bytes/write.h" // TODO(ussuri): Move these flags next to main() ASAP. They are here // only temporarily to simplify the APIs and implementation in V1. @@ -143,7 +146,8 @@ if (!path.empty() && !std::filesystem::exists(path)) { // assume remote // Read the remote file. std::string contents; - RemoteFileGetContents(path, contents); + CHECK_OK( + riegeli::ReadAll(CreateRiegeliFileReader(path.native()), contents)); // Save a temporary local copy. const std::filesystem::path tmp_dir = TemporaryLocalDirPath(); @@ -151,7 +155,7 @@ LOG(INFO) << "Localizing remote config: " << VV(path) << VV(local_path); // NOTE: Ignore "Remote" in the API names here: the paths are always local. RemoteMkdir(tmp_dir.c_str()); - RemoteFileSetContents(local_path, contents); + CHECK_OK(riegeli::Write(contents, riegeli::FdWriter(local_path.native()))); // Augment the argv to point at the local copy and ensure it is cleaned up. replacements.emplace_back(path.c_str(), local_path.c_str()); @@ -224,7 +228,8 @@ } else { file_contents = flags_str; } - RemoteFileSetContents(path, file_contents); + CHECK_OK( + riegeli::Write(file_contents, CreateRiegeliFileWriter(path.native()))); } return path;
diff --git a/centipede/control_flow.cc b/centipede/control_flow.cc index 3f1ecc1..d10cd84 100644 --- a/centipede/control_flow.cc +++ b/centipede/control_flow.cc
@@ -16,33 +16,38 @@ #include <cstddef> #include <cstdint> +#include <cstdlib> #include <filesystem> // NOLINT -#include <fstream> #include <queue> #include <string> #include <string_view> +#include <utility> #include <vector> +#include "absl/container/flat_hash_set.h" #include "absl/log/check.h" #include "absl/log/log.h" #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "./centipede/command.h" -#include "./centipede/defs.h" #include "./centipede/logging.h" #include "./centipede/pc_info.h" -#include "./centipede/util.h" +#include "riegeli/base/closing_ptr.h" +#include "riegeli/bytes/copy_all.h" +#include "riegeli/bytes/fd_reader.h" +#include "riegeli/bytes/resizable_writer.h" +#include "riegeli/lines/line_reading.h" namespace centipede { PCTable ReadPcTableFromFile(std::string_view file_path) { - ByteArray pc_infos_as_bytes; - ReadFromLocalFile(file_path, pc_infos_as_bytes); - CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0); - size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo); - const auto *pc_infos = reinterpret_cast<PCInfo *>(pc_infos_as_bytes.data()); - PCTable pc_table{pc_infos, pc_infos + pc_table_size}; - CHECK_EQ(pc_table.size(), pc_table_size); + PCTable pc_table; + riegeli::ResizableWriter<riegeli::VectorResizableTraits<PCInfo>> out( + &pc_table); + CHECK_OK(riegeli::CopyAll(riegeli::FdReader(file_path), + riegeli::ClosingPtr(&out))); + CHECK_EQ(out.pos() % sizeof(PCInfo), 0); return pc_table; } @@ -59,14 +64,13 @@ return {}; } PCTable pc_table; - std::ifstream in(std::string{tmp_path}); - CHECK(in.good()) << VV(tmp_path); + riegeli::FdReader in(tmp_path); bool saw_new_function = false; // Read the objdump output, find lines that start a function // and lines that have a call to __sanitizer_cov_trace_pc. // Reconstruct the PCTable from those. - for (std::string line; std::getline(in, line);) { + for (std::string line; riegeli::ReadLine(in, line);) { if (absl::EndsWith(line, ">:")) { // new function. saw_new_function = true; continue; @@ -79,26 +83,27 @@ saw_new_function = false; // next trace_pc will be in the same function. pc_table.push_back({pc, flags}); } + CHECK(in.Close()) << VV(in.status()); std::filesystem::remove(tmp_path); return pc_table; } CFTable ReadCfTableFromFile(std::string_view file_path) { - ByteArray cf_infos_as_bytes; - ReadFromLocalFile(file_path, cf_infos_as_bytes); - size_t cf_table_size = cf_infos_as_bytes.size() / sizeof(CFTable::value_type); - const auto *cf_infos = - reinterpret_cast<CFTable::value_type *>(cf_infos_as_bytes.data()); - CFTable cf_table{cf_infos, cf_infos + cf_table_size}; - CHECK_EQ(cf_table.size(), cf_table_size); + CFTable cf_table; + riegeli::ResizableWriter<riegeli::VectorResizableTraits<intptr_t>> out( + &cf_table); + CHECK_OK(riegeli::CopyAll(riegeli::FdReader(file_path), + riegeli::ClosingPtr(&out))); + CHECK_EQ(out.pos() % sizeof(intptr_t), 0); return cf_table; } DsoTable ReadDsoTableFromFile(std::string_view file_path) { DsoTable result; - std::string data; - ReadFromLocalFile(file_path, data); - for (const auto &line : absl::StrSplit(data, '\n', absl::SkipEmpty())) { + riegeli::FdReader in(file_path); + CHECK(in.ok()) << VV(in.status()); + for (std::string_view line; riegeli::ReadLine(in, line);) { + if (line.empty()) continue; // Use std::string; there is no std::stoul for std::string_view. const std::vector<std::string> tokens = absl::StrSplit(line, ' ', absl::SkipEmpty());
diff --git a/centipede/corpus.cc b/centipede/corpus.cc index 0dd532a..9bb46a3 100644 --- a/centipede/corpus.cc +++ b/centipede/corpus.cc
@@ -17,8 +17,8 @@ #include <algorithm> #include <cstddef> #include <cstdint> -#include <ostream> #include <string> +#include <string_view> #include <utility> #include <vector> @@ -33,6 +33,8 @@ #include "./centipede/feature_set.h" #include "./centipede/logging.h" // IWYU pragma: keep #include "./centipede/util.h" +#include "riegeli/base/any_dependency.h" +#include "riegeli/bytes/writer.h" namespace centipede { @@ -130,25 +132,27 @@ return records_[random % records_.size()]; } -void Corpus::PrintStats(std::ostream &out, const FeatureSet &fs) { - out << "{\n"; - out << " \"num_inputs\": " << records_.size() << ",\n"; - out << " \"corpus_stats\": [\n"; - std::string before_record; +void Corpus::PrintStats(const FeatureSet &fs, + riegeli::AnyDependencyRef<riegeli::Writer *> out) { + out->Write("{\n"); + out->Write(" \"num_inputs\": ", records_.size(), ",\n"); + out->Write(" \"corpus_stats\": [\n"); + std::string_view before_record; for (const auto &record : records_) { - out << before_record; + out->Write(before_record); before_record = ",\n"; - out << " {\"size\": " << record.data.size() << ", "; - out << "\"frequencies\": ["; - std::string before_feature; + out->Write(" {\"size\": ", record.data.size(), ", "); + out->Write("\"frequencies\": ["); + std::string_view before_feature; for (const auto feature : record.features) { - out << before_feature; + out->Write(before_feature); before_feature = ", "; - out << fs.Frequency(feature); + out->Write(fs.Frequency(feature)); } - out << "]}"; + out->Write("]}"); } - out << "\n ]\n}\n"; + out->Write("\n ]\n}\n"); + if (out.is_owning()) CHECK(out->Close()) << VV(out->status()); } std::string Corpus::MemoryUsageString() const {
diff --git a/centipede/corpus.h b/centipede/corpus.h index 6d81369..5375b96 100644 --- a/centipede/corpus.h +++ b/centipede/corpus.h
@@ -17,18 +17,19 @@ #include <cstddef> #include <cstdint> -#include <ostream> #include <string> #include <utility> #include <vector> +#include "absl/log/check.h" #include "./centipede/binary_info.h" -#include "./centipede/control_flow.h" #include "./centipede/defs.h" #include "./centipede/execution_metadata.h" #include "./centipede/feature.h" #include "./centipede/feature_set.h" #include "./centipede/util.h" +#include "riegeli/base/any_dependency.h" +#include "riegeli/bytes/writer.h" namespace centipede { @@ -141,7 +142,8 @@ // Logging. // Prints corpus stats in JSON format to `out` using `fs` for frequencies. - void PrintStats(std::ostream &out, const FeatureSet &fs); + void PrintStats(const FeatureSet &fs, + riegeli::AnyDependencyRef<riegeli::Writer *> out); // Returns a string used for logging the corpus memory usage. std::string MemoryUsageString() const;
diff --git a/centipede/corpus_test.cc b/centipede/corpus_test.cc index a1bbcdc..7231044 100644 --- a/centipede/corpus_test.cc +++ b/centipede/corpus_test.cc
@@ -17,15 +17,18 @@ #include <algorithm> #include <cstddef> #include <cstdint> -#include <sstream> #include <string> #include <vector> #include "gtest/gtest.h" +#include "./centipede/binary_info.h" +#include "./centipede/call_graph.h" #include "./centipede/control_flow.h" #include "./centipede/defs.h" #include "./centipede/feature.h" #include "./centipede/feature_set.h" +#include "./centipede/pc_info.h" +#include "riegeli/bytes/string_writer.h" namespace centipede { namespace { @@ -59,9 +62,9 @@ corpus.Add({1, 2, 3}, features1, {}, fs, coverage_frontier); fs.IncrementFrequencies(features2); corpus.Add({4, 5}, features2, {}, fs, coverage_frontier); - std::ostringstream os; - corpus.PrintStats(os, fs); - EXPECT_EQ(os.str(), + std::string str; + corpus.PrintStats(fs, riegeli::StringWriter(&str)); + EXPECT_EQ(str, R"({ "num_inputs": 2, "corpus_stats": [
diff --git a/centipede/coverage.cc b/centipede/coverage.cc index cd02294..17ea919 100644 --- a/centipede/coverage.cc +++ b/centipede/coverage.cc
@@ -17,17 +17,24 @@ #include <string.h> #include <cstdint> -#include <filesystem> #include <limits> +#include <string> #include <string_view> +#include <utility> +#include <vector> #include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" #include "absl/strings/str_split.h" #include "absl/synchronization/mutex.h" -#include "./centipede/defs.h" +#include "./centipede/control_flow.h" +#include "./centipede/feature.h" #include "./centipede/logging.h" +#include "./centipede/pc_info.h" #include "./centipede/symbol_table.h" -#include "./centipede/util.h" +#include "riegeli/base/any_dependency.h" +#include "riegeli/bytes/string_writer.h" +#include "riegeli/bytes/writer.h" namespace centipede { @@ -82,26 +89,28 @@ } } -void Coverage::Print(const SymbolTable &symbols, std::ostream &out) { +void Coverage::Print(const SymbolTable &symbols, + riegeli::AnyDependencyRef<riegeli::Writer *> out) { // Print symbolized function names for all covered functions. for (auto pc_index : fully_covered_funcs) { - out << "FULL: " << symbols.full_description(pc_index) << "\n"; + out->Write("FULL: ", symbols.full_description(pc_index), '\n'); } // Same for uncovered functions. for (auto pc_index : uncovered_funcs) { - out << "NONE: " << symbols.full_description(pc_index) << "\n"; + out->Write("NONE: ", symbols.full_description(pc_index), '\n'); } // For every partially covered function, first print its name, // then print its covered edges, then uncovered edges. for (auto &pcf : partially_covered_funcs) { - out << "PARTIAL: " << symbols.full_description(pcf.covered[0]) << "\n"; + out->Write("PARTIAL: ", symbols.full_description(pcf.covered[0]), '\n'); for (auto pc_index : pcf.covered) { - out << " + " << symbols.full_description(pc_index) << "\n"; + out->Write(" + ", symbols.full_description(pc_index), '\n'); } for (auto pc_index : pcf.uncovered) { - out << " - " << symbols.full_description(pc_index) << "\n"; + out->Write(" - ", symbols.full_description(pc_index), '\n'); } } + if (out.is_owning()) CHECK(out->Close()) << VV(out->status()); } //---------------------- NewCoverageLogger @@ -109,16 +118,18 @@ if (pc_table_.empty()) return ""; // Fast-path return (symbolization is off). absl::MutexLock l(&mu_); if (!observed_indices_.insert(pc_index).second) return ""; - std::ostringstream os; + riegeli::StringWriter out; if (pc_index >= pc_table_.size()) { - os << "FUNC/EDGE index: " << pc_index; + out.Write("FUNC/EDGE index: ", pc_index); + out.Close(); } else { - os << (pc_table_[pc_index].has_flag(PCInfo::kFuncEntry) ? "FUNC: " - : "EDGE: "); - os << symbols_.full_description(pc_index); - if (!observed_descriptions_.insert(os.str()).second) return ""; + out.Write( + pc_table_[pc_index].has_flag(PCInfo::kFuncEntry) ? "FUNC: " : "EDGE: ", + symbols_.full_description(pc_index)); + out.Close(); + if (!observed_descriptions_.insert(out.dest()).second) return ""; } - return os.str(); + return std::move(out.dest()); } FunctionFilter::FunctionFilter(std::string_view functions_to_filter,
diff --git a/centipede/coverage.h b/centipede/coverage.h index 9c04e7d..fbf2c42 100644 --- a/centipede/coverage.h +++ b/centipede/coverage.h
@@ -19,18 +19,19 @@ #include <algorithm> #include <cstdint> -#include <ostream> #include <string> #include <string_view> #include <vector> #include "absl/base/thread_annotations.h" -#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" #include "absl/synchronization/mutex.h" #include "./centipede/control_flow.h" #include "./centipede/feature.h" -#include "./centipede/logging.h" +#include "./centipede/pc_info.h" +#include "riegeli/base/any_dependency.h" +#include "riegeli/bytes/writer.h" namespace centipede { @@ -48,7 +49,8 @@ const PCIndexVec &pci_vec); // Prints in human-readable form to `out` using `symbols`. - void Print(const SymbolTable &symbols, std::ostream &out); + void Print(const SymbolTable &symbols, + riegeli::AnyDependencyRef<riegeli::Writer *> out); // Returns true if the function is fully covered. pc_index is for a function // entry.
diff --git a/centipede/coverage_test.cc b/centipede/coverage_test.cc index 1984359..3b6cbe6 100644 --- a/centipede/coverage_test.cc +++ b/centipede/coverage_test.cc
@@ -17,11 +17,10 @@ #include <stdio.h> #include <unistd.h> -#include <algorithm> #include <cstddef> #include <cstdint> #include <cstdlib> -#include <filesystem> +#include <filesystem> // NOLINT #include <iostream> #include <string> #include <string_view> @@ -30,20 +29,23 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" #include "absl/strings/str_cat.h" #include "./centipede/binary_info.h" -#include "./centipede/centipede_interface.h" +#include "./centipede/centipede_callbacks.h" #include "./centipede/control_flow.h" #include "./centipede/defs.h" #include "./centipede/environment.h" #include "./centipede/feature.h" -#include "./centipede/logging.h" +#include "./centipede/mutation_input.h" #include "./centipede/pc_info.h" #include "./centipede/runner_result.h" #include "./centipede/symbol_table.h" #include "./centipede/test_util.h" #include "./centipede/util.h" +#include "riegeli/bytes/string_reader.h" +#include "riegeli/bytes/string_writer.h" namespace centipede { namespace { @@ -52,7 +54,7 @@ // A, BB, CCC. // A and BB have one control flow edge each. // CCC has 3 edges. -const char *symbolizer_output = +constexpr std::string_view symbolizer_output = "A\n" "a.cc:1:0\n" "\n" @@ -86,8 +88,7 @@ TEST(Coverage, SymbolTable) { // Initialize and test SymbolTable. SymbolTable symbols; - std::istringstream iss(symbolizer_output); - symbols.ReadFromLLVMSymbolizer(iss); + symbols.ReadFromLLVMSymbolizer(riegeli::StringReader(symbolizer_output)); EXPECT_EQ(symbols.size(), 6U); EXPECT_EQ(symbols.func(1), "BB"); EXPECT_EQ(symbols.location(2), "ccc.cc:1:0"); @@ -98,10 +99,9 @@ // Tests coverage output for PCIndexVec = {0, 2}, // i.e. the covered edges are 'A' and the entry of 'CCC'. Coverage cov(g_pc_table, {0, 2}); - cov.Print(symbols, std::cout); - std::ostringstream os; - cov.Print(symbols, os); - std::string str = os.str(); + std::string str; + cov.Print(symbols, riegeli::StringWriter(&str)); + std::cout << str; EXPECT_THAT(str, testing::HasSubstr("FULL: A a.cc:1:0")); EXPECT_THAT(str, testing::HasSubstr("NONE: BB bb.cc:1:0")); EXPECT_THAT(str, testing::HasSubstr("PARTIAL: CCC ccc.cc:1:0")); @@ -112,9 +112,8 @@ { // Same as above, but for PCIndexVec = {1, 2, 3}, Coverage cov(g_pc_table, {1, 2, 3}); - std::ostringstream os; - cov.Print(symbols, os); - std::string str = os.str(); + std::string str; + cov.Print(symbols, riegeli::StringWriter(&str)); EXPECT_THAT(str, testing::HasSubstr("FULL: BB bb.cc:1:0")); EXPECT_THAT(str, testing::HasSubstr("NONE: A a.cc:1:0")); EXPECT_THAT(str, testing::HasSubstr("PARTIAL: CCC ccc.cc:1:0")); @@ -146,8 +145,7 @@ TEST(Coverage, CoverageLogger) { SymbolTable symbols; - std::istringstream iss(symbolizer_output); - symbols.ReadFromLLVMSymbolizer(iss); + symbols.ReadFromLLVMSymbolizer(riegeli::StringReader(symbolizer_output)); CoverageLogger logger(g_pc_table, symbols); // First time logging pc_index=0. EXPECT_EQ(logger.ObserveAndDescribeIfNew(0), "FUNC: A a.cc:1:0");
diff --git a/centipede/environment.cc b/centipede/environment.cc index ad6f2f9..bb9fe74 100644 --- a/centipede/environment.cc +++ b/centipede/environment.cc
@@ -20,6 +20,7 @@ #include <cstddef> #include <cstdint> #include <string> +#include <system_error> // NOLINT #include <vector> #include "absl/container/flat_hash_map.h" @@ -32,6 +33,7 @@ #include "./centipede/knobs.h" #include "./centipede/logging.h" #include "./centipede/remote_file.h" +#include "riegeli/bytes/read_all.h" namespace centipede { @@ -189,14 +191,11 @@ void Environment::ReadKnobsFileIfSpecified() { const std::string_view knobs_file_path = knobs_file; if (knobs_file_path.empty()) return; - ByteArray knob_bytes; - auto *f = RemoteFileOpen(knobs_file, "r"); - CHECK(f) << "Failed to open remote file " << knobs_file; - RemoteFileRead(f, knob_bytes); - RemoteFileClose(f); + std::string knob_bytes; + CHECK_OK(riegeli::ReadAll(CreateRiegeliFileReader(knobs_file), knob_bytes)); VLOG(1) << "Knobs: " << knob_bytes.size() << " knobs read from " << knobs_file; - knobs.Set(knob_bytes); + knobs.Set(AsByteSpan(knob_bytes)); knobs.ForEachKnob([](std::string_view name, Knobs::value_type value) { VLOG(1) << "knob " << name << ": " << static_cast<uint32_t>(value); });
diff --git a/centipede/pc_info.cc b/centipede/pc_info.cc index 7e06e93..3567917 100644 --- a/centipede/pc_info.cc +++ b/centipede/pc_info.cc
@@ -14,40 +14,27 @@ #include "./centipede/pc_info.h" -#include <cstddef> +#include <ios> #include <istream> -#include <iterator> #include <ostream> -#include <string> #include "absl/log/check.h" -#include "absl/types/span.h" -#include "./centipede/defs.h" namespace centipede { -bool PCInfo::operator==(const PCInfo &rhs) const { - return this->pc == rhs.pc && this->flags == rhs.flags; -} - PCTable ReadPcTable(std::istream &in) { - std::string input_string(std::istreambuf_iterator<char>(in), {}); - - ByteArray pc_infos_as_bytes(input_string.begin(), input_string.end()); - CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0); - size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo); - const auto *pc_infos = reinterpret_cast<PCInfo *>(pc_infos_as_bytes.data()); - PCTable pc_table{pc_infos, pc_infos + pc_table_size}; - CHECK_EQ(pc_table.size(), pc_table_size); - + in.seekg(0, std::ios_base::end); + auto size = in.tellg(); + in.seekg(0, std::ios_base::beg); + CHECK_EQ(size % sizeof(PCInfo), 0); + PCTable pc_table(size / sizeof(PCInfo)); + in.read(reinterpret_cast<char *>(pc_table.data()), size); return pc_table; } void WritePcTable(const PCTable &pc_table, std::ostream &out) { - auto pc_infos_as_bytes = - absl::Span<const char>(reinterpret_cast<const char *>(pc_table.data()), - sizeof(PCInfo) * pc_table.size()); - out.write(pc_infos_as_bytes.data(), pc_infos_as_bytes.size()); + out.write(reinterpret_cast<const char *>(pc_table.data()), + pc_table.size() * sizeof(PCInfo)); } } // namespace centipede
diff --git a/centipede/pc_info.h b/centipede/pc_info.h index de747fa..a1607f5 100644 --- a/centipede/pc_info.h +++ b/centipede/pc_info.h
@@ -38,7 +38,9 @@ bool has_flag(PCFlags f) const { return flags & f; } - bool operator==(const PCInfo &rhs) const; + friend bool operator==(const PCInfo &lhs, const PCInfo &rhs) { + return lhs.pc == rhs.pc && lhs.flags == rhs.flags; + } }; // Array of PCInfo-s.
diff --git a/centipede/remote_file.cc b/centipede/remote_file.cc index 825b5fe..a74c4df 100644 --- a/centipede/remote_file.cc +++ b/centipede/remote_file.cc
@@ -19,7 +19,6 @@ #include <glob.h> -#include <cstdio> #include <filesystem> // NOLINT #include <memory> #include <string> @@ -30,7 +29,6 @@ #include "absl/base/attributes.h" #include "absl/log/check.h" #include "absl/log/log.h" -#include "./centipede/defs.h" #include "./centipede/logging.h" #include "riegeli/bytes/fd_reader.h" #include "riegeli/bytes/fd_writer.h" @@ -52,79 +50,6 @@ CHECK(!error) << VV(path) << VV(error); } -ABSL_ATTRIBUTE_WEAK RemoteFile *RemoteFileOpen(std::string_view path, - const char *mode) { - CHECK(!path.empty()); - FILE *f = std::fopen(path.data(), mode); - return reinterpret_cast<RemoteFile *>(f); -} - -ABSL_ATTRIBUTE_WEAK void RemoteFileClose(RemoteFile *f) { - CHECK(f != nullptr); - std::fclose(reinterpret_cast<FILE *>(f)); -} - -ABSL_ATTRIBUTE_WEAK void RemoteFileAppend(RemoteFile *f, const ByteArray &ba) { - CHECK(f != nullptr); - auto *file = reinterpret_cast<FILE *>(f); - constexpr auto elt_size = sizeof(ba[0]); - const auto elts_to_write = ba.size(); - const auto elts_written = - std::fwrite(ba.data(), elt_size, elts_to_write, file); - CHECK_EQ(elts_written, elts_to_write); -} - -// Does not need weak attribute as the implementation depends on -// RemoteFileAppend(RemoteFile *, ByteArray). -void RemoteFileAppend(RemoteFile *f, const std::string &contents) { - CHECK(f != nullptr); - ByteArray contents_ba{contents.cbegin(), contents.cend()}; - RemoteFileAppend(f, contents_ba); -} - -ABSL_ATTRIBUTE_WEAK void RemoteFileRead(RemoteFile *f, ByteArray &ba) { - CHECK(f != nullptr); - auto *file = reinterpret_cast<FILE *>(f); - std::fseek(file, 0, SEEK_END); // seek to end - const auto file_size = std::ftell(file); - std::fseek(file, 0, SEEK_SET); // seek back to start - constexpr auto elt_size = sizeof(ba[0]); - CHECK_EQ(file_size % elt_size, 0) << VV(file_size) << VV(elt_size); - const auto elts_to_read = file_size / elt_size; - ba.resize(elts_to_read); - const auto elts_read = std::fread(ba.data(), elt_size, elts_to_read, file); - CHECK_EQ(elts_read, elts_to_read); -} - -// Does not need weak attribute as the implementation depends on -// RemoteFileRead(RemoteFile *, ByteArray). -void RemoteFileRead(RemoteFile *f, std::string &contents) { - CHECK(f != nullptr); - ByteArray contents_ba; - RemoteFileRead(f, contents_ba); - contents.assign(contents_ba.cbegin(), contents_ba.cend()); -} - -// Does not need weak attribute as the implementation depends on -// RemoteFileAppend(RemoteFile *, std::string). -void RemoteFileSetContents(const std::filesystem::path &path, - const std::string &contents) { - auto *file = RemoteFileOpen(path.c_str(), "w"); - CHECK(file != nullptr) << VV(path); - RemoteFileAppend(file, contents); - RemoteFileClose(file); -} - -// Does not need weak attribute as the implementation depends on -// RemoteFileRead(RemoteFile *, std::string). -void RemoteFileGetContents(const std::filesystem::path &path, - std::string &contents) { - auto *file = RemoteFileOpen(path.c_str(), "r"); - CHECK(file != nullptr) << VV(path); - RemoteFileRead(file, contents); - RemoteFileClose(file); -} - ABSL_ATTRIBUTE_WEAK bool RemotePathExists(std::string_view path) { return std::filesystem::exists(path); }
diff --git a/centipede/remote_file.h b/centipede/remote_file.h index a400174..979d876 100644 --- a/centipede/remote_file.h +++ b/centipede/remote_file.h
@@ -22,51 +22,20 @@ #ifndef THIRD_PARTY_CENTIPEDE_REMOTE_FILE_H_ #define THIRD_PARTY_CENTIPEDE_REMOTE_FILE_H_ -#include <filesystem> // NOLINT +#include <memory> #include <string> #include <string_view> #include <vector> -#include "./centipede/defs.h" #include "riegeli/bytes/reader.h" #include "riegeli/bytes/writer.h" namespace centipede { -// An opaque file handle. -struct RemoteFile {}; - -// Opens a (potentially remote) file 'file_path' and returns a handle to it. -// Supported modes: "r", "a", "w", same as in C FILE API. -RemoteFile *RemoteFileOpen(std::string_view file_path, const char *mode); - -// Closes the file previously opened by RemoteFileOpen. -void RemoteFileClose(RemoteFile *f); - -// Appends bytes from 'ba' to 'f'. -void RemoteFileAppend(RemoteFile *f, const ByteArray &ba); - -// Appends characters from 'contents' to 'f'. -void RemoteFileAppend(RemoteFile *f, const std::string &contents); - -// Reads all current contents of 'f' into 'ba'. -void RemoteFileRead(RemoteFile *f, ByteArray &ba); - -// Reads all current contents of 'f' into 'contents'. -void RemoteFileRead(RemoteFile *f, std::string &contents); - // Creates a (potentially remote) directory 'dir_path'. // No-op if the directory already exists. void RemoteMkdir(std::string_view dir_path); -// Sets the contents of the file at 'path' to 'contents'. -void RemoteFileSetContents(const std::filesystem::path &path, - const std::string &contents); - -// Reads the contents of the file at 'path' into 'contents'. -void RemoteFileGetContents(const std::filesystem::path &path, - std::string &contents); - // Returns true if `path` exists. bool RemotePathExists(std::string_view path); @@ -86,7 +55,7 @@ // If `append` is `true`, writes will append to the end of the file if it // exists. If `false, the file will be truncated to empty if it exists. std::unique_ptr<riegeli::Writer> CreateRiegeliFileWriter( - std::string_view file_path, bool append); + std::string_view file_path, bool append = false); } // namespace centipede
diff --git a/centipede/rusage_profiler.cc b/centipede/rusage_profiler.cc index 2d718ba..69b8748 100644 --- a/centipede/rusage_profiler.cc +++ b/centipede/rusage_profiler.cc
@@ -14,6 +14,7 @@ #include "./centipede/rusage_profiler.h" +#include <algorithm> #include <atomic> #include <cmath> #include <cstdint> @@ -22,6 +23,7 @@ #include <memory> #include <ostream> #include <string> +#include <string_view> #include <thread> // NOLINT #include <utility> @@ -504,9 +506,9 @@ } } - ReportLogger& operator<<(const std::string& fragment) override { + ReportLogger& operator<<(std::string_view fragment) override { const auto last_newline = fragment.rfind('\n'); - if (last_newline == std::string::npos) { + if (last_newline == std::string_view::npos) { // Accumulate no-'\n' fragments: LOG() always wraps around. buffer_ += fragment; } else {
diff --git a/centipede/rusage_profiler.h b/centipede/rusage_profiler.h index 2f1e2a8..e21b81f 100644 --- a/centipede/rusage_profiler.h +++ b/centipede/rusage_profiler.h
@@ -213,7 +213,9 @@ #include <memory> #include <ostream> #include <string> +#include <string_view> +#include "absl/base/thread_annotations.h" #include "absl/synchronization/mutex.h" #include "absl/time/time.h" #include "./centipede/rusage_stats.h" @@ -288,7 +290,7 @@ class ReportSink { public: virtual ~ReportSink() = default; - virtual ReportSink& operator<<(const std::string& fragment) = 0; + virtual ReportSink& operator<<(std::string_view fragment) = 0; }; //----------------------------------------------------------------------------
diff --git a/centipede/rusage_profiler_test.cc b/centipede/rusage_profiler_test.cc index 64ea044..93a5a48 100644 --- a/centipede/rusage_profiler_test.cc +++ b/centipede/rusage_profiler_test.cc
@@ -19,6 +19,7 @@ #include <cmath> #include <cstdint> #include <string> +#include <string_view> #include "gtest/gtest.h" #include "absl/flags/flag.h" @@ -230,7 +231,7 @@ class ReportCapture : public RUsageProfiler::ReportSink { public: ~ReportCapture() override = default; - ReportCapture& operator<<(const std::string& fragment) override { + ReportCapture& operator<<(std::string_view fragment) override { LOG(INFO).NoPrefix() << fragment; return *this; }
diff --git a/centipede/seed_corpus_maker_lib.cc b/centipede/seed_corpus_maker_lib.cc index 87d8975..40f9ca6 100644 --- a/centipede/seed_corpus_maker_lib.cc +++ b/centipede/seed_corpus_maker_lib.cc
@@ -52,6 +52,7 @@ #include "./centipede/util.h" #include "./centipede/workdir.h" #include "google/protobuf/text_format.h" +#include "riegeli/bytes/read_all.h" // TODO(ussuri): Implement a smarter on-the-fly sampling to avoid having to // load all of a source's elements into RAM only to pick some of them. That @@ -76,7 +77,8 @@ LOG(INFO) << "Config spec points at an existing file; trying to parse " "textproto config from it: " << VV(config_spec); - RemoteFileGetContents(config_spec, config_str); + CHECK_OK( + riegeli::ReadAll(CreateRiegeliFileReader(config_spec), config_str)); LOG(INFO) << "Raw config read from file:\n" << config_str; base_dir = std::filesystem::path{config_spec}.parent_path(); } else {
diff --git a/centipede/stats.cc b/centipede/stats.cc index 04a8ee6..b149511 100644 --- a/centipede/stats.cc +++ b/centipede/stats.cc
@@ -15,8 +15,6 @@ #include "./centipede/stats.h" #include <algorithm> -#include <cinttypes> -#include <cmath> #include <cstdint> #include <cstdlib> #include <cstring> @@ -25,6 +23,7 @@ #include <ios> #include <iosfwd> #include <limits> +#include <memory> #include <numeric> #include <sstream> #include <string> @@ -36,13 +35,14 @@ #include "absl/strings/ascii.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" -#include "absl/strings/substitute.h" #include "absl/time/time.h" #include "absl/types/span.h" #include "./centipede/environment.h" #include "./centipede/logging.h" #include "./centipede/remote_file.h" #include "./centipede/workdir.h" +#include "riegeli/bytes/writer.h" +#include "riegeli/csv/csv_writer.h" namespace centipede { @@ -153,8 +153,9 @@ // StatsCsvFileAppender StatsCsvFileAppender::~StatsCsvFileAppender() { - for (const auto &[group_name, file] : files_) { - RemoteFileClose(file); + for (const auto &[group_name, csv_writer_with_record] : csv_writers_) { + CHECK(csv_writer_with_record.csv_writer->Close()) + << VV(csv_writer_with_record.csv_writer->status()); } } @@ -163,32 +164,34 @@ if (!csv_header_.empty()) return; for (const auto &field : fields) { - std::string col_names; switch (field.aggregation) { case Stats::Aggregation::kMinMax: - col_names = absl::Substitute("$0_Min,$0_Max,", field.name); + csv_header_.Add(absl::StrCat(field.name, "_Min"), + absl::StrCat(field.name, "_Max")); break; case Stats::Aggregation::kMinMaxAvg: - col_names = absl::Substitute("$0_Min,$0_Max,$0_Avg,", field.name); + csv_header_.Add(absl::StrCat(field.name, "_Min"), + absl::StrCat(field.name, "_Max"), + absl::StrCat(field.name, "_Avg")); break; } - absl::StrAppend(&csv_header_, col_names); } - absl::StrAppend(&csv_header_, "\n"); } void StatsCsvFileAppender::SetCurrGroup(const Environment &master_env) { - RemoteFile *&file = files_[master_env.experiment_name]; - if (file == nullptr) { + curr_csv_writer_ = &csv_writers_[master_env.experiment_name]; + if (curr_csv_writer_->csv_writer == nullptr) { const std::string filename = WorkDir{master_env}.FuzzingStatsPath(master_env.experiment_name); - // TODO(ussuri): Append, not overwrite, so restarts keep accumulating. - // This will require writing the CSV header only if the file is brand new. - file = RemoteFileOpen(filename, "w"); - CHECK(file != nullptr) << VV(filename); - RemoteFileAppend(file, csv_header_); + auto writer = CreateRiegeliFileWriter(filename, /*append=*/true); + CHECK(writer->ok()) << VV(writer->status()); + riegeli::CsvWriterBase::Options options; + if (writer->pos() == 0) options.set_header(csv_header_); + curr_csv_writer_->csv_writer = + std::make_unique<riegeli::CsvWriter<std::unique_ptr<riegeli::Writer>>>( + std::move(writer), std::move(options)); + curr_csv_writer_->record.reserve(csv_header_.size()); } - curr_file_ = file; } void StatsCsvFileAppender::SetCurrField(const Stats::FieldInfo &field_info) { @@ -207,17 +210,17 @@ avg += value; } if (!values.empty()) avg /= values.size(); - std::string values_str; switch (curr_field_info_.aggregation) { case Stats::Aggregation::kMinMax: - values_str = absl::StrFormat("%" PRIu64 ",%" PRIu64 ",", min, max); + curr_csv_writer_->record.push_back(absl::StrCat(min)); + curr_csv_writer_->record.push_back(absl::StrCat(max)); break; case Stats::Aggregation::kMinMaxAvg: - values_str = - absl::StrFormat("%" PRIu64 ",%" PRIu64 ",%.1Lf,", min, max, avg); + curr_csv_writer_->record.push_back(absl::StrCat(min)); + curr_csv_writer_->record.push_back(absl::StrCat(max)); + curr_csv_writer_->record.push_back(absl::StrFormat("%.1f", avg)); break; } - RemoteFileAppend(curr_file_, values_str); } void StatsCsvFileAppender::ReportFlags(const GroupToFlags &group_to_flags) { @@ -226,8 +229,10 @@ } void StatsCsvFileAppender::DoneFieldSamplesBatch() { - for (const auto &[group_name, file] : files_) { - RemoteFileAppend(file, "\n"); + for (auto &[group_name, csv_writer_with_record] : csv_writers_) { + csv_writer_with_record.csv_writer->WriteRecord( + csv_writer_with_record.record); + csv_writer_with_record.record.clear(); } }
diff --git a/centipede/stats.h b/centipede/stats.h index 37ed90c..24928fa 100644 --- a/centipede/stats.h +++ b/centipede/stats.h
@@ -1,5 +1,4 @@ // Copyright 2022 The Centipede Authors. -// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -19,6 +18,7 @@ #include <cstdint> #include <cstdlib> #include <initializer_list> +#include <memory> #include <ostream> #include <sstream> #include <string> @@ -29,7 +29,8 @@ #include "absl/container/flat_hash_map.h" #include "absl/types/span.h" #include "./centipede/environment.h" -#include "./centipede/remote_file.h" +#include "riegeli/csv/csv_record.h" +#include "riegeli/csv/csv_writer.h" namespace centipede { @@ -195,6 +196,11 @@ ~StatsCsvFileAppender() override; private: + struct CsvWriterWithRecord { + std::unique_ptr<riegeli::CsvWriterBase> csv_writer; + std::vector<std::string> record; + }; + void PreAnnounceFields( std::initializer_list<Stats::FieldInfo> fields) override; void SetCurrGroup(const Environment &master_env) override; @@ -203,9 +209,10 @@ void DoneFieldSamplesBatch() override; void ReportFlags(const GroupToFlags &group_to_flags) override; - std::string csv_header_; - absl::flat_hash_map<std::string /*group_name*/, RemoteFile *> files_; - RemoteFile *curr_file_; + riegeli::CsvHeader csv_header_; + absl::flat_hash_map<std::string /*group_name*/, CsvWriterWithRecord> + csv_writers_; + CsvWriterWithRecord *curr_csv_writer_; Stats::FieldInfo curr_field_info_; };
diff --git a/centipede/stats_test.cc b/centipede/stats_test.cc index de47fbc..d5237a8 100644 --- a/centipede/stats_test.cc +++ b/centipede/stats_test.cc
@@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/log/log.h" #include "absl/log/log_entry.h" #include "absl/log/log_sink.h" #include "absl/log/log_sink_registry.h" @@ -212,13 +213,13 @@ workdir / "fuzzing-stats-.000000.ExperimentB.csv", }; const std::vector<std::string_view> kExpectedCsvContents = { - R"(NumCoveredPcs_Min,NumCoveredPcs_Max,NumCoveredPcs_Avg,NumExecs_Min,NumExecs_Max,NumExecs_Avg,CorpusSize_Min,CorpusSize_Max,CorpusSize_Avg,MaxEltSize_Min,MaxEltSize_Max,MaxEltSize_Avg,AvgEltSize_Min,AvgEltSize_Max,AvgEltSize_Avg,UnixMicros_Min,UnixMicros_Max, -10,25,17.5,100,102,101.0,1000,3000,2000.0,1,5,3.0,1,3,2.0,1000000,1000002, -11,26,18.5,101,103,102.0,1001,3001,2001.0,2,6,4.0,2,4,3.0,1000001,1000003, + R"(NumCoveredPcs_Min,NumCoveredPcs_Max,NumCoveredPcs_Avg,NumExecs_Min,NumExecs_Max,NumExecs_Avg,CorpusSize_Min,CorpusSize_Max,CorpusSize_Avg,MaxEltSize_Min,MaxEltSize_Max,MaxEltSize_Avg,AvgEltSize_Min,AvgEltSize_Max,AvgEltSize_Avg,UnixMicros_Min,UnixMicros_Max +10,25,17.5,100,102,101.0,1000,3000,2000.0,1,5,3.0,1,3,2.0,1000000,1000002 +11,26,18.5,101,103,102.0,1001,3001,2001.0,2,6,4.0,2,4,3.0,1000001,1000003 )", - R"(NumCoveredPcs_Min,NumCoveredPcs_Max,NumCoveredPcs_Avg,NumExecs_Min,NumExecs_Max,NumExecs_Avg,CorpusSize_Min,CorpusSize_Max,CorpusSize_Avg,MaxEltSize_Min,MaxEltSize_Max,MaxEltSize_Avg,AvgEltSize_Min,AvgEltSize_Max,AvgEltSize_Avg,UnixMicros_Min,UnixMicros_Max, -15,40,27.5,101,103,102.0,2000,4000,3000.0,3,7,5.0,2,4,3.0,1000001,1000003, -16,41,28.5,102,104,103.0,2001,4001,3001.0,4,8,6.0,3,5,4.0,1000002,1000004, + R"(NumCoveredPcs_Min,NumCoveredPcs_Max,NumCoveredPcs_Avg,NumExecs_Min,NumExecs_Max,NumExecs_Avg,CorpusSize_Min,CorpusSize_Max,CorpusSize_Avg,MaxEltSize_Min,MaxEltSize_Max,MaxEltSize_Avg,AvgEltSize_Min,AvgEltSize_Max,AvgEltSize_Avg,UnixMicros_Min,UnixMicros_Max +15,40,27.5,101,103,102.0,2000,4000,3000.0,3,7,5.0,2,4,3.0,1000001,1000003 +16,41,28.5,102,104,103.0,2001,4001,3001.0,4,8,6.0,3,5,4.0,1000002,1000004 )", };
diff --git a/centipede/symbol_table.cc b/centipede/symbol_table.cc index b69ed98..f781662 100644 --- a/centipede/symbol_table.cc +++ b/centipede/symbol_table.cc
@@ -15,9 +15,7 @@ #include "./centipede/symbol_table.h" #include <cstdlib> -#include <filesystem> -#include <fstream> -#include <ostream> +#include <filesystem> // NOLINT #include <string> #include <string_view> #include <vector> @@ -26,7 +24,6 @@ #include "absl/log/log.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" -#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" #include "absl/types/span.h" @@ -34,7 +31,13 @@ #include "./centipede/control_flow.h" #include "./centipede/logging.h" #include "./centipede/pc_info.h" -#include "./centipede/util.h" +#include "riegeli/base/any_dependency.h" +#include "riegeli/bytes/fd_reader.h" +#include "riegeli/bytes/fd_writer.h" +#include "riegeli/bytes/reader.h" +#include "riegeli/bytes/writer.h" +#include "riegeli/lines/line_reading.h" +#include "riegeli/text/write_int.h" namespace centipede { @@ -42,31 +45,33 @@ return this->entries_ == other.entries_; } -void SymbolTable::ReadFromLLVMSymbolizer(std::istream &in) { +void SymbolTable::ReadFromLLVMSymbolizer( + riegeli::AnyDependencyRef<riegeli::Reader *> in) { // We remove some useless file prefixes for better human readability. const std::string_view file_prefixes_to_remove[] = {"/proc/self/cwd/", "./"}; - while (in) { - // We (mostly) blindly trust the input format is correct. - std::string func, file, empty; - std::getline(in, func); - std::getline(in, file); - std::getline(in, empty); + std::string func, file, empty; + // We (mostly) blindly trust the input format is correct. + while (riegeli::ReadLine(*in, func) && riegeli::ReadLine(*in, file) && + riegeli::ReadLine(*in, empty)) { CHECK(empty.empty()) << "Unexpected symbolizer output format: " << VV(func) << VV(file) << VV(empty); - if (!in) break; + std::string_view file_view = file; for (auto &bad_prefix : file_prefixes_to_remove) { - file = absl::StripPrefix(file, bad_prefix); + file_view = absl::StripPrefix(file_view, bad_prefix); } - AddEntry(func, file); + AddEntry(func, file_view); } + if (in.is_owning()) CHECK(in->Close()) << VV(in->status()); } -void SymbolTable::WriteToLLVMSymbolizer(std::ostream &out) { +void SymbolTable::WriteToLLVMSymbolizer( + riegeli::AnyDependencyRef<riegeli::Writer *> out) { for (const Entry &entry : entries_) { - out << entry.func << '\n'; - out << entry.file_line_col() << '\n'; - out << std::endl; + out->Write(entry.func, '\n'); + out->Write(entry.file_line_col(), '\n'); + out->Write('\n'); } + if (out.is_owning()) CHECK(out->Close()) << VV(out->status()); } void SymbolTable::GetSymbolsFromOneDso(absl::Span<const PCInfo> pc_infos, @@ -77,11 +82,11 @@ auto pcs_path(tmp_path1); auto symbols_path(tmp_path2); // Create the input file (one PC per line). - std::string pcs_string; + riegeli::FdWriter pcs_writer(pcs_path); for (const auto &pc_info : pc_infos) { - absl::StrAppend(&pcs_string, "0x", absl::Hex(pc_info.pc), "\n"); + pcs_writer.Write("0x", riegeli::Hex(pc_info.pc), '\n'); } - WriteToLocalFile(pcs_path, pcs_string); + CHECK(pcs_writer.Close()) << VV(pcs_writer.status()); // Run the symbolizer. Command cmd(symbolizer_path, { @@ -102,9 +107,8 @@ return; } // Get and process the symbolizer output. - std::ifstream symbolizer_output(std::string{symbols_path}); size_t old_size = size(); - ReadFromLLVMSymbolizer(symbolizer_output); + ReadFromLLVMSymbolizer(riegeli::FdReader(symbols_path)); std::filesystem::remove(pcs_path); std::filesystem::remove(symbols_path); size_t new_size = size();
diff --git a/centipede/symbol_table.h b/centipede/symbol_table.h index b08d81b..a6dab6e 100644 --- a/centipede/symbol_table.h +++ b/centipede/symbol_table.h
@@ -16,22 +16,18 @@ #define THIRD_PARTY_CENTIPEDE_SYMBOL_TABLE_H_ #include <cstddef> -#include <istream> -#include <ostream> #include <string> #include <string_view> #include <vector> -#include "absl/log/check.h" -#include "absl/log/log.h" -#include "absl/status/status.h" #include "absl/strings/match.h" -#include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_split.h" #include "absl/types/span.h" #include "./centipede/control_flow.h" #include "./centipede/pc_info.h" +#include "riegeli/base/any_dependency.h" +#include "riegeli/bytes/reader.h" +#include "riegeli/bytes/writer.h" namespace centipede { @@ -69,11 +65,11 @@ // SourceCodeLocation // <empty line> // If called multiple times, this function will append symbols to `this`. - void ReadFromLLVMSymbolizer(std::istream &in); + void ReadFromLLVMSymbolizer(riegeli::AnyDependencyRef<riegeli::Reader *> in); - // Writes the contents of `this` to `path` in the same format as read by + // Writes the contents of `this` to `out` in the same format as read by // `ReadFromLLVMSymbolizer`. - void WriteToLLVMSymbolizer(std::ostream &out); + void WriteToLLVMSymbolizer(riegeli::AnyDependencyRef<riegeli::Writer *> out); // Invokes `symbolizer_path --no-inlines` on all binaries from `dso_table`, // pipes through it all PCs in pc_table that correspond to each of the
diff --git a/centipede/symbol_table_test.cc b/centipede/symbol_table_test.cc index 0f9fea4..75a8473 100644 --- a/centipede/symbol_table_test.cc +++ b/centipede/symbol_table_test.cc
@@ -14,16 +14,18 @@ #include "./centipede/symbol_table.h" -#include <sstream> #include <string> +#include <string_view> #include "gtest/gtest.h" +#include "riegeli/bytes/string_reader.h" +#include "riegeli/bytes/string_writer.h" namespace centipede { namespace { TEST(SymbolTableTest, SerializesAndDeserializesCorrectly) { - std::string input = + const std::string_view input = R"(FunctionOne source/location/one.cc:1:0 @@ -31,30 +33,26 @@ source/location/two.cc:2:0 )"; - std::istringstream input_stream(input); SymbolTable symbol_table; + symbol_table.ReadFromLLVMSymbolizer(riegeli::StringReader(input)); - symbol_table.ReadFromLLVMSymbolizer(input_stream); - - std::ostringstream output_stream; - symbol_table.WriteToLLVMSymbolizer(output_stream); - EXPECT_EQ(input, output_stream.str()); + std::string output; + symbol_table.WriteToLLVMSymbolizer(riegeli::StringWriter(&output)); + EXPECT_EQ(input, output); } TEST(SymbolTableTest, SerializesAndDeserializesCorrectlyWithUnknownFile) { - std::string input = + const std::string_view input = R"(? ? )"; - std::istringstream input_stream(input); SymbolTable symbol_table; + symbol_table.ReadFromLLVMSymbolizer(riegeli::StringReader(input)); - symbol_table.ReadFromLLVMSymbolizer(input_stream); - - std::ostringstream output_stream; - symbol_table.WriteToLLVMSymbolizer(output_stream); - EXPECT_EQ(input, output_stream.str()); + std::string output; + symbol_table.WriteToLLVMSymbolizer(riegeli::StringWriter(&output)); + EXPECT_EQ(input, output); } } // namespace
diff --git a/centipede/util.cc b/centipede/util.cc index c629c7d..2a3cb7c 100644 --- a/centipede/util.cc +++ b/centipede/util.cc
@@ -30,9 +30,7 @@ #include <cstring> #include <ctime> #include <filesystem> // NOLINT -#include <fstream> #include <functional> -#include <ios> #include <queue> #include <random> #include <sstream> @@ -53,8 +51,15 @@ #include "absl/types/span.h" #include "./centipede/defs.h" #include "./centipede/feature.h" -#include "./centipede/logging.h" #include "./centipede/remote_file.h" +#include "riegeli/base/closing_ptr.h" +#include "riegeli/bytes/copy_all.h" +#include "riegeli/bytes/fd_reader.h" +#include "riegeli/bytes/fd_writer.h" +#include "riegeli/bytes/read_all.h" +#include "riegeli/bytes/resizable_writer.h" +#include "riegeli/bytes/string_writer.h" +#include "riegeli/bytes/write.h" namespace centipede { @@ -66,69 +71,73 @@ std::hash<std::thread::id>{}(std::this_thread::get_id()); } -std::string AsString(const ByteArray &data, size_t max_len) { - std::ostringstream out; - size_t len = std::min(max_len, data.size()); - for (size_t i = 0; i < len; ++i) { - const auto ch = data[i]; - if (std::isprint(ch)) { - out << ch; +std::string AsString(ByteSpan data, size_t max_len) { + static constexpr char kHexChars[16] = {'0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'A', 'B', + 'C', 'D', 'E', 'F'}; + riegeli::StringWriter out; + bool was_single_digit_hex = false; + for (uint8_t ch : data.subspan(0, max_len)) { + if (ch == '\\') { + out.Write("\\\\"); + } else if (std::isprint(ch) && + !(was_single_digit_hex && + ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F')))) { + out.Write(static_cast<char>(ch)); + was_single_digit_hex = false; } else { - out << "\\x" << std::uppercase << std::hex << static_cast<uint32_t>(ch); + out.Write("\\x"); + if (ch >= 16) { + out.Write(kHexChars[ch >> 4]); + was_single_digit_hex = false; + } else { + was_single_digit_hex = true; + } + out.Write(kHexChars[ch & 0xf]); } } - return out.str(); + out.Close(); + return std::move(out.dest()); } -template <typename Container> -void ReadFromLocalFile(std::string_view file_path, Container &data) { - std::ifstream f(std::string{file_path}); - if (!f) return; - f.seekg(0, std::ios_base::end); - auto size = f.tellg(); - f.seekg(0, std::ios_base::beg); - CHECK_EQ(size % sizeof(data[0]), 0); - data.resize(size / sizeof(data[0])); - f.read(reinterpret_cast<char *>(data.data()), size); - CHECK(f) << "Failed to read from local file: " << VV(file_path) << VV(f.eof()) - << VV(f.bad()) << VV(f.fail()) << VV(size); - f.close(); +template <typename T> +void ReadFromLocalFileToVector(std::string_view file_path, + std::vector<T> &data) { + riegeli::FdReader in(file_path); + if (!in.ok()) return; + riegeli::ResizableWriter<riegeli::VectorResizableTraits<T>> out(&data); + CHECK_OK(riegeli::CopyAll(std::move(in), riegeli::ClosingPtr(&out))); + CHECK_EQ(out.pos() % sizeof(T), 0); } void ReadFromLocalFile(std::string_view file_path, std::string &data) { - return ReadFromLocalFile<std::string>(file_path, data); + riegeli::FdReader in(file_path); + if (!in.ok()) return; + CHECK_OK(riegeli::ReadAll(std::move(in), data)); } void ReadFromLocalFile(std::string_view file_path, ByteArray &data) { - return ReadFromLocalFile<ByteArray>(file_path, data); + ReadFromLocalFileToVector(file_path, data); } void ReadFromLocalFile(std::string_view file_path, FeatureVec &data) { - return ReadFromLocalFile<FeatureVec>(file_path, data); + ReadFromLocalFileToVector(file_path, data); } void ReadFromLocalFile(std::string_view file_path, std::vector<uint32_t> &data) { - return ReadFromLocalFile<std::vector<uint32_t> &>(file_path, data); + ReadFromLocalFileToVector(file_path, data); } void WriteToLocalFile(std::string_view file_path, ByteSpan data) { - std::ofstream f(std::string{file_path.data()}); - CHECK(f) << "Failed to open local file: " << file_path; - f.write(reinterpret_cast<const char *>(data.data()), - static_cast<int64_t>(data.size())); - CHECK(f) << "Failed to write to local file: " << file_path; - f.close(); + WriteToLocalFile(file_path, AsStringView(data)); } void WriteToLocalFile(std::string_view file_path, std::string_view data) { - static_assert(sizeof(decltype(data)::value_type) == sizeof(uint8_t)); - WriteToLocalFile( - file_path, - ByteSpan(reinterpret_cast<const uint8_t *>(data.data()), data.size())); + CHECK_OK(riegeli::Write(data, riegeli::FdWriter(file_path))); } void WriteToLocalFile(std::string_view file_path, const FeatureVec &data) { WriteToLocalFile(file_path, - ByteSpan(reinterpret_cast<const uint8_t *>(data.data()), - sizeof(data[0]) * data.size())); + std::string_view(reinterpret_cast<const char *>(data.data()), + data.size() * sizeof(data[0]))); } void WriteToLocalHashedFileInDir(std::string_view dir_path, ByteSpan data) { @@ -140,13 +149,14 @@ void WriteToRemoteHashedFileInDir(std::string_view dir_path, ByteSpan data) { if (dir_path.empty()) return; std::string file_path = std::filesystem::path(dir_path).append(Hash(data)); - RemoteFileSetContents(file_path, std::string(data.begin(), data.end())); + CHECK_OK( + riegeli::Write(AsStringView(data), CreateRiegeliFileWriter(file_path))); } std::string HashOfFileContents(std::string_view file_path) { if (file_path.empty()) return ""; std::string file_contents; - RemoteFileGetContents(std::filesystem::path(file_path), file_contents); + CHECK_OK(riegeli::ReadAll(CreateRiegeliFileReader(file_path), file_contents)); return Hash(file_contents); } @@ -218,7 +228,7 @@ // // This is simple and efficient, but I wonder if there is a ready-to-use // standard open-source alternative. Or should we just use tar? -ByteArray PackBytesForAppendFile(const ByteArray &data) { +ByteArray PackBytesForAppendFile(ByteSpan data) { ByteArray res; auto hash = Hash(data); CHECK_EQ(hash.size(), kHashLen); @@ -234,7 +244,7 @@ } // Reverse to a sequence of PackBytesForAppendFile() appended to each other. -void UnpackBytesFromAppendFile(const ByteArray &packed_data, +void UnpackBytesFromAppendFile(ByteSpan packed_data, std::vector<ByteArray> *unpacked, std::vector<std::string> *hashes) { auto pos = packed_data.cbegin();
diff --git a/centipede/util.h b/centipede/util.h index 4a705da..7a592dd 100644 --- a/centipede/util.h +++ b/centipede/util.h
@@ -39,7 +39,7 @@ // being remote. Returns an empty string if the `file_path` is empty. std::string HashOfFileContents(std::string_view file_path); // Returns a printable string representing at most `max_len` bytes of `data`. -std::string AsString(const ByteArray &data, size_t max_len = 16); +std::string AsString(ByteSpan data, size_t max_len = 16); // Reads from a local file `file_path` into `data`. // Crashes on any error. void ReadFromLocalFile(std::string_view file_path, ByteArray &data); @@ -141,12 +141,12 @@ // TODO(kcc): [impl] is there a lightweight equivalent in the open-source world? // tar sounds too heavy. // TODO(kcc): [impl] investigate https://github.com/google/riegeli. -ByteArray PackBytesForAppendFile(const ByteArray &data); +ByteArray PackBytesForAppendFile(ByteSpan data); // Unpacks `packed_data` into `unpacked` and `hashes`. // `packed_data` is multiple data packed by PackBytesForAppendFile() // and merged together. // `unpacked` or `hashes` can be nullptr. -void UnpackBytesFromAppendFile(const ByteArray &packed_data, +void UnpackBytesFromAppendFile(ByteSpan packed_data, std::vector<ByteArray> *unpacked, std::vector<std::string> *hashes = nullptr); // Append the bytes from 'hash' to 'ba'.