Add a set of file-related utilities that will be used in corpus traversal during replay. (#99)

* Add a set of file-related utilities that will be used in corpus traversal during replay.

* Make the linter happy.

* Fixed a few control flow nits.
diff --git a/fuzzing/replay/BUILD b/fuzzing/replay/BUILD
index e35454e..af9c1f8 100644
--- a/fuzzing/replay/BUILD
+++ b/fuzzing/replay/BUILD
@@ -22,6 +22,18 @@
 ############
 
 cc_library(
+    name = "file_util",
+    srcs = ["file_util.cc"],
+    hdrs = ["file_util.h"],
+    deps = [
+        ":status_util",
+        "@com_google_absl//absl/functional:function_ref",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_library(
     name = "status_util",
     srcs = ["status_util.cc"],
     hdrs = ["status_util.h"],
@@ -35,6 +46,18 @@
 ########
 
 cc_test(
+    name = "file_util_test",
+    size = "small",
+    srcs = ["file_util_test.cc"],
+    deps = [
+        ":file_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
     name = "status_util_test",
     size = "small",
     srcs = ["status_util_test.cc"],
diff --git a/fuzzing/replay/file_util.cc b/fuzzing/replay/file_util.cc
new file mode 100644
index 0000000..348aa44
--- /dev/null
+++ b/fuzzing/replay/file_util.cc
@@ -0,0 +1,96 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fuzzing/replay/file_util.h"
+
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cstdio>
+#include <string>
+
+#include "absl/functional/function_ref.h"
+#include "absl/status/status.h"
+#include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "fuzzing/replay/status_util.h"
+
+namespace fuzzing {
+
+namespace {
+
+// Feeds every entry of the directory `path` whose name does not start with
+// "." to YieldFiles. Returns OK, or the first error that was encountered.
+absl::Status TraverseDirectory(
+    const std::string& path,
+    absl::FunctionRef<void(absl::string_view, const struct stat&)> callback) {
+  DIR* const handle = opendir(path.c_str());
+  if (handle == nullptr) {
+    return ErrnoStatus(absl::StrCat("could not open directory ", path), errno);
+  }
+  absl::Status outcome = absl::OkStatus();
+  while (true) {
+    errno = 0;  // Null from readdir() means EOF or error; errno tells which.
+    const struct dirent* item = readdir(handle);
+    if (item == nullptr) break;
+    // Dot-prefixed names (".", ".." and hidden entries) are not visited.
+    if (!absl::StartsWith(item->d_name, ".")) {
+      const std::string child = absl::StrCat(path, "/", item->d_name);
+      outcome.Update(YieldFiles(child, callback));
+    }
+  }
+  if (errno != 0) {
+    outcome.Update(ErrnoStatus(
+        absl::StrCat("could not complete directory traversal for ", path),
+        errno));
+  }
+  closedir(handle);
+  return outcome;
+}
+
+}  // namespace
+
+absl::Status YieldFiles(
+    const std::string& path,
+    absl::FunctionRef<void(absl::string_view, const struct stat&)> callback) {
+  struct stat info;
+  if (stat(path.c_str(), &info) != 0) {
+    return ErrnoStatus(absl::StrCat("could not stat ", path), errno);
+  }
+  if (!S_ISDIR(info.st_mode)) {
+    callback(path, info);  // Anything that is not a directory is reported.
+    return absl::OkStatus();
+  }
+  return TraverseDirectory(path, callback);  // Directories are expanded.
+}
+
+absl::Status SetFileContents(const std::string& path,
+                             absl::string_view contents) {
+  FILE* f = fopen(path.c_str(), "w");
+  if (!f) {
+    return ErrnoStatus(absl::StrCat("could not open file ", path), errno);
+  }
+  const size_t written = fwrite(contents.data(), 1, contents.size(), f);
+  // Output is buffered, so a failed flush inside fclose() also loses data.
+  if (fclose(f) != 0 || written < contents.size()) {
+    return absl::UnknownError("could not write file contents");
+  }
+  return absl::OkStatus();
+}
+
+}  // namespace fuzzing
diff --git a/fuzzing/replay/file_util.h b/fuzzing/replay/file_util.h
new file mode 100644
index 0000000..64c8a78
--- /dev/null
+++ b/fuzzing/replay/file_util.h
@@ -0,0 +1,46 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Utilities for working with files and filesystems.
+
+#ifndef FUZZING_REPLAY_FILE_UTIL_H_
+#define FUZZING_REPLAY_FILE_UTIL_H_
+
+#include <sys/stat.h>
+
+#include <string>
+
+#include "absl/functional/function_ref.h"
+#include "absl/status/status.h"
+#include "absl/strings/string_view.h"
+
+namespace fuzzing {
+
+// Recursively traverses the directory at `path` and calls `callback` with the
+// path and stat structure of each file found. Entries whose names start with
+// "." are skipped. Returns OK if the whole tree was traversed, or an error
+// status if some part could not be (traversal still continues past entries
+// that fail). If `path` itself is not a directory, `callback` is invoked once
+// for it and OK is returned.
+absl::Status YieldFiles(
+    const std::string& path,
+    absl::FunctionRef<void(absl::string_view, const struct stat&)> callback);
+
+// Writes `contents` to `path` (created or truncated); returns OK on success.
+absl::Status SetFileContents(const std::string& path,
+                             absl::string_view contents);
+
+}  // namespace fuzzing
+
+#endif  // FUZZING_REPLAY_FILE_UTIL_H_
diff --git a/fuzzing/replay/file_util_test.cc b/fuzzing/replay/file_util_test.cc
new file mode 100644
index 0000000..4867e22
--- /dev/null
+++ b/fuzzing/replay/file_util_test.cc
@@ -0,0 +1,102 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fuzzing/replay/file_util.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <cstdlib>
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace fuzzing {
+
+namespace {
+
+std::function<void(absl::string_view, const struct stat&)> CollectPathsCallback(
+    std::vector<std::string>* collected_paths) {
+  return [collected_paths](absl::string_view file, const struct stat&) {
+    absl::FPrintF(stderr, "Collected path: %s\n", file);
+    collected_paths->emplace_back(std::string(file));  // Keep an owned copy.
+  };
+}
+
+TEST(YieldFilesTest, ReturnsEmptyResultsOnEmptyDir) {
+  // A freshly created directory has no entries besides "." and "..".
+  const std::string dir = absl::StrCat(getenv("TEST_TMPDIR"), "/empty-root");
+  ASSERT_EQ(mkdir(dir.c_str(), 0755), 0);
+
+  std::vector<std::string> seen;
+  const auto collect = CollectPathsCallback(&seen);
+  const absl::Status result = YieldFiles(dir, collect);
+  EXPECT_TRUE(result.ok());
+  EXPECT_THAT(seen, testing::IsEmpty());
+}
+
+TEST(YieldFilesTest, ReturnsErrorOnMissingDir) {
+  // The path is never created, so the initial stat() call must fail.
+  const std::string absent = absl::StrCat(getenv("TEST_TMPDIR"), "/missing");
+  std::vector<std::string> seen;
+  const auto collect = CollectPathsCallback(&seen);
+  const absl::Status result = YieldFiles(absent, collect);
+  EXPECT_FALSE(result.ok());
+  EXPECT_THAT(result.message(), testing::HasSubstr("could not stat"));
+}
+
+TEST(YieldFilesTest, YieldsTopLevelFiles) {
+  const std::string base =
+      absl::StrCat(getenv("TEST_TMPDIR"), "/top-level-root");
+  ASSERT_EQ(mkdir(base.c_str(), 0755), 0);
+  ASSERT_TRUE(SetFileContents(absl::StrCat(base, "/a"), "foo").ok());
+  ASSERT_TRUE(SetFileContents(absl::StrCat(base, "/b"), "bar").ok());
+  ASSERT_TRUE(SetFileContents(absl::StrCat(base, "/c"), "baz").ok());
+  // Check the exact paths (in any order), not merely how many were yielded.
+  std::vector<std::string> seen;
+  EXPECT_TRUE(YieldFiles(base, CollectPathsCallback(&seen)).ok());
+  EXPECT_THAT(seen, testing::UnorderedElementsAre(absl::StrCat(base, "/a"),
+                                                  absl::StrCat(base, "/b"),
+                                                  absl::StrCat(base, "/c")));
+}
+
+TEST(YieldFilesTest, YieldsDeepFiles) {
+  const std::string root = absl::StrCat(getenv("TEST_TMPDIR"), "/deep-root");
+  ASSERT_EQ(mkdir(root.c_str(), 0755), 0);
+  const std::string child = absl::StrCat(root, "/child");
+  ASSERT_EQ(mkdir(child.c_str(), 0755), 0);
+  const std::string leaf = absl::StrCat(child, "/leaf");
+  ASSERT_EQ(mkdir(leaf.c_str(), 0755), 0);
+  ASSERT_TRUE(SetFileContents(absl::StrCat(root, "/a"), "foo").ok());
+  ASSERT_TRUE(SetFileContents(absl::StrCat(child, "/b"), "bar").ok());
+  ASSERT_TRUE(SetFileContents(absl::StrCat(leaf, "/c"), "baz").ok());
+  ASSERT_TRUE(SetFileContents(absl::StrCat(leaf, "/d"), "boo").ok());
+  // Check the exact paths (in any order), not merely how many were yielded.
+  std::vector<std::string> seen;
+  EXPECT_TRUE(YieldFiles(root, CollectPathsCallback(&seen)).ok());
+  EXPECT_THAT(seen, testing::UnorderedElementsAre(absl::StrCat(root, "/a"),
+                                                  absl::StrCat(child, "/b"),
+                                                  absl::StrCat(leaf, "/c"),
+                                                  absl::StrCat(leaf, "/d")));
+}
+
+}  // namespace
+
+}  // namespace fuzzing