centipede/util.h - third_party/github/google/fuzztest - Git at Google

 // Copyright 2022 The Centipede Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #ifndef THIRD_PARTY_CENTIPEDE_UTIL_H_
 #define THIRD_PARTY_CENTIPEDE_UTIL_H_

 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
 #include <string>
 #include <string_view>
 #include <vector>

 #include "absl/base/nullability.h"
 #include "absl/types/span.h"
 #include "./centipede/feature.h"
 #include "./common/defs.h"
 #include "./common/logging.h"

 namespace fuzztest::internal {

 // Returns the hash of the contents of the file `file_path`. Supports the file
 // being remote. Returns an empty string if the `file_path` is empty.
 std::string HashOfFileContents(std::string_view file_path);
 // Returns a printable string representing at most `max_len` bytes of `data`.
 std::string AsPrintableString(ByteSpan data, size_t max_len);
 // Reads from a local file `file_path` into `data`.
 // Crashes on any error.
 void ReadFromLocalFile(std::string_view file_path, ByteArray &data);
 // Same as above.
 void ReadFromLocalFile(std::string_view file_path, std::string &data);
 // Same as above but for FeatureVec.
 // Crashes if the number of read bytes is not 0 mod sizeof(feature_t).
 void ReadFromLocalFile(std::string_view file_path, FeatureVec &data);
 // Same as above but for vector<uint32_t>.
 void ReadFromLocalFile(std::string_view file_path, std::vector<uint32_t> &data);
 // Clears the content of the file `file_path`. Crashes on any error.
 void ClearLocalFileContents(std::string_view file_path);
 // Writes the contents of `data` to a local file `file_path`.
 // Crashes on any error.
 void WriteToLocalFile(std::string_view file_path, ByteSpan data);
 // Same as above.
 void WriteToLocalFile(std::string_view file_path, std::string_view data);
 // Same as above but for FeatureVec.
 void WriteToLocalFile(std::string_view file_path, const FeatureVec &data);
 // Writes `data` to `dir_path`/Hash(`data`). Does nothing if `dir_path.empty()`.
 void WriteToLocalHashedFileInDir(std::string_view dir_path, ByteSpan data);
 // Same as `WriteToLocalHashedFileInDir` except supports remote files.
 void WriteToRemoteHashedFileInDir(std::string_view dir_path, ByteSpan data);
 // Returns a path string suitable to create a temporary local directory.
 // Will return the same value every time it is called within one thread,
 // but different values for different threads and difference processes.
 std::string TemporaryLocalDirPath();

 // Creates an empty dir `path` and schedules it for deletion at exit.
 // Uses atexit(), and so the removal will not happen if exit() is bypassed.
 // This function is supposed to be called only on paths created by
 // TemporaryLocalDirPath().
 void CreateLocalDirRemovedAtExit(std::string_view path);

 // In CTOR, creates a file path inside `dir_path`.
 // In DTOR, removes this file if it was created.
 class ScopedFile {
  public:
   ScopedFile(std::string_view dir_path, std::string_view name);
   ~ScopedFile();

   // Returns the path.
   std::string_view path() const { return my_path_; }

  private:
   std::string my_path_;
 };

 // If `seed` != 0, returns `seed`, otherwise returns a random number
 // based on time, pid, tid, etc.
 size_t GetRandomSeed(size_t seed);

 // Returns a string that starts with `prefix` and that uniquely identifies
 // the caller's process and thread.
 std::string ProcessAndThreadUniqueID(std::string_view prefix);

 // Computes a random subset of `set` that needs to be
 // removed to reach `target_size` non-zero weights in the set.
 // `set` is an array of weights, some of which could be zero.
 //
 // Subsets with smaller combined weight are more likely to be returned.
 // The return value is a sorted vector of indices of elements of `set`
 // that need to be removed, including those with zero weights.
 // Randomness is retrieved from `rng`.
 //
 // Example:  set = {20, 10, 0, 40, 50}
 //   For target_size >= 4, the return value will be {2}, i.e. indices of all 0s.
 //   For target_size == 3, the return value will be one of
 //   {1, 2}, {0, 2}, {2, 3}, {2, 4}, i.e. the indices of 0s and one more index.
 //   {1, 2} is more likely than {2, 4} because set[1] < set[4].
 //
 // This is a flavour of https://en.wikipedia.org/wiki/Reservoir_sampling
 // with two differences:
 //   * Elements with weight 0 are unconditionally included.
 //   * We choose which elements to remove instead of which elements to pick.
 //     We use this inverted algorithm because in a typical use case
 //     `target_size` is just slightly smaller than set.size().
 std::vector<size_t> RandomWeightedSubset(absl::Span<const uint64_t> set,
                                          size_t target_size, Rng &rng);

 // Removes all elements from `set` whose indices are found in `subset_indices`.
 // `subset_indices` is a sorted vector.
 template <typename T>
 void RemoveSubset(const std::vector<size_t> &subset_indices,
                   std::vector<T> &set) {
   size_t pos_to_write = 0;
   for (size_t i = 0, n = set.size(); i < n; i++) {
     // If subset_indices.size() is k, this loop's complexity is O(n*log(k)).
     // We can do it in O(n+k) with a bit more code, but this loop is not
     // expected to be hot. Besides, k would typically be small.
     if (!std::binary_search(subset_indices.begin(), subset_indices.end(), i))
       std::swap(set[pos_to_write++], set[i]);
   }
   set.resize(pos_to_write);
 }

 // Append the bytes from 'hash' to 'ba'.
 void AppendHashToArray(ByteArray &ba, std::string_view hash);
 // Reverse to AppendHashToArray.
 std::string ExtractHashFromArray(ByteArray &ba);

 // Pack {features, Hash(data)} into a byte array.
 ByteArray PackFeaturesAndHash(const ByteArray &data,
                               const FeatureVec &features);

 // Pack `features` and the hash of `data` directly from their raw data format.
 ByteArray PackFeaturesAndHashAsRawBytes(const ByteArray &data,
                                         ByteSpan features);

 // Given a `blob` created by `PackFeaturesAndHash`, unpack the features into
 // `features` and return the hash.
 std::string UnpackFeaturesAndHash(ByteSpan blob,
                                   FeatureVec *absl_nonnull features);

 // Parses `dictionary_text` representing an AFL/libFuzzer dictionary.
 // https://github.com/google/AFL/blob/master/dictionaries/README.dictionaries
 // https://llvm.org/docs/LibFuzzer.html#dictionaries
 // Fills in `dictionary_entries` with byte sequences from the dictionary.
 // Returns true iff parsing completes successfully.
 bool ParseAFLDictionary(std::string_view dictionary_text,
                         std::vector<ByteArray> &dictionary_entries);

 // Maps `size` bytes with `mmap(NO_RESERVE)` or equivalent, returns the result.
 // FUZZTEST_CHECK-fails on error.
 // The resulting memory is unreserved, and will be zero-initialized on first
 // access.
 uint8_t *MmapNoReserve(size_t size);

 // Unmaps memory returned by `MmapNoReserve`().
 // FUZZTEST_CHECK-fails on error.
 void Munmap(uint8_t *ptr, size_t size);

 // Fixed size array allocated/deallocated with MmapNoReserve/Munmap.
 // operator[] checks indices for out-of-bounds.
 template <size_t kSize>
 class MmapNoReserveArray {
  public:
   MmapNoReserveArray() : array_(MmapNoReserve(kSize)) {}
   ~MmapNoReserveArray() { Munmap(array_, kSize); }
   MmapNoReserveArray(const MmapNoReserveArray &) = delete;
   MmapNoReserveArray &operator=(const MmapNoReserveArray &) = delete;
   MmapNoReserveArray(MmapNoReserveArray &&) = delete;
   MmapNoReserveArray &operator=(MmapNoReserveArray &&) = delete;
   uint8_t operator[](size_t i) const {
     FUZZTEST_CHECK_LT(i, kSize);
     return array_[i];
   }
   uint8_t &operator[](size_t i) {
     FUZZTEST_CHECK_LT(i, kSize);
     return array_[i];
   }

  private:
   uint8_t *array_;
 };

 }  // namespace fuzztest::internal

 #endif  // THIRD_PARTY_CENTIPEDE_UTIL_H_
	// Copyright 2022 The Centipede Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#ifndef THIRD_PARTY_CENTIPEDE_UTIL_H_
	#define THIRD_PARTY_CENTIPEDE_UTIL_H_

	#include <algorithm>
	#include <cstddef>
	#include <cstdint>
	#include <string>
	#include <string_view>
	#include <vector>

	#include "absl/base/nullability.h"
	#include "absl/types/span.h"
	#include "./centipede/feature.h"
	#include "./common/defs.h"
	#include "./common/logging.h"

	namespace fuzztest::internal {

	// Returns the hash of the contents of the file `file_path`. Supports the file
	// being remote. Returns an empty string if the `file_path` is empty.
	std::string HashOfFileContents(std::string_view file_path);
	// Returns a printable string representing at most `max_len` bytes of `data`.
	std::string AsPrintableString(ByteSpan data, size_t max_len);
	// Reads from a local file `file_path` into `data`.
	// Crashes on any error.
	void ReadFromLocalFile(std::string_view file_path, ByteArray &data);
	// Same as above.
	void ReadFromLocalFile(std::string_view file_path, std::string &data);
	// Same as above but for FeatureVec.
	// Crashes if the number of read bytes is not 0 mod sizeof(feature_t).
	void ReadFromLocalFile(std::string_view file_path, FeatureVec &data);
	// Same as above but for vector<uint32_t>.
	void ReadFromLocalFile(std::string_view file_path, std::vector<uint32_t> &data);
	// Clears the content of the file `file_path`. Crashes on any error.
	void ClearLocalFileContents(std::string_view file_path);
	// Writes the contents of `data` to a local file `file_path`.
	// Crashes on any error.
	void WriteToLocalFile(std::string_view file_path, ByteSpan data);
	// Same as above.
	void WriteToLocalFile(std::string_view file_path, std::string_view data);
	// Same as above but for FeatureVec.
	void WriteToLocalFile(std::string_view file_path, const FeatureVec &data);
	// Writes `data` to `dir_path`/Hash(`data`). Does nothing if `dir_path.empty()`.
	void WriteToLocalHashedFileInDir(std::string_view dir_path, ByteSpan data);
	// Same as `WriteToLocalHashedFileInDir` except supports remote files.
	void WriteToRemoteHashedFileInDir(std::string_view dir_path, ByteSpan data);
	// Returns a path string suitable to create a temporary local directory.
	// Will return the same value every time it is called within one thread,
	// but different values for different threads and difference processes.
	std::string TemporaryLocalDirPath();

	// Creates an empty dir `path` and schedules it for deletion at exit.
	// Uses atexit(), and so the removal will not happen if exit() is bypassed.
	// This function is supposed to be called only on paths created by
	// TemporaryLocalDirPath().
	void CreateLocalDirRemovedAtExit(std::string_view path);

	// In CTOR, creates a file path inside `dir_path`.
	// In DTOR, removes this file if it was created.
	class ScopedFile {
	public:
	ScopedFile(std::string_view dir_path, std::string_view name);
	~ScopedFile();

	// Returns the path.
	std::string_view path() const { return my_path_; }

	private:
	std::string my_path_;
	};

	// If `seed` != 0, returns `seed`, otherwise returns a random number
	// based on time, pid, tid, etc.
	size_t GetRandomSeed(size_t seed);

	// Returns a string that starts with `prefix` and that uniquely identifies
	// the caller's process and thread.
	std::string ProcessAndThreadUniqueID(std::string_view prefix);

	// Computes a random subset of `set` that needs to be
	// removed to reach `target_size` non-zero weights in the set.
	// `set` is an array of weights, some of which could be zero.
	//
	// Subsets with smaller combined weight are more likely to be returned.
	// The return value is a sorted vector of indices of elements of `set`
	// that need to be removed, including those with zero weights.
	// Randomness is retrieved from `rng`.
	//
	// Example: set = {20, 10, 0, 40, 50}
	// For target_size >= 4, the return value will be {2}, i.e. indices of all 0s.
	// For target_size == 3, the return value will be one of
	// {1, 2}, {0, 2}, {2, 3}, {2, 4}, i.e. the indices of 0s and one more index.
	// {1, 2} is more likely than {2, 4} because set[1] < set[4].
	//
	// This is a flavour of https://en.wikipedia.org/wiki/Reservoir_sampling
	// with two differences:
	// * Elements with weight 0 are unconditionally included.
	// * We choose which elements to remove instead of which elements to pick.
	// We use this inverted algorithm because in a typical use case
	// `target_size` is just slightly smaller than set.size().
	std::vector<size_t> RandomWeightedSubset(absl::Span<const uint64_t> set,
	size_t target_size, Rng &rng);

	// Removes all elements from `set` whose indices are found in `subset_indices`.
	// `subset_indices` is a sorted vector.
	template <typename T>
	void RemoveSubset(const std::vector<size_t> &subset_indices,
	std::vector<T> &set) {
	size_t pos_to_write = 0;
	for (size_t i = 0, n = set.size(); i < n; i++) {
	// If subset_indices.size() is k, this loop's complexity is O(n*log(k)).
	// We can do it in O(n+k) with a bit more code, but this loop is not
	// expected to be hot. Besides, k would typically be small.
	if (!std::binary_search(subset_indices.begin(), subset_indices.end(), i))
	std::swap(set[pos_to_write++], set[i]);
	}
	set.resize(pos_to_write);
	}

	// Append the bytes from 'hash' to 'ba'.
	void AppendHashToArray(ByteArray &ba, std::string_view hash);
	// Reverse to AppendHashToArray.
	std::string ExtractHashFromArray(ByteArray &ba);

	// Pack {features, Hash(data)} into a byte array.
	ByteArray PackFeaturesAndHash(const ByteArray &data,
	const FeatureVec &features);

	// Pack `features` and the hash of `data` directly from their raw data format.
	ByteArray PackFeaturesAndHashAsRawBytes(const ByteArray &data,
	ByteSpan features);

	// Given a `blob` created by `PackFeaturesAndHash`, unpack the features into
	// `features` and return the hash.
	std::string UnpackFeaturesAndHash(ByteSpan blob,
	FeatureVec *absl_nonnull features);

	// Parses `dictionary_text` representing an AFL/libFuzzer dictionary.
	// https://github.com/google/AFL/blob/master/dictionaries/README.dictionaries
	// https://llvm.org/docs/LibFuzzer.html#dictionaries
	// Fills in `dictionary_entries` with byte sequences from the dictionary.
	// Returns true iff parsing completes successfully.
	bool ParseAFLDictionary(std::string_view dictionary_text,
	std::vector<ByteArray> &dictionary_entries);

	// Maps `size` bytes with `mmap(NO_RESERVE)` or equivalent, returns the result.
	// FUZZTEST_CHECK-fails on error.
	// The resulting memory is unreserved, and will be zero-initialized on first
	// access.
	uint8_t *MmapNoReserve(size_t size);

	// Unmaps memory returned by `MmapNoReserve`().
	// FUZZTEST_CHECK-fails on error.
	void Munmap(uint8_t *ptr, size_t size);

	// Fixed size array allocated/deallocated with MmapNoReserve/Munmap.
	// operator[] checks indices for out-of-bounds.
	template <size_t kSize>
	class MmapNoReserveArray {
	public:
	MmapNoReserveArray() : array_(MmapNoReserve(kSize)) {}
	~MmapNoReserveArray() { Munmap(array_, kSize); }
	MmapNoReserveArray(const MmapNoReserveArray &) = delete;
	MmapNoReserveArray &operator=(const MmapNoReserveArray &) = delete;
	MmapNoReserveArray(MmapNoReserveArray &&) = delete;
	MmapNoReserveArray &operator=(MmapNoReserveArray &&) = delete;
	uint8_t operator[](size_t i) const {
	FUZZTEST_CHECK_LT(i, kSize);
	return array_[i];
	}
	uint8_t &operator[](size_t i) {
	FUZZTEST_CHECK_LT(i, kSize);
	return array_[i];
	}

	private:
	uint8_t *array_;
	};

	} // namespace fuzztest::internal

	#endif // THIRD_PARTY_CENTIPEDE_UTIL_H_