blob: 93ca1cbd46b776636f7e364e3ad0dd085d53ce1c [file] [log] [blame] [edit]
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_UTIL_H_
#define THIRD_PARTY_CENTIPEDE_UTIL_H_
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <vector>
#include "absl/base/nullability.h"
#include "absl/types/span.h"
#include "./centipede/feature.h"
#include "./common/defs.h"
#include "./common/logging.h"
namespace fuzztest::internal {
// Returns the hash of the contents of the file `file_path`. Supports the file
// being remote. Returns an empty string if the `file_path` is empty.
std::string HashOfFileContents(std::string_view file_path);
// Returns a printable string representing at most `max_len` bytes of `data`.
std::string AsPrintableString(ByteSpan data, size_t max_len);
// Reads from a local file `file_path` into `data`.
// Crashes on any error.
void ReadFromLocalFile(std::string_view file_path, ByteArray &data);
// Same as above.
void ReadFromLocalFile(std::string_view file_path, std::string &data);
// Same as above but for FeatureVec.
// Crashes if the number of read bytes is not 0 mod sizeof(feature_t).
void ReadFromLocalFile(std::string_view file_path, FeatureVec &data);
// Same as above but for vector<uint32_t>.
void ReadFromLocalFile(std::string_view file_path, std::vector<uint32_t> &data);
// Clears the content of the file `file_path`. Crashes on any error.
void ClearLocalFileContents(std::string_view file_path);
// Writes the contents of `data` to a local file `file_path`.
// Crashes on any error.
void WriteToLocalFile(std::string_view file_path, ByteSpan data);
// Same as above.
void WriteToLocalFile(std::string_view file_path, std::string_view data);
// Same as above but for FeatureVec.
void WriteToLocalFile(std::string_view file_path, const FeatureVec &data);
// Writes `data` to `dir_path`/Hash(`data`). Does nothing if `dir_path.empty()`.
void WriteToLocalHashedFileInDir(std::string_view dir_path, ByteSpan data);
// Same as `WriteToLocalHashedFileInDir` except supports remote files.
void WriteToRemoteHashedFileInDir(std::string_view dir_path, ByteSpan data);
// Returns a path string suitable to create a temporary local directory.
// Will return the same value every time it is called within one thread,
// but different values for different threads and difference processes.
std::string TemporaryLocalDirPath();
// Creates an empty dir `path` and schedules it for deletion at exit.
// Uses atexit(), and so the removal will not happen if exit() is bypassed.
// This function is supposed to be called only on paths created by
// TemporaryLocalDirPath().
void CreateLocalDirRemovedAtExit(std::string_view path);
// In CTOR, creates a file path inside `dir_path`.
// In DTOR, removes this file if it was created.
class ScopedFile {
public:
ScopedFile(std::string_view dir_path, std::string_view name);
~ScopedFile();
// Returns the path.
std::string_view path() const { return my_path_; }
private:
std::string my_path_;
};
// If `seed` != 0, returns `seed`, otherwise returns a random number
// based on time, pid, tid, etc.
size_t GetRandomSeed(size_t seed);
// Returns a string that starts with `prefix` and that uniquely identifies
// the caller's process and thread.
std::string ProcessAndThreadUniqueID(std::string_view prefix);
// Computes a random subset of `set` that needs to be
// removed to reach `target_size` non-zero weights in the set.
// `set` is an array of weights, some of which could be zero.
//
// Subsets with smaller combined weight are more likely to be returned.
// The return value is a sorted vector of indices of elements of `set`
// that need to be removed, including those with zero weights.
// Randomness is retrieved from `rng`.
//
// Example: set = {20, 10, 0, 40, 50}
// For target_size >= 4, the return value will be {2}, i.e. indices of all 0s.
// For target_size == 3, the return value will be one of
// {1, 2}, {0, 2}, {2, 3}, {2, 4}, i.e. the indices of 0s and one more index.
// {1, 2} is more likely than {2, 4} because set[1] < set[4].
//
// This is a flavour of https://en.wikipedia.org/wiki/Reservoir_sampling
// with two differences:
// * Elements with weight 0 are unconditionally included.
// * We choose which elements to remove instead of which elements to pick.
// We use this inverted algorithm because in a typical use case
// `target_size` is just slightly smaller than set.size().
std::vector<size_t> RandomWeightedSubset(absl::Span<const uint64_t> set,
size_t target_size, Rng &rng);
// Removes all elements from `set` whose indices are found in `subset_indices`.
// `subset_indices` is a sorted vector.
template <typename T>
void RemoveSubset(const std::vector<size_t> &subset_indices,
std::vector<T> &set) {
size_t pos_to_write = 0;
for (size_t i = 0, n = set.size(); i < n; i++) {
// If subset_indices.size() is k, this loop's complexity is O(n*log(k)).
// We can do it in O(n+k) with a bit more code, but this loop is not
// expected to be hot. Besides, k would typically be small.
if (!std::binary_search(subset_indices.begin(), subset_indices.end(), i))
std::swap(set[pos_to_write++], set[i]);
}
set.resize(pos_to_write);
}
// Append the bytes from 'hash' to 'ba'.
void AppendHashToArray(ByteArray &ba, std::string_view hash);
// Reverse to AppendHashToArray.
std::string ExtractHashFromArray(ByteArray &ba);
// Pack {features, Hash(data)} into a byte array.
ByteArray PackFeaturesAndHash(const ByteArray &data,
const FeatureVec &features);
// Pack `features` and the hash of `data` directly from their raw data format.
ByteArray PackFeaturesAndHashAsRawBytes(const ByteArray &data,
ByteSpan features);
// Given a `blob` created by `PackFeaturesAndHash`, unpack the features into
// `features` and return the hash.
std::string UnpackFeaturesAndHash(ByteSpan blob,
FeatureVec *absl_nonnull features);
// Parses `dictionary_text` representing an AFL/libFuzzer dictionary.
// https://github.com/google/AFL/blob/master/dictionaries/README.dictionaries
// https://llvm.org/docs/LibFuzzer.html#dictionaries
// Fills in `dictionary_entries` with byte sequences from the dictionary.
// Returns true iff parsing completes successfully.
bool ParseAFLDictionary(std::string_view dictionary_text,
std::vector<ByteArray> &dictionary_entries);
// Maps `size` bytes with `mmap(NO_RESERVE)` or equivalent, returns the result.
// FUZZTEST_CHECK-fails on error.
// The resulting memory is unreserved, and will be zero-initialized on first
// access.
uint8_t *MmapNoReserve(size_t size);
// Unmaps memory returned by `MmapNoReserve`().
// FUZZTEST_CHECK-fails on error.
void Munmap(uint8_t *ptr, size_t size);
// Fixed size array allocated/deallocated with MmapNoReserve/Munmap.
// operator[] checks indices for out-of-bounds.
template <size_t kSize>
class MmapNoReserveArray {
public:
MmapNoReserveArray() : array_(MmapNoReserve(kSize)) {}
~MmapNoReserveArray() { Munmap(array_, kSize); }
MmapNoReserveArray(const MmapNoReserveArray &) = delete;
MmapNoReserveArray &operator=(const MmapNoReserveArray &) = delete;
MmapNoReserveArray(MmapNoReserveArray &&) = delete;
MmapNoReserveArray &operator=(MmapNoReserveArray &&) = delete;
uint8_t operator[](size_t i) const {
FUZZTEST_CHECK_LT(i, kSize);
return array_[i];
}
uint8_t &operator[](size_t i) {
FUZZTEST_CHECK_LT(i, kSize);
return array_[i];
}
private:
uint8_t *array_;
};
} // namespace fuzztest::internal
#endif // THIRD_PARTY_CENTIPEDE_UTIL_H_